From 9c2019421511a1bc646981d55528334ae46464c0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:21 +0200 Subject: i386: move crypto Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/crypto/Makefile | 5 + arch/x86/crypto/Makefile_32 | 12 + arch/x86/crypto/aes-i586-asm_32.S | 373 ++++++++++++++++++++++++ arch/x86/crypto/aes_32.c | 515 ++++++++++++++++++++++++++++++++++ arch/x86/crypto/twofish-i586-asm_32.S | 335 ++++++++++++++++++++++ arch/x86/crypto/twofish_32.c | 97 +++++++ 6 files changed, 1337 insertions(+) create mode 100644 arch/x86/crypto/Makefile create mode 100644 arch/x86/crypto/Makefile_32 create mode 100644 arch/x86/crypto/aes-i586-asm_32.S create mode 100644 arch/x86/crypto/aes_32.c create mode 100644 arch/x86/crypto/twofish-i586-asm_32.S create mode 100644 arch/x86/crypto/twofish_32.c (limited to 'arch/x86') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile new file mode 100644 index 00000000000..b1bcf7c6302 --- /dev/null +++ b/arch/x86/crypto/Makefile @@ -0,0 +1,5 @@ +ifeq ($(CONFIG_X86_32),y) +include ${srctree}/arch/x86/crypto/Makefile_32 +else +include ${srctree}/arch/x86_64/crypto/Makefile_64 +endif diff --git a/arch/x86/crypto/Makefile_32 b/arch/x86/crypto/Makefile_32 new file mode 100644 index 00000000000..2d873a2388e --- /dev/null +++ b/arch/x86/crypto/Makefile_32 @@ -0,0 +1,12 @@ +# +# x86/crypto/Makefile +# +# Arch-specific CryptoAPI modules. +# + +obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o +obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o + +aes-i586-y := aes-i586-asm_32.o aes_32.o +twofish-i586-y := twofish-i586-asm_32.o twofish_32.o + diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S new file mode 100644 index 00000000000..f942f0c8f63 --- /dev/null +++ b/arch/x86/crypto/aes-i586-asm_32.S @@ -0,0 +1,373 @@ +// ------------------------------------------------------------------------- +// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK. +// All rights reserved. +// +// LICENSE TERMS +// +// The free distribution and use of this software in both source and binary +// form is allowed (with or without changes) provided that: +// +// 1. distributions of this source code include the above copyright +// notice, this list of conditions and the following disclaimer// +// +// 2. distributions in binary form include the above copyright +// notice, this list of conditions and the following disclaimer +// in the documentation and/or other associated materials// +// +// 3. the copyright holder's name is not used to endorse products +// built using this software without specific written permission. +// +// +// ALTERNATIVELY, provided that this notice is retained in full, this product +// may be distributed under the terms of the GNU General Public License (GPL), +// in which case the provisions of the GPL apply INSTEAD OF those given above. +// +// Copyright (c) 2004 Linus Torvalds +// Copyright (c) 2004 Red Hat, Inc., James Morris + +// DISCLAIMER +// +// This software is provided 'as is' with no explicit or implied warranties +// in respect of its properties including, but not limited to, correctness +// and fitness for purpose. +// ------------------------------------------------------------------------- +// Issue Date: 29/07/2002 + +.file "aes-i586-asm.S" +.text + +#include + +#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) + +/* offsets to parameters with one register pushed onto stack */ +#define tfm 8 +#define out_blk 12 +#define in_blk 16 + +/* offsets in crypto_tfm structure */ +#define ekey (crypto_tfm_ctx_offset + 0) +#define nrnd (crypto_tfm_ctx_offset + 256) +#define dkey (crypto_tfm_ctx_offset + 260) + +// register mapping for encrypt and decrypt subroutines + +#define r0 eax +#define r1 ebx +#define r2 ecx +#define r3 edx +#define r4 esi +#define r5 edi + +#define eaxl al +#define eaxh ah +#define ebxl bl +#define ebxh bh +#define ecxl cl +#define ecxh ch +#define edxl dl +#define edxh dh + +#define _h(reg) reg##h +#define h(reg) _h(reg) + +#define _l(reg) reg##l +#define l(reg) _l(reg) + +// This macro takes a 32-bit word representing a column and uses +// each of its four bytes to index into four tables of 256 32-bit +// words to obtain values that are then xored into the appropriate +// output registers r0, r1, r4 or r5. + +// Parameters: +// table table base address +// %1 out_state[0] +// %2 out_state[1] +// %3 out_state[2] +// %4 out_state[3] +// idx input register for the round (destroyed) +// tmp scratch register for the round +// sched key schedule + +#define do_col(table, a1,a2,a3,a4, idx, tmp) \ + movzx %l(idx),%tmp; \ + xor table(,%tmp,4),%a1; \ + movzx %h(idx),%tmp; \ + shr $16,%idx; \ + xor table+tlen(,%tmp,4),%a2; \ + movzx %l(idx),%tmp; \ + movzx %h(idx),%idx; \ + xor table+2*tlen(,%tmp,4),%a3; \ + xor table+3*tlen(,%idx,4),%a4; + +// initialise output registers from the key schedule +// NB1: original value of a3 is in idx on exit +// NB2: original values of a1,a2,a4 aren't used +#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \ + mov 0 sched,%a1; \ + movzx %l(idx),%tmp; \ + mov 12 sched,%a2; \ + xor table(,%tmp,4),%a1; \ + mov 4 sched,%a4; \ + movzx %h(idx),%tmp; \ + shr $16,%idx; \ + xor table+tlen(,%tmp,4),%a2; \ + movzx %l(idx),%tmp; \ + movzx %h(idx),%idx; \ + xor table+3*tlen(,%idx,4),%a4; \ + mov %a3,%idx; \ + mov 8 sched,%a3; \ + xor table+2*tlen(,%tmp,4),%a3; + +// initialise output registers from the key schedule +// NB1: original value of a3 is in idx on exit +// NB2: original values of a1,a2,a4 aren't used +#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \ + mov 0 sched,%a1; \ + movzx %l(idx),%tmp; \ + mov 4 sched,%a2; \ + xor table(,%tmp,4),%a1; \ + mov 12 sched,%a4; \ + movzx %h(idx),%tmp; \ + shr $16,%idx; \ + xor table+tlen(,%tmp,4),%a2; \ + movzx %l(idx),%tmp; \ + movzx %h(idx),%idx; \ + xor table+3*tlen(,%idx,4),%a4; \ + mov %a3,%idx; \ + mov 8 sched,%a3; \ + xor table+2*tlen(,%tmp,4),%a3; + + +// original Gladman had conditional saves to MMX regs. +#define save(a1, a2) \ + mov %a2,4*a1(%esp) + +#define restore(a1, a2) \ + mov 4*a2(%esp),%a1 + +// These macros perform a forward encryption cycle. They are entered with +// the first previous round column values in r0,r1,r4,r5 and +// exit with the final values in the same registers, using stack +// for temporary storage. + +// round column values +// on entry: r0,r1,r4,r5 +// on exit: r2,r1,r4,r5 +#define fwd_rnd1(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \ + do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \ + restore(r0,0); \ + do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \ + restore(r0,1); \ + do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */ + +// round column values +// on entry: r2,r1,r4,r5 +// on exit: r0,r1,r4,r5 +#define fwd_rnd2(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \ + do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \ + restore(r2,0); \ + do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \ + restore(r2,1); \ + do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */ + +// These macros performs an inverse encryption cycle. They are entered with +// the first previous round column values in r0,r1,r4,r5 and +// exit with the final values in the same registers, using stack +// for temporary storage + +// round column values +// on entry: r0,r1,r4,r5 +// on exit: r2,r1,r4,r5 +#define inv_rnd1(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \ + do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \ + restore(r0,0); \ + do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \ + restore(r0,1); \ + do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */ + +// round column values +// on entry: r2,r1,r4,r5 +// on exit: r0,r1,r4,r5 +#define inv_rnd2(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \ + do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \ + restore(r2,0); \ + do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \ + restore(r2,1); \ + do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */ + +// AES (Rijndael) Encryption Subroutine +/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ + +.global aes_enc_blk + +.extern ft_tab +.extern fl_tab + +.align 4 + +aes_enc_blk: + push %ebp + mov tfm(%esp),%ebp + +// CAUTION: the order and the values used in these assigns +// rely on the register mappings + +1: push %ebx + mov in_blk+4(%esp),%r2 + push %esi + mov nrnd(%ebp),%r3 // number of rounds + push %edi +#if ekey != 0 + lea ekey(%ebp),%ebp // key pointer +#endif + +// input four columns and xor in first round key + + mov (%r2),%r0 + mov 4(%r2),%r1 + mov 8(%r2),%r4 + mov 12(%r2),%r5 + xor (%ebp),%r0 + xor 4(%ebp),%r1 + xor 8(%ebp),%r4 + xor 12(%ebp),%r5 + + sub $8,%esp // space for register saves on stack + add $16,%ebp // increment to next round key + cmp $12,%r3 + jb 4f // 10 rounds for 128-bit key + lea 32(%ebp),%ebp + je 3f // 12 rounds for 192-bit key + lea 32(%ebp),%ebp + +2: fwd_rnd1( -64(%ebp) ,ft_tab) // 14 rounds for 256-bit key + fwd_rnd2( -48(%ebp) ,ft_tab) +3: fwd_rnd1( -32(%ebp) ,ft_tab) // 12 rounds for 192-bit key + fwd_rnd2( -16(%ebp) ,ft_tab) +4: fwd_rnd1( (%ebp) ,ft_tab) // 10 rounds for 128-bit key + fwd_rnd2( +16(%ebp) ,ft_tab) + fwd_rnd1( +32(%ebp) ,ft_tab) + fwd_rnd2( +48(%ebp) ,ft_tab) + fwd_rnd1( +64(%ebp) ,ft_tab) + fwd_rnd2( +80(%ebp) ,ft_tab) + fwd_rnd1( +96(%ebp) ,ft_tab) + fwd_rnd2(+112(%ebp) ,ft_tab) + fwd_rnd1(+128(%ebp) ,ft_tab) + fwd_rnd2(+144(%ebp) ,fl_tab) // last round uses a different table + +// move final values to the output array. CAUTION: the +// order of these assigns rely on the register mappings + + add $8,%esp + mov out_blk+12(%esp),%ebp + mov %r5,12(%ebp) + pop %edi + mov %r4,8(%ebp) + pop %esi + mov %r1,4(%ebp) + pop %ebx + mov %r0,(%ebp) + pop %ebp + mov $1,%eax + ret + +// AES (Rijndael) Decryption Subroutine +/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ + +.global aes_dec_blk + +.extern it_tab +.extern il_tab + +.align 4 + +aes_dec_blk: + push %ebp + mov tfm(%esp),%ebp + +// CAUTION: the order and the values used in these assigns +// rely on the register mappings + +1: push %ebx + mov in_blk+4(%esp),%r2 + push %esi + mov nrnd(%ebp),%r3 // number of rounds + push %edi +#if dkey != 0 + lea dkey(%ebp),%ebp // key pointer +#endif + mov %r3,%r0 + shl $4,%r0 + add %r0,%ebp + +// input four columns and xor in first round key + + mov (%r2),%r0 + mov 4(%r2),%r1 + mov 8(%r2),%r4 + mov 12(%r2),%r5 + xor (%ebp),%r0 + xor 4(%ebp),%r1 + xor 8(%ebp),%r4 + xor 12(%ebp),%r5 + + sub $8,%esp // space for register saves on stack + sub $16,%ebp // increment to next round key + cmp $12,%r3 + jb 4f // 10 rounds for 128-bit key + lea -32(%ebp),%ebp + je 3f // 12 rounds for 192-bit key + lea -32(%ebp),%ebp + +2: inv_rnd1( +64(%ebp), it_tab) // 14 rounds for 256-bit key + inv_rnd2( +48(%ebp), it_tab) +3: inv_rnd1( +32(%ebp), it_tab) // 12 rounds for 192-bit key + inv_rnd2( +16(%ebp), it_tab) +4: inv_rnd1( (%ebp), it_tab) // 10 rounds for 128-bit key + inv_rnd2( -16(%ebp), it_tab) + inv_rnd1( -32(%ebp), it_tab) + inv_rnd2( -48(%ebp), it_tab) + inv_rnd1( -64(%ebp), it_tab) + inv_rnd2( -80(%ebp), it_tab) + inv_rnd1( -96(%ebp), it_tab) + inv_rnd2(-112(%ebp), it_tab) + inv_rnd1(-128(%ebp), it_tab) + inv_rnd2(-144(%ebp), il_tab) // last round uses a different table + +// move final values to the output array. CAUTION: the +// order of these assigns rely on the register mappings + + add $8,%esp + mov out_blk+12(%esp),%ebp + mov %r5,12(%ebp) + pop %edi + mov %r4,8(%ebp) + pop %esi + mov %r1,4(%ebp) + pop %ebx + mov %r0,(%ebp) + pop %ebp + mov $1,%eax + ret + diff --git a/arch/x86/crypto/aes_32.c b/arch/x86/crypto/aes_32.c new file mode 100644 index 00000000000..49aad9397f1 --- /dev/null +++ b/arch/x86/crypto/aes_32.c @@ -0,0 +1,515 @@ +/* + * + * Glue Code for optimized 586 assembler version of AES + * + * Copyright (c) 2002, Dr Brian Gladman <>, Worcester, UK. + * All rights reserved. + * + * LICENSE TERMS + * + * The free distribution and use of this software in both source and binary + * form is allowed (with or without changes) provided that: + * + * 1. distributions of this source code include the above copyright + * notice, this list of conditions and the following disclaimer; + * + * 2. distributions in binary form include the above copyright + * notice, this list of conditions and the following disclaimer + * in the documentation and/or other associated materials; + * + * 3. the copyright holder's name is not used to endorse products + * built using this software without specific written permission. + * + * ALTERNATIVELY, provided that this notice is retained in full, this product + * may be distributed under the terms of the GNU General Public License (GPL), + * in which case the provisions of the GPL apply INSTEAD OF those given above. + * + * DISCLAIMER + * + * This software is provided 'as is' with no explicit or implied warranties + * in respect of its properties, including, but not limited to, correctness + * and/or fitness for purpose. + * + * Copyright (c) 2003, Adam J. Richter (conversion to + * 2.5 API). + * Copyright (c) 2003, 2004 Fruhwirth Clemens + * Copyright (c) 2004 Red Hat, Inc., James Morris + * + */ + +#include +#include +#include +#include +#include +#include +#include + +asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); +asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + +#define AES_MIN_KEY_SIZE 16 +#define AES_MAX_KEY_SIZE 32 +#define AES_BLOCK_SIZE 16 +#define AES_KS_LENGTH 4 * AES_BLOCK_SIZE +#define RC_LENGTH 29 + +struct aes_ctx { + u32 ekey[AES_KS_LENGTH]; + u32 rounds; + u32 dkey[AES_KS_LENGTH]; +}; + +#define WPOLY 0x011b +#define bytes2word(b0, b1, b2, b3) \ + (((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0)) + +/* define the finite field multiplies required for Rijndael */ +#define f2(x) ((x) ? pow[log[x] + 0x19] : 0) +#define f3(x) ((x) ? pow[log[x] + 0x01] : 0) +#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0) +#define fb(x) ((x) ? pow[log[x] + 0x68] : 0) +#define fd(x) ((x) ? pow[log[x] + 0xee] : 0) +#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0) +#define fi(x) ((x) ? pow[255 - log[x]]: 0) + +static inline u32 upr(u32 x, int n) +{ + return (x << 8 * n) | (x >> (32 - 8 * n)); +} + +static inline u8 bval(u32 x, int n) +{ + return x >> 8 * n; +} + +/* The forward and inverse affine transformations used in the S-box */ +#define fwd_affine(x) \ + (w = (u32)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(u8)(w^(w>>8))) + +#define inv_affine(x) \ + (w = (u32)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(u8)(w^(w>>8))) + +static u32 rcon_tab[RC_LENGTH]; + +u32 ft_tab[4][256]; +u32 fl_tab[4][256]; +static u32 im_tab[4][256]; +u32 il_tab[4][256]; +u32 it_tab[4][256]; + +static void gen_tabs(void) +{ + u32 i, w; + u8 pow[512], log[256]; + + /* + * log and power tables for GF(2^8) finite field with + * WPOLY as modular polynomial - the simplest primitive + * root is 0x03, used here to generate the tables. + */ + i = 0; w = 1; + + do { + pow[i] = (u8)w; + pow[i + 255] = (u8)w; + log[w] = (u8)i++; + w ^= (w << 1) ^ (w & 0x80 ? WPOLY : 0); + } while (w != 1); + + for(i = 0, w = 1; i < RC_LENGTH; ++i) { + rcon_tab[i] = bytes2word(w, 0, 0, 0); + w = f2(w); + } + + for(i = 0; i < 256; ++i) { + u8 b; + + b = fwd_affine(fi((u8)i)); + w = bytes2word(f2(b), b, b, f3(b)); + + /* tables for a normal encryption round */ + ft_tab[0][i] = w; + ft_tab[1][i] = upr(w, 1); + ft_tab[2][i] = upr(w, 2); + ft_tab[3][i] = upr(w, 3); + w = bytes2word(b, 0, 0, 0); + + /* + * tables for last encryption round + * (may also be used in the key schedule) + */ + fl_tab[0][i] = w; + fl_tab[1][i] = upr(w, 1); + fl_tab[2][i] = upr(w, 2); + fl_tab[3][i] = upr(w, 3); + + b = fi(inv_affine((u8)i)); + w = bytes2word(fe(b), f9(b), fd(b), fb(b)); + + /* tables for the inverse mix column operation */ + im_tab[0][b] = w; + im_tab[1][b] = upr(w, 1); + im_tab[2][b] = upr(w, 2); + im_tab[3][b] = upr(w, 3); + + /* tables for a normal decryption round */ + it_tab[0][i] = w; + it_tab[1][i] = upr(w,1); + it_tab[2][i] = upr(w,2); + it_tab[3][i] = upr(w,3); + + w = bytes2word(b, 0, 0, 0); + + /* tables for last decryption round */ + il_tab[0][i] = w; + il_tab[1][i] = upr(w,1); + il_tab[2][i] = upr(w,2); + il_tab[3][i] = upr(w,3); + } +} + +#define four_tables(x,tab,vf,rf,c) \ +( tab[0][bval(vf(x,0,c),rf(0,c))] ^ \ + tab[1][bval(vf(x,1,c),rf(1,c))] ^ \ + tab[2][bval(vf(x,2,c),rf(2,c))] ^ \ + tab[3][bval(vf(x,3,c),rf(3,c))] \ +) + +#define vf1(x,r,c) (x) +#define rf1(r,c) (r) +#define rf2(r,c) ((r-c)&3) + +#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0) +#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c) + +#define ff(x) inv_mcol(x) + +#define ke4(k,i) \ +{ \ + k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \ + k[4*(i)+5] = ss[1] ^= ss[0]; \ + k[4*(i)+6] = ss[2] ^= ss[1]; \ + k[4*(i)+7] = ss[3] ^= ss[2]; \ +} + +#define kel4(k,i) \ +{ \ + k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \ + k[4*(i)+5] = ss[1] ^= ss[0]; \ + k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \ +} + +#define ke6(k,i) \ +{ \ + k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ + k[6*(i)+ 7] = ss[1] ^= ss[0]; \ + k[6*(i)+ 8] = ss[2] ^= ss[1]; \ + k[6*(i)+ 9] = ss[3] ^= ss[2]; \ + k[6*(i)+10] = ss[4] ^= ss[3]; \ + k[6*(i)+11] = ss[5] ^= ss[4]; \ +} + +#define kel6(k,i) \ +{ \ + k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ + k[6*(i)+ 7] = ss[1] ^= ss[0]; \ + k[6*(i)+ 8] = ss[2] ^= ss[1]; \ + k[6*(i)+ 9] = ss[3] ^= ss[2]; \ +} + +#define ke8(k,i) \ +{ \ + k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ + k[8*(i)+ 9] = ss[1] ^= ss[0]; \ + k[8*(i)+10] = ss[2] ^= ss[1]; \ + k[8*(i)+11] = ss[3] ^= ss[2]; \ + k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \ + k[8*(i)+13] = ss[5] ^= ss[4]; \ + k[8*(i)+14] = ss[6] ^= ss[5]; \ + k[8*(i)+15] = ss[7] ^= ss[6]; \ +} + +#define kel8(k,i) \ +{ \ + k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ + k[8*(i)+ 9] = ss[1] ^= ss[0]; \ + k[8*(i)+10] = ss[2] ^= ss[1]; \ + k[8*(i)+11] = ss[3] ^= ss[2]; \ +} + +#define kdf4(k,i) \ +{ \ + ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \ + ss[1] = ss[1] ^ ss[3]; \ + ss[2] = ss[2] ^ ss[3]; \ + ss[3] = ss[3]; \ + ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ + ss[i % 4] ^= ss[4]; \ + ss[4] ^= k[4*(i)]; \ + k[4*(i)+4] = ff(ss[4]); \ + ss[4] ^= k[4*(i)+1]; \ + k[4*(i)+5] = ff(ss[4]); \ + ss[4] ^= k[4*(i)+2]; \ + k[4*(i)+6] = ff(ss[4]); \ + ss[4] ^= k[4*(i)+3]; \ + k[4*(i)+7] = ff(ss[4]); \ +} + +#define kd4(k,i) \ +{ \ + ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ + ss[i % 4] ^= ss[4]; \ + ss[4] = ff(ss[4]); \ + k[4*(i)+4] = ss[4] ^= k[4*(i)]; \ + k[4*(i)+5] = ss[4] ^= k[4*(i)+1]; \ + k[4*(i)+6] = ss[4] ^= k[4*(i)+2]; \ + k[4*(i)+7] = ss[4] ^= k[4*(i)+3]; \ +} + +#define kdl4(k,i) \ +{ \ + ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ + ss[i % 4] ^= ss[4]; \ + k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \ + k[4*(i)+5] = ss[1] ^ ss[3]; \ + k[4*(i)+6] = ss[0]; \ + k[4*(i)+7] = ss[1]; \ +} + +#define kdf6(k,i) \ +{ \ + ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ + k[6*(i)+ 6] = ff(ss[0]); \ + ss[1] ^= ss[0]; \ + k[6*(i)+ 7] = ff(ss[1]); \ + ss[2] ^= ss[1]; \ + k[6*(i)+ 8] = ff(ss[2]); \ + ss[3] ^= ss[2]; \ + k[6*(i)+ 9] = ff(ss[3]); \ + ss[4] ^= ss[3]; \ + k[6*(i)+10] = ff(ss[4]); \ + ss[5] ^= ss[4]; \ + k[6*(i)+11] = ff(ss[5]); \ +} + +#define kd6(k,i) \ +{ \ + ss[6] = ls_box(ss[5],3) ^ rcon_tab[i]; \ + ss[0] ^= ss[6]; ss[6] = ff(ss[6]); \ + k[6*(i)+ 6] = ss[6] ^= k[6*(i)]; \ + ss[1] ^= ss[0]; \ + k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1]; \ + ss[2] ^= ss[1]; \ + k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2]; \ + ss[3] ^= ss[2]; \ + k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3]; \ + ss[4] ^= ss[3]; \ + k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4]; \ + ss[5] ^= ss[4]; \ + k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5]; \ +} + +#define kdl6(k,i) \ +{ \ + ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ + k[6*(i)+ 6] = ss[0]; \ + ss[1] ^= ss[0]; \ + k[6*(i)+ 7] = ss[1]; \ + ss[2] ^= ss[1]; \ + k[6*(i)+ 8] = ss[2]; \ + ss[3] ^= ss[2]; \ + k[6*(i)+ 9] = ss[3]; \ +} + +#define kdf8(k,i) \ +{ \ + ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ + k[8*(i)+ 8] = ff(ss[0]); \ + ss[1] ^= ss[0]; \ + k[8*(i)+ 9] = ff(ss[1]); \ + ss[2] ^= ss[1]; \ + k[8*(i)+10] = ff(ss[2]); \ + ss[3] ^= ss[2]; \ + k[8*(i)+11] = ff(ss[3]); \ + ss[4] ^= ls_box(ss[3],0); \ + k[8*(i)+12] = ff(ss[4]); \ + ss[5] ^= ss[4]; \ + k[8*(i)+13] = ff(ss[5]); \ + ss[6] ^= ss[5]; \ + k[8*(i)+14] = ff(ss[6]); \ + ss[7] ^= ss[6]; \ + k[8*(i)+15] = ff(ss[7]); \ +} + +#define kd8(k,i) \ +{ \ + u32 __g = ls_box(ss[7],3) ^ rcon_tab[i]; \ + ss[0] ^= __g; \ + __g = ff(__g); \ + k[8*(i)+ 8] = __g ^= k[8*(i)]; \ + ss[1] ^= ss[0]; \ + k[8*(i)+ 9] = __g ^= k[8*(i)+ 1]; \ + ss[2] ^= ss[1]; \ + k[8*(i)+10] = __g ^= k[8*(i)+ 2]; \ + ss[3] ^= ss[2]; \ + k[8*(i)+11] = __g ^= k[8*(i)+ 3]; \ + __g = ls_box(ss[3],0); \ + ss[4] ^= __g; \ + __g = ff(__g); \ + k[8*(i)+12] = __g ^= k[8*(i)+ 4]; \ + ss[5] ^= ss[4]; \ + k[8*(i)+13] = __g ^= k[8*(i)+ 5]; \ + ss[6] ^= ss[5]; \ + k[8*(i)+14] = __g ^= k[8*(i)+ 6]; \ + ss[7] ^= ss[6]; \ + k[8*(i)+15] = __g ^= k[8*(i)+ 7]; \ +} + +#define kdl8(k,i) \ +{ \ + ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ + k[8*(i)+ 8] = ss[0]; \ + ss[1] ^= ss[0]; \ + k[8*(i)+ 9] = ss[1]; \ + ss[2] ^= ss[1]; \ + k[8*(i)+10] = ss[2]; \ + ss[3] ^= ss[2]; \ + k[8*(i)+11] = ss[3]; \ +} + +static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + int i; + u32 ss[8]; + struct aes_ctx *ctx = crypto_tfm_ctx(tfm); + const __le32 *key = (const __le32 *)in_key; + u32 *flags = &tfm->crt_flags; + + /* encryption schedule */ + + ctx->ekey[0] = ss[0] = le32_to_cpu(key[0]); + ctx->ekey[1] = ss[1] = le32_to_cpu(key[1]); + ctx->ekey[2] = ss[2] = le32_to_cpu(key[2]); + ctx->ekey[3] = ss[3] = le32_to_cpu(key[3]); + + switch(key_len) { + case 16: + for (i = 0; i < 9; i++) + ke4(ctx->ekey, i); + kel4(ctx->ekey, 9); + ctx->rounds = 10; + break; + + case 24: + ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]); + ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]); + for (i = 0; i < 7; i++) + ke6(ctx->ekey, i); + kel6(ctx->ekey, 7); + ctx->rounds = 12; + break; + + case 32: + ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]); + ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]); + ctx->ekey[6] = ss[6] = le32_to_cpu(key[6]); + ctx->ekey[7] = ss[7] = le32_to_cpu(key[7]); + for (i = 0; i < 6; i++) + ke8(ctx->ekey, i); + kel8(ctx->ekey, 6); + ctx->rounds = 14; + break; + + default: + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + /* decryption schedule */ + + ctx->dkey[0] = ss[0] = le32_to_cpu(key[0]); + ctx->dkey[1] = ss[1] = le32_to_cpu(key[1]); + ctx->dkey[2] = ss[2] = le32_to_cpu(key[2]); + ctx->dkey[3] = ss[3] = le32_to_cpu(key[3]); + + switch (key_len) { + case 16: + kdf4(ctx->dkey, 0); + for (i = 1; i < 9; i++) + kd4(ctx->dkey, i); + kdl4(ctx->dkey, 9); + break; + + case 24: + ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4])); + ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5])); + kdf6(ctx->dkey, 0); + for (i = 1; i < 7; i++) + kd6(ctx->dkey, i); + kdl6(ctx->dkey, 7); + break; + + case 32: + ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4])); + ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5])); + ctx->dkey[6] = ff(ss[6] = le32_to_cpu(key[6])); + ctx->dkey[7] = ff(ss[7] = le32_to_cpu(key[7])); + kdf8(ctx->dkey, 0); + for (i = 1; i < 6; i++) + kd8(ctx->dkey, i); + kdl8(ctx->dkey, 6); + break; + } + return 0; +} + +static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + aes_enc_blk(tfm, dst, src); +} + +static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + aes_dec_blk(tfm, dst, src); +} + +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-i586", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aes_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = aes_set_key, + .cia_encrypt = aes_encrypt, + .cia_decrypt = aes_decrypt + } + } +}; + +static int __init aes_init(void) +{ + gen_tabs(); + return crypto_register_alg(&aes_alg); +} + +static void __exit aes_fini(void) +{ + crypto_unregister_alg(&aes_alg); +} + +module_init(aes_init); +module_exit(aes_fini); + +MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, i586 asm optimized"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Fruhwirth Clemens, James Morris, Brian Gladman, Adam Richter"); +MODULE_ALIAS("aes"); diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S new file mode 100644 index 00000000000..39b98ed2c1b --- /dev/null +++ b/arch/x86/crypto/twofish-i586-asm_32.S @@ -0,0 +1,335 @@ +/*************************************************************************** +* Copyright (C) 2006 by Joachim Fritschi, * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * +***************************************************************************/ + +.file "twofish-i586-asm.S" +.text + +#include + +/* return adress at 0 */ + +#define in_blk 12 /* input byte array address parameter*/ +#define out_blk 8 /* output byte array address parameter*/ +#define tfm 4 /* Twofish context structure */ + +#define a_offset 0 +#define b_offset 4 +#define c_offset 8 +#define d_offset 12 + +/* Structure of the crypto context struct*/ + +#define s0 0 /* S0 Array 256 Words each */ +#define s1 1024 /* S1 Array */ +#define s2 2048 /* S2 Array */ +#define s3 3072 /* S3 Array */ +#define w 4096 /* 8 whitening keys (word) */ +#define k 4128 /* key 1-32 ( word ) */ + +/* define a few register aliases to allow macro substitution */ + +#define R0D %eax +#define R0B %al +#define R0H %ah + +#define R1D %ebx +#define R1B %bl +#define R1H %bh + +#define R2D %ecx +#define R2B %cl +#define R2H %ch + +#define R3D %edx +#define R3B %dl +#define R3H %dh + + +/* performs input whitening */ +#define input_whitening(src,context,offset)\ + xor w+offset(context), src; + +/* performs input whitening */ +#define output_whitening(src,context,offset)\ + xor w+16+offset(context), src; + +/* + * a input register containing a (rotated 16) + * b input register containing b + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + */ +#define encrypt_round(a,b,c,d,round)\ + push d ## D;\ + movzx b ## B, %edi;\ + mov s1(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + mov s2(%ebp,%edi,4),%esi;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor s2(%ebp,%edi,4),d ## D;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%ebp,%edi,4),%esi;\ + movzx b ## B, %edi;\ + xor s3(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + xor (%ebp,%edi,4), %esi;\ + movzx b ## H, %edi;\ + ror $15, b ## D;\ + xor (%ebp,%edi,4), d ## D;\ + movzx a ## H, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + pop %edi;\ + add d ## D, %esi;\ + add %esi, d ## D;\ + add k+round(%ebp), %esi;\ + xor %esi, c ## D;\ + rol $15, c ## D;\ + add k+4+round(%ebp),d ## D;\ + xor %edi, d ## D; + +/* + * a input register containing a (rotated 16) + * b input register containing b + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + * last round has different rotations for the output preparation + */ +#define encrypt_last_round(a,b,c,d,round)\ + push d ## D;\ + movzx b ## B, %edi;\ + mov s1(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + mov s2(%ebp,%edi,4),%esi;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor s2(%ebp,%edi,4),d ## D;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%ebp,%edi,4),%esi;\ + movzx b ## B, %edi;\ + xor s3(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + xor (%ebp,%edi,4), %esi;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%ebp,%edi,4), d ## D;\ + movzx a ## H, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + pop %edi;\ + add d ## D, %esi;\ + add %esi, d ## D;\ + add k+round(%ebp), %esi;\ + xor %esi, c ## D;\ + ror $1, c ## D;\ + add k+4+round(%ebp),d ## D;\ + xor %edi, d ## D; + +/* + * a input register containing a + * b input register containing b (rotated 16) + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + */ +#define decrypt_round(a,b,c,d,round)\ + push c ## D;\ + movzx a ## B, %edi;\ + mov (%ebp,%edi,4), c ## D;\ + movzx b ## B, %edi;\ + mov s3(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s1(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%ebp,%edi,4), %esi;\ + movzx a ## B, %edi;\ + xor s2(%ebp,%edi,4),c ## D;\ + movzx b ## B, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $15, a ## D;\ + xor s3(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + xor s2(%ebp,%edi,4),%esi;\ + pop %edi;\ + add %esi, c ## D;\ + add c ## D, %esi;\ + add k+round(%ebp), c ## D;\ + xor %edi, c ## D;\ + add k+4+round(%ebp),%esi;\ + xor %esi, d ## D;\ + rol $15, d ## D; + +/* + * a input register containing a + * b input register containing b (rotated 16) + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + * last round has different rotations for the output preparation + */ +#define decrypt_last_round(a,b,c,d,round)\ + push c ## D;\ + movzx a ## B, %edi;\ + mov (%ebp,%edi,4), c ## D;\ + movzx b ## B, %edi;\ + mov s3(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s1(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%ebp,%edi,4), %esi;\ + movzx a ## B, %edi;\ + xor s2(%ebp,%edi,4),c ## D;\ + movzx b ## B, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + xor s2(%ebp,%edi,4),%esi;\ + pop %edi;\ + add %esi, c ## D;\ + add c ## D, %esi;\ + add k+round(%ebp), c ## D;\ + xor %edi, c ## D;\ + add k+4+round(%ebp),%esi;\ + xor %esi, d ## D;\ + ror $1, d ## D; + +.align 4 +.global twofish_enc_blk +.global twofish_dec_blk + +twofish_enc_blk: + push %ebp /* save registers according to calling convention*/ + push %ebx + push %esi + push %edi + + mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */ + add $crypto_tfm_ctx_offset, %ebp /* ctx adress */ + mov in_blk+16(%esp),%edi /* input adress in edi */ + + mov (%edi), %eax + mov b_offset(%edi), %ebx + mov c_offset(%edi), %ecx + mov d_offset(%edi), %edx + input_whitening(%eax,%ebp,a_offset) + ror $16, %eax + input_whitening(%ebx,%ebp,b_offset) + input_whitening(%ecx,%ebp,c_offset) + input_whitening(%edx,%ebp,d_offset) + rol $1, %edx + + encrypt_round(R0,R1,R2,R3,0); + encrypt_round(R2,R3,R0,R1,8); + encrypt_round(R0,R1,R2,R3,2*8); + encrypt_round(R2,R3,R0,R1,3*8); + encrypt_round(R0,R1,R2,R3,4*8); + encrypt_round(R2,R3,R0,R1,5*8); + encrypt_round(R0,R1,R2,R3,6*8); + encrypt_round(R2,R3,R0,R1,7*8); + encrypt_round(R0,R1,R2,R3,8*8); + encrypt_round(R2,R3,R0,R1,9*8); + encrypt_round(R0,R1,R2,R3,10*8); + encrypt_round(R2,R3,R0,R1,11*8); + encrypt_round(R0,R1,R2,R3,12*8); + encrypt_round(R2,R3,R0,R1,13*8); + encrypt_round(R0,R1,R2,R3,14*8); + encrypt_last_round(R2,R3,R0,R1,15*8); + + output_whitening(%eax,%ebp,c_offset) + output_whitening(%ebx,%ebp,d_offset) + output_whitening(%ecx,%ebp,a_offset) + output_whitening(%edx,%ebp,b_offset) + mov out_blk+16(%esp),%edi; + mov %eax, c_offset(%edi) + mov %ebx, d_offset(%edi) + mov %ecx, (%edi) + mov %edx, b_offset(%edi) + + pop %edi + pop %esi + pop %ebx + pop %ebp + mov $1, %eax + ret + +twofish_dec_blk: + push %ebp /* save registers according to calling convention*/ + push %ebx + push %esi + push %edi + + + mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */ + add $crypto_tfm_ctx_offset, %ebp /* ctx adress */ + mov in_blk+16(%esp),%edi /* input adress in edi */ + + mov (%edi), %eax + mov b_offset(%edi), %ebx + mov c_offset(%edi), %ecx + mov d_offset(%edi), %edx + output_whitening(%eax,%ebp,a_offset) + output_whitening(%ebx,%ebp,b_offset) + ror $16, %ebx + output_whitening(%ecx,%ebp,c_offset) + output_whitening(%edx,%ebp,d_offset) + rol $1, %ecx + + decrypt_round(R0,R1,R2,R3,15*8); + decrypt_round(R2,R3,R0,R1,14*8); + decrypt_round(R0,R1,R2,R3,13*8); + decrypt_round(R2,R3,R0,R1,12*8); + decrypt_round(R0,R1,R2,R3,11*8); + decrypt_round(R2,R3,R0,R1,10*8); + decrypt_round(R0,R1,R2,R3,9*8); + decrypt_round(R2,R3,R0,R1,8*8); + decrypt_round(R0,R1,R2,R3,7*8); + decrypt_round(R2,R3,R0,R1,6*8); + decrypt_round(R0,R1,R2,R3,5*8); + decrypt_round(R2,R3,R0,R1,4*8); + decrypt_round(R0,R1,R2,R3,3*8); + decrypt_round(R2,R3,R0,R1,2*8); + decrypt_round(R0,R1,R2,R3,1*8); + decrypt_last_round(R2,R3,R0,R1,0); + + input_whitening(%eax,%ebp,c_offset) + input_whitening(%ebx,%ebp,d_offset) + input_whitening(%ecx,%ebp,a_offset) + input_whitening(%edx,%ebp,b_offset) + mov out_blk+16(%esp),%edi; + mov %eax, c_offset(%edi) + mov %ebx, d_offset(%edi) + mov %ecx, (%edi) + mov %edx, b_offset(%edi) + + pop %edi + pop %esi + pop %ebx + pop %ebp + mov $1, %eax + ret diff --git a/arch/x86/crypto/twofish_32.c b/arch/x86/crypto/twofish_32.c new file mode 100644 index 00000000000..e3004dfe9c7 --- /dev/null +++ b/arch/x86/crypto/twofish_32.c @@ -0,0 +1,97 @@ +/* + * Glue Code for optimized 586 assembler version of TWOFISH + * + * Originally Twofish for GPG + * By Matthew Skala , July 26, 1998 + * 256-bit key length added March 20, 1999 + * Some modifications to reduce the text size by Werner Koch, April, 1998 + * Ported to the kerneli patch by Marc Mutz + * Ported to CryptoAPI by Colin Slater + * + * The original author has disclaimed all copyright interest in this + * code and thus put it in the public domain. The subsequent authors + * have put this under the GNU General Public License. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + * This code is a "clean room" implementation, written from the paper + * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, + * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available + * through http://www.counterpane.com/twofish.html + * + * For background information on multiplication in finite fields, used for + * the matrix operations in the key schedule, see the book _Contemporary + * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the + * Third Edition. + */ + +#include +#include +#include +#include +#include + + +asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); +asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + +static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_enc_blk(tfm, dst, src); +} + +static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_dec_blk(tfm, dst, src); +} + +static struct crypto_alg alg = { + .cra_name = "twofish", + .cra_driver_name = "twofish-i586", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = TF_MIN_KEY_SIZE, + .cia_max_keysize = TF_MAX_KEY_SIZE, + .cia_setkey = twofish_setkey, + .cia_encrypt = twofish_encrypt, + .cia_decrypt = twofish_decrypt + } + } +}; + +static int __init init(void) +{ + return crypto_register_alg(&alg); +} + +static void __exit fini(void) +{ + crypto_unregister_alg(&alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized"); +MODULE_ALIAS("twofish"); -- cgit v1.2.3-70-g09d2 From 23d6f82bd1f07886b3a974c5193baa715475dd37 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:23 +0200 Subject: i386: move kernel/acpi Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/kernel/Makefile_32 | 2 +- arch/i386/kernel/acpi/Makefile | 5 - arch/i386/kernel/acpi/Makefile_32 | 10 - arch/i386/kernel/acpi/boot.c | 1326 --------------------------------- arch/i386/kernel/acpi/cstate.c | 164 ---- arch/i386/kernel/acpi/earlyquirk_32.c | 84 --- arch/i386/kernel/acpi/processor.c | 75 -- arch/i386/kernel/acpi/sleep_32.c | 110 --- arch/i386/kernel/acpi/wakeup_32.S | 321 -------- arch/x86/kernel/acpi/Makefile | 5 + arch/x86/kernel/acpi/Makefile_32 | 10 + arch/x86/kernel/acpi/boot.c | 1326 +++++++++++++++++++++++++++++++++ arch/x86/kernel/acpi/cstate.c | 164 ++++ arch/x86/kernel/acpi/earlyquirk_32.c | 84 +++ arch/x86/kernel/acpi/processor.c | 75 ++ arch/x86/kernel/acpi/sleep_32.c | 110 +++ arch/x86/kernel/acpi/wakeup_32.S | 321 ++++++++ arch/x86_64/kernel/acpi/Makefile | 2 +- arch/x86_64/kernel/acpi/Makefile_64 | 4 +- 19 files changed, 2099 insertions(+), 2099 deletions(-) delete mode 100644 arch/i386/kernel/acpi/Makefile delete mode 100644 arch/i386/kernel/acpi/Makefile_32 delete mode 100644 arch/i386/kernel/acpi/boot.c delete mode 100644 arch/i386/kernel/acpi/cstate.c delete mode 100644 arch/i386/kernel/acpi/earlyquirk_32.c delete mode 100644 arch/i386/kernel/acpi/processor.c delete mode 100644 arch/i386/kernel/acpi/sleep_32.c delete mode 100644 arch/i386/kernel/acpi/wakeup_32.S create mode 100644 arch/x86/kernel/acpi/Makefile create mode 100644 arch/x86/kernel/acpi/Makefile_32 create mode 100644 arch/x86/kernel/acpi/boot.c create mode 100644 arch/x86/kernel/acpi/cstate.c create mode 100644 arch/x86/kernel/acpi/earlyquirk_32.c create mode 100644 arch/x86/kernel/acpi/processor.c create mode 100644 arch/x86/kernel/acpi/sleep_32.c create mode 100644 arch/x86/kernel/acpi/wakeup_32.S (limited to 'arch/x86') diff --git a/arch/i386/kernel/Makefile_32 b/arch/i386/kernel/Makefile_32 index d9dd3d677b7..af8304b921d 100644 --- a/arch/i386/kernel/Makefile_32 +++ b/arch/i386/kernel/Makefile_32 @@ -11,7 +11,7 @@ obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \ obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ -obj-y += acpi/ +obj-y += ../../x86/kernel/acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot_32.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile deleted file mode 100644 index 6e00bfeb59a..00000000000 --- a/arch/i386/kernel/acpi/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -ifeq ($(CONFIG_X86_32),y) -include ${srctree}/arch/i386/kernel/acpi/Makefile_32 -else -include ${srctree}/arch/x86_64/kernel/acpi/Makefile_64 -endif diff --git a/arch/i386/kernel/acpi/Makefile_32 b/arch/i386/kernel/acpi/Makefile_32 deleted file mode 100644 index a4852a2e919..00000000000 --- a/arch/i386/kernel/acpi/Makefile_32 +++ /dev/null @@ -1,10 +0,0 @@ -obj-$(CONFIG_ACPI) += boot.o -ifneq ($(CONFIG_PCI),) -obj-$(CONFIG_X86_IO_APIC) += earlyquirk_32.o -endif -obj-$(CONFIG_ACPI_SLEEP) += sleep_32.o wakeup_32.o - -ifneq ($(CONFIG_ACPI_PROCESSOR),) -obj-y += cstate.o processor.o -endif - diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c deleted file mode 100644 index cacdd883bf2..00000000000 --- a/arch/i386/kernel/acpi/boot.c +++ /dev/null @@ -1,1326 +0,0 @@ -/* - * boot.c - Architecture-Specific Low-Level ACPI Boot Support - * - * Copyright (C) 2001, 2002 Paul Diefenbaugh - * Copyright (C) 2001 Jun Nakajima - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -static int __initdata acpi_force = 0; - -#ifdef CONFIG_ACPI -int acpi_disabled = 0; -#else -int acpi_disabled = 1; -#endif -EXPORT_SYMBOL(acpi_disabled); - -#ifdef CONFIG_X86_64 - -#include - -static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } - - -#else /* X86 */ - -#ifdef CONFIG_X86_LOCAL_APIC -#include -#include -#endif /* CONFIG_X86_LOCAL_APIC */ - -#endif /* X86 */ - -#define BAD_MADT_ENTRY(entry, end) ( \ - (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ - ((struct acpi_subtable_header *)entry)->length < sizeof(*entry)) - -#define PREFIX "ACPI: " - -int acpi_noirq; /* skip ACPI IRQ initialization */ -int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */ -int acpi_ht __initdata = 1; /* enable HT */ - -int acpi_lapic; -int acpi_ioapic; -int acpi_strict; -EXPORT_SYMBOL(acpi_strict); - -u8 acpi_sci_flags __initdata; -int acpi_sci_override_gsi __initdata; -int acpi_skip_timer_override __initdata; -int acpi_use_timer_override __initdata; - -#ifdef CONFIG_X86_LOCAL_APIC -static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; -#endif - -#ifndef __HAVE_ARCH_CMPXCHG -#warning ACPI uses CMPXCHG, i486 and later hardware -#endif - -/* -------------------------------------------------------------------------- - Boot-time Configuration - -------------------------------------------------------------------------- */ - -/* - * The default interrupt routing model is PIC (8259). This gets - * overriden if IOAPICs are enumerated (below). - */ -enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; - -#ifdef CONFIG_X86_64 - -/* rely on all ACPI tables being in the direct mapping */ -char *__acpi_map_table(unsigned long phys_addr, unsigned long size) -{ - if (!phys_addr || !size) - return NULL; - - if (phys_addr+size <= (end_pfn_map << PAGE_SHIFT) + PAGE_SIZE) - return __va(phys_addr); - - return NULL; -} - -#else - -/* - * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, - * to map the target physical address. The problem is that set_fixmap() - * provides a single page, and it is possible that the page is not - * sufficient. - * By using this area, we can map up to MAX_IO_APICS pages temporarily, - * i.e. until the next __va_range() call. - * - * Important Safety Note: The fixed I/O APIC page numbers are *subtracted* - * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and - * count idx down while incrementing the phys address. - */ -char *__acpi_map_table(unsigned long phys, unsigned long size) -{ - unsigned long base, offset, mapped_size; - int idx; - - if (phys + size < 8 * 1024 * 1024) - return __va(phys); - - offset = phys & (PAGE_SIZE - 1); - mapped_size = PAGE_SIZE - offset; - set_fixmap(FIX_ACPI_END, phys); - base = fix_to_virt(FIX_ACPI_END); - - /* - * Most cases can be covered by the below. - */ - idx = FIX_ACPI_END; - while (mapped_size < size) { - if (--idx < FIX_ACPI_BEGIN) - return NULL; /* cannot handle this */ - phys += PAGE_SIZE; - set_fixmap(idx, phys); - mapped_size += PAGE_SIZE; - } - - return ((unsigned char *)base + offset); -} -#endif - -#ifdef CONFIG_PCI_MMCONFIG -/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ -struct acpi_mcfg_allocation *pci_mmcfg_config; -int pci_mmcfg_config_num; - -int __init acpi_parse_mcfg(struct acpi_table_header *header) -{ - struct acpi_table_mcfg *mcfg; - unsigned long i; - int config_size; - - if (!header) - return -EINVAL; - - mcfg = (struct acpi_table_mcfg *)header; - - /* how many config structures do we have */ - pci_mmcfg_config_num = 0; - i = header->length - sizeof(struct acpi_table_mcfg); - while (i >= sizeof(struct acpi_mcfg_allocation)) { - ++pci_mmcfg_config_num; - i -= sizeof(struct acpi_mcfg_allocation); - }; - if (pci_mmcfg_config_num == 0) { - printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); - return -ENODEV; - } - - config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config); - pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL); - if (!pci_mmcfg_config) { - printk(KERN_WARNING PREFIX - "No memory for MCFG config tables\n"); - return -ENOMEM; - } - - memcpy(pci_mmcfg_config, &mcfg[1], config_size); - for (i = 0; i < pci_mmcfg_config_num; ++i) { - if (pci_mmcfg_config[i].address > 0xFFFFFFFF) { - printk(KERN_ERR PREFIX - "MMCONFIG not in low 4GB of memory\n"); - kfree(pci_mmcfg_config); - pci_mmcfg_config_num = 0; - return -ENODEV; - } - } - - return 0; -} -#endif /* CONFIG_PCI_MMCONFIG */ - -#ifdef CONFIG_X86_LOCAL_APIC -static int __init acpi_parse_madt(struct acpi_table_header *table) -{ - struct acpi_table_madt *madt = NULL; - - if (!cpu_has_apic) - return -EINVAL; - - madt = (struct acpi_table_madt *)table; - if (!madt) { - printk(KERN_WARNING PREFIX "Unable to map MADT\n"); - return -ENODEV; - } - - if (madt->address) { - acpi_lapic_addr = (u64) madt->address; - - printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n", - madt->address); - } - - acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id); - - return 0; -} - -static int __init -acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) -{ - struct acpi_madt_local_apic *processor = NULL; - - processor = (struct acpi_madt_local_apic *)header; - - if (BAD_MADT_ENTRY(processor, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - /* - * We need to register disabled CPU as well to permit - * counting disabled CPUs. This allows us to size - * cpus_possible_map more accurately, to permit - * to not preallocating memory for all NR_CPUS - * when we use CPU hotplug. - */ - mp_register_lapic(processor->id, /* APIC ID */ - processor->lapic_flags & ACPI_MADT_ENABLED); /* Enabled? */ - - return 0; -} - -static int __init -acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, - const unsigned long end) -{ - struct acpi_madt_local_apic_override *lapic_addr_ovr = NULL; - - lapic_addr_ovr = (struct acpi_madt_local_apic_override *)header; - - if (BAD_MADT_ENTRY(lapic_addr_ovr, end)) - return -EINVAL; - - acpi_lapic_addr = lapic_addr_ovr->address; - - return 0; -} - -static int __init -acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end) -{ - struct acpi_madt_local_apic_nmi *lapic_nmi = NULL; - - lapic_nmi = (struct acpi_madt_local_apic_nmi *)header; - - if (BAD_MADT_ENTRY(lapic_nmi, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - if (lapic_nmi->lint != 1) - printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); - - return 0; -} - -#endif /*CONFIG_X86_LOCAL_APIC */ - -#ifdef CONFIG_X86_IO_APIC - -static int __init -acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) -{ - struct acpi_madt_io_apic *ioapic = NULL; - - ioapic = (struct acpi_madt_io_apic *)header; - - if (BAD_MADT_ENTRY(ioapic, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - mp_register_ioapic(ioapic->id, - ioapic->address, ioapic->global_irq_base); - - return 0; -} - -/* - * Parse Interrupt Source Override for the ACPI SCI - */ -static void __init acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) -{ - if (trigger == 0) /* compatible SCI trigger is level */ - trigger = 3; - - if (polarity == 0) /* compatible SCI polarity is low */ - polarity = 3; - - /* Command-line over-ride via acpi_sci= */ - if (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) - trigger = (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) >> 2; - - if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK) - polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; - - /* - * mp_config_acpi_legacy_irqs() already setup IRQs < 16 - * If GSI is < 16, this will update its flags, - * else it will create a new mp_irqs[] entry. - */ - mp_override_legacy_irq(gsi, polarity, trigger, gsi); - - /* - * stash over-ride to indicate we've been here - * and for later update of acpi_gbl_FADT - */ - acpi_sci_override_gsi = gsi; - return; -} - -static int __init -acpi_parse_int_src_ovr(struct acpi_subtable_header * header, - const unsigned long end) -{ - struct acpi_madt_interrupt_override *intsrc = NULL; - - intsrc = (struct acpi_madt_interrupt_override *)header; - - if (BAD_MADT_ENTRY(intsrc, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { - acpi_sci_ioapic_setup(intsrc->global_irq, - intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, - (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2); - return 0; - } - - if (acpi_skip_timer_override && - intsrc->source_irq == 0 && intsrc->global_irq == 2) { - printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); - return 0; - } - - mp_override_legacy_irq(intsrc->source_irq, - intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, - (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2, - intsrc->global_irq); - - return 0; -} - -static int __init -acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end) -{ - struct acpi_madt_nmi_source *nmi_src = NULL; - - nmi_src = (struct acpi_madt_nmi_source *)header; - - if (BAD_MADT_ENTRY(nmi_src, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - /* TBD: Support nimsrc entries? */ - - return 0; -} - -#endif /* CONFIG_X86_IO_APIC */ - -/* - * acpi_pic_sci_set_trigger() - * - * use ELCR to set PIC-mode trigger type for SCI - * - * If a PIC-mode SCI is not recognized or gives spurious IRQ7's - * it may require Edge Trigger -- use "acpi_sci=edge" - * - * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers - * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge. - * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0) - * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0) - */ - -void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) -{ - unsigned int mask = 1 << irq; - unsigned int old, new; - - /* Real old ELCR mask */ - old = inb(0x4d0) | (inb(0x4d1) << 8); - - /* - * If we use ACPI to set PCI irq's, then we should clear ELCR - * since we will set it correctly as we enable the PCI irq - * routing. - */ - new = acpi_noirq ? old : 0; - - /* - * Update SCI information in the ELCR, it isn't in the PCI - * routing tables.. - */ - switch (trigger) { - case 1: /* Edge - clear */ - new &= ~mask; - break; - case 3: /* Level - set */ - new |= mask; - break; - } - - if (old == new) - return; - - printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old); - outb(new, 0x4d0); - outb(new >> 8, 0x4d1); -} - -int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) -{ - *irq = gsi; - return 0; -} - -/* - * success: return IRQ number (>=0) - * failure: return < 0 - */ -int acpi_register_gsi(u32 gsi, int triggering, int polarity) -{ - unsigned int irq; - unsigned int plat_gsi = gsi; - -#ifdef CONFIG_PCI - /* - * Make sure all (legacy) PCI IRQs are set as level-triggered. - */ - if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { - extern void eisa_set_level_irq(unsigned int irq); - - if (triggering == ACPI_LEVEL_SENSITIVE) - eisa_set_level_irq(gsi); - } -#endif - -#ifdef CONFIG_X86_IO_APIC - if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) { - plat_gsi = mp_register_gsi(gsi, triggering, polarity); - } -#endif - acpi_gsi_to_irq(plat_gsi, &irq); - return irq; -} - -EXPORT_SYMBOL(acpi_register_gsi); - -/* - * ACPI based hotplug support for CPU - */ -#ifdef CONFIG_ACPI_HOTPLUG_CPU -int acpi_map_lsapic(acpi_handle handle, int *pcpu) -{ - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; - union acpi_object *obj; - struct acpi_madt_local_apic *lapic; - cpumask_t tmp_map, new_map; - u8 physid; - int cpu; - - if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) - return -EINVAL; - - if (!buffer.length || !buffer.pointer) - return -EINVAL; - - obj = buffer.pointer; - if (obj->type != ACPI_TYPE_BUFFER || - obj->buffer.length < sizeof(*lapic)) { - kfree(buffer.pointer); - return -EINVAL; - } - - lapic = (struct acpi_madt_local_apic *)obj->buffer.pointer; - - if (lapic->header.type != ACPI_MADT_TYPE_LOCAL_APIC || - !(lapic->lapic_flags & ACPI_MADT_ENABLED)) { - kfree(buffer.pointer); - return -EINVAL; - } - - physid = lapic->id; - - kfree(buffer.pointer); - buffer.length = ACPI_ALLOCATE_BUFFER; - buffer.pointer = NULL; - - tmp_map = cpu_present_map; - mp_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED); - - /* - * If mp_register_lapic successfully generates a new logical cpu - * number, then the following will get us exactly what was mapped - */ - cpus_andnot(new_map, cpu_present_map, tmp_map); - if (cpus_empty(new_map)) { - printk ("Unable to map lapic to logical cpu number\n"); - return -EINVAL; - } - - cpu = first_cpu(new_map); - - *pcpu = cpu; - return 0; -} - -EXPORT_SYMBOL(acpi_map_lsapic); - -int acpi_unmap_lsapic(int cpu) -{ - x86_cpu_to_apicid[cpu] = -1; - cpu_clear(cpu, cpu_present_map); - num_processors--; - - return (0); -} - -EXPORT_SYMBOL(acpi_unmap_lsapic); -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ - -int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) -{ - /* TBD */ - return -EINVAL; -} - -EXPORT_SYMBOL(acpi_register_ioapic); - -int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base) -{ - /* TBD */ - return -EINVAL; -} - -EXPORT_SYMBOL(acpi_unregister_ioapic); - -static unsigned long __init -acpi_scan_rsdp(unsigned long start, unsigned long length) -{ - unsigned long offset = 0; - unsigned long sig_len = sizeof("RSD PTR ") - 1; - - /* - * Scan all 16-byte boundaries of the physical memory region for the - * RSDP signature. - */ - for (offset = 0; offset < length; offset += 16) { - if (strncmp((char *)(phys_to_virt(start) + offset), "RSD PTR ", sig_len)) - continue; - return (start + offset); - } - - return 0; -} - -static int __init acpi_parse_sbf(struct acpi_table_header *table) -{ - struct acpi_table_boot *sb; - - sb = (struct acpi_table_boot *)table; - if (!sb) { - printk(KERN_WARNING PREFIX "Unable to map SBF\n"); - return -ENODEV; - } - - sbf_port = sb->cmos_index; /* Save CMOS port */ - - return 0; -} - -#ifdef CONFIG_HPET_TIMER -#include - -static struct __initdata resource *hpet_res; - -static int __init acpi_parse_hpet(struct acpi_table_header *table) -{ - struct acpi_table_hpet *hpet_tbl; - - hpet_tbl = (struct acpi_table_hpet *)table; - if (!hpet_tbl) { - printk(KERN_WARNING PREFIX "Unable to map HPET\n"); - return -ENODEV; - } - - if (hpet_tbl->address.space_id != ACPI_SPACE_MEM) { - printk(KERN_WARNING PREFIX "HPET timers must be located in " - "memory.\n"); - return -1; - } - - hpet_address = hpet_tbl->address.address; - printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", - hpet_tbl->id, hpet_address); - - /* - * Allocate and initialize the HPET firmware resource for adding into - * the resource tree during the lateinit timeframe. - */ -#define HPET_RESOURCE_NAME_SIZE 9 - hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE); - - if (!hpet_res) - return 0; - - memset(hpet_res, 0, sizeof(*hpet_res)); - hpet_res->name = (void *)&hpet_res[1]; - hpet_res->flags = IORESOURCE_MEM; - snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, "HPET %u", - hpet_tbl->sequence); - - hpet_res->start = hpet_address; - hpet_res->end = hpet_address + (1 * 1024) - 1; - - return 0; -} - -/* - * hpet_insert_resource inserts the HPET resources used into the resource - * tree. - */ -static __init int hpet_insert_resource(void) -{ - if (!hpet_res) - return 1; - - return insert_resource(&iomem_resource, hpet_res); -} - -late_initcall(hpet_insert_resource); - -#else -#define acpi_parse_hpet NULL -#endif - -static int __init acpi_parse_fadt(struct acpi_table_header *table) -{ - -#ifdef CONFIG_X86_PM_TIMER - /* detect the location of the ACPI PM Timer */ - if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) { - /* FADT rev. 2 */ - if (acpi_gbl_FADT.xpm_timer_block.space_id != - ACPI_ADR_SPACE_SYSTEM_IO) - return 0; - - pmtmr_ioport = acpi_gbl_FADT.xpm_timer_block.address; - /* - * "X" fields are optional extensions to the original V1.0 - * fields, so we must selectively expand V1.0 fields if the - * corresponding X field is zero. - */ - if (!pmtmr_ioport) - pmtmr_ioport = acpi_gbl_FADT.pm_timer_block; - } else { - /* FADT rev. 1 */ - pmtmr_ioport = acpi_gbl_FADT.pm_timer_block; - } - if (pmtmr_ioport) - printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", - pmtmr_ioport); -#endif - return 0; -} - -unsigned long __init acpi_find_rsdp(void) -{ - unsigned long rsdp_phys = 0; - - if (efi_enabled) { - if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) - return efi.acpi20; - else if (efi.acpi != EFI_INVALID_TABLE_ADDR) - return efi.acpi; - } - /* - * Scan memory looking for the RSDP signature. First search EBDA (low - * memory) paragraphs and then search upper memory (E0000-FFFFF). - */ - rsdp_phys = acpi_scan_rsdp(0, 0x400); - if (!rsdp_phys) - rsdp_phys = acpi_scan_rsdp(0xE0000, 0x20000); - - return rsdp_phys; -} - -#ifdef CONFIG_X86_LOCAL_APIC -/* - * Parse LAPIC entries in MADT - * returns 0 on success, < 0 on error - */ -static int __init acpi_parse_madt_lapic_entries(void) -{ - int count; - - if (!cpu_has_apic) - return -ENODEV; - - /* - * Note that the LAPIC address is obtained from the MADT (32-bit value) - * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). - */ - - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, - acpi_parse_lapic_addr_ovr, 0); - if (count < 0) { - printk(KERN_ERR PREFIX - "Error parsing LAPIC address override entry\n"); - return count; - } - - mp_register_lapic_address(acpi_lapic_addr); - - count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, acpi_parse_lapic, - MAX_APICS); - if (!count) { - printk(KERN_ERR PREFIX "No LAPIC entries present\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return -ENODEV; - } else if (count < 0) { - printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return count; - } - - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0); - if (count < 0) { - printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return count; - } - return 0; -} -#endif /* CONFIG_X86_LOCAL_APIC */ - -#ifdef CONFIG_X86_IO_APIC -/* - * Parse IOAPIC related entries in MADT - * returns 0 on success, < 0 on error - */ -static int __init acpi_parse_madt_ioapic_entries(void) -{ - int count; - - /* - * ACPI interpreter is required to complete interrupt setup, - * so if it is off, don't enumerate the io-apics with ACPI. - * If MPS is present, it will handle them, - * otherwise the system will stay in PIC mode - */ - if (acpi_disabled || acpi_noirq) { - return -ENODEV; - } - - if (!cpu_has_apic) - return -ENODEV; - - /* - * if "noapic" boot option, don't look for IO-APICs - */ - if (skip_ioapic_setup) { - printk(KERN_INFO PREFIX "Skipping IOAPIC probe " - "due to 'noapic' option.\n"); - return -ENODEV; - } - - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, acpi_parse_ioapic, - MAX_IO_APICS); - if (!count) { - printk(KERN_ERR PREFIX "No IOAPIC entries present\n"); - return -ENODEV; - } else if (count < 0) { - printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n"); - return count; - } - - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, - NR_IRQ_VECTORS); - if (count < 0) { - printk(KERN_ERR PREFIX - "Error parsing interrupt source overrides entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return count; - } - - /* - * If BIOS did not supply an INT_SRC_OVR for the SCI - * pretend we got one so we can set the SCI flags. - */ - if (!acpi_sci_override_gsi) - acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0); - - /* Fill in identity legacy mapings where no override */ - mp_config_acpi_legacy_irqs(); - - count = - acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src, - NR_IRQ_VECTORS); - if (count < 0) { - printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return count; - } - - return 0; -} -#else -static inline int acpi_parse_madt_ioapic_entries(void) -{ - return -1; -} -#endif /* !CONFIG_X86_IO_APIC */ - -static void __init acpi_process_madt(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - int error; - - if (!acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) { - - /* - * Parse MADT LAPIC entries - */ - error = acpi_parse_madt_lapic_entries(); - if (!error) { - acpi_lapic = 1; - -#ifdef CONFIG_X86_GENERICARCH - generic_bigsmp_probe(); -#endif - /* - * Parse MADT IO-APIC entries - */ - error = acpi_parse_madt_ioapic_entries(); - if (!error) { - acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; - acpi_irq_balance_set(NULL); - acpi_ioapic = 1; - - smp_found_config = 1; - setup_apic_routing(); - } - } - if (error == -EINVAL) { - /* - * Dell Precision Workstation 410, 610 come here. - */ - printk(KERN_ERR PREFIX - "Invalid BIOS MADT, disabling ACPI\n"); - disable_acpi(); - } - } -#endif - return; -} - -#ifdef __i386__ - -static int __init disable_acpi_irq(struct dmi_system_id *d) -{ - if (!acpi_force) { - printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n", - d->ident); - acpi_noirq_set(); - } - return 0; -} - -static int __init disable_acpi_pci(struct dmi_system_id *d) -{ - if (!acpi_force) { - printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n", - d->ident); - acpi_disable_pci(); - } - return 0; -} - -static int __init dmi_disable_acpi(struct dmi_system_id *d) -{ - if (!acpi_force) { - printk(KERN_NOTICE "%s detected: acpi off\n", d->ident); - disable_acpi(); - } else { - printk(KERN_NOTICE - "Warning: DMI blacklist says broken, but acpi forced\n"); - } - return 0; -} - -/* - * Limit ACPI to CPU enumeration for HT - */ -static int __init force_acpi_ht(struct dmi_system_id *d) -{ - if (!acpi_force) { - printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", - d->ident); - disable_acpi(); - acpi_ht = 1; - } else { - printk(KERN_NOTICE - "Warning: acpi=force overrules DMI blacklist: acpi=ht\n"); - } - return 0; -} - -/* - * If your system is blacklisted here, but you find that acpi=force - * works for you, please contact acpi-devel@sourceforge.net - */ -static struct dmi_system_id __initdata acpi_dmi_table[] = { - /* - * Boxes that need ACPI disabled - */ - { - .callback = dmi_disable_acpi, - .ident = "IBM Thinkpad", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "2629H1G"), - }, - }, - - /* - * Boxes that need acpi=ht - */ - { - .callback = force_acpi_ht, - .ident = "FSC Primergy T850", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), - DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"), - }, - }, - { - .callback = force_acpi_ht, - .ident = "HP VISUALIZE NT Workstation", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"), - }, - }, - { - .callback = force_acpi_ht, - .ident = "Compaq Workstation W8000", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), - DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"), - }, - }, - { - .callback = force_acpi_ht, - .ident = "ASUS P4B266", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "P4B266"), - }, - }, - { - .callback = force_acpi_ht, - .ident = "ASUS P2B-DS", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"), - }, - }, - { - .callback = force_acpi_ht, - .ident = "ASUS CUR-DLS", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"), - }, - }, - { - .callback = force_acpi_ht, - .ident = "ABIT i440BX-W83977", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ABIT "), - DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"), - }, - }, - { - .callback = force_acpi_ht, - .ident = "IBM Bladecenter", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"), - }, - }, - { - .callback = force_acpi_ht, - .ident = "IBM eServer xSeries 360", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"), - }, - }, - { - .callback = force_acpi_ht, - .ident = "IBM eserver xSeries 330", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"), - }, - }, - { - .callback = force_acpi_ht, - .ident = "IBM eserver xSeries 440", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"), - }, - }, - - /* - * Boxes that need ACPI PCI IRQ routing disabled - */ - { - .callback = disable_acpi_irq, - .ident = "ASUS A7V", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"), - DMI_MATCH(DMI_BOARD_NAME, ""), - /* newer BIOS, Revision 1011, does work */ - DMI_MATCH(DMI_BIOS_VERSION, - "ASUS A7V ACPI BIOS Revision 1007"), - }, - }, - { - /* - * Latest BIOS for IBM 600E (1.16) has bad pcinum - * for LPC bridge, which is needed for the PCI - * interrupt links to work. DSDT fix is in bug 5966. - * 2645, 2646 model numbers are shared with 600/600E/600X - */ - .callback = disable_acpi_irq, - .ident = "IBM Thinkpad 600 Series 2645", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "2645"), - }, - }, - { - .callback = disable_acpi_irq, - .ident = "IBM Thinkpad 600 Series 2646", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), - DMI_MATCH(DMI_BOARD_NAME, "2646"), - }, - }, - /* - * Boxes that need ACPI PCI IRQ routing and PCI scan disabled - */ - { /* _BBN 0 bug */ - .callback = disable_acpi_pci, - .ident = "ASUS PR-DLS", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"), - DMI_MATCH(DMI_BIOS_VERSION, - "ASUS PR-DLS ACPI BIOS Revision 1010"), - DMI_MATCH(DMI_BIOS_DATE, "03/21/2003") - }, - }, - { - .callback = disable_acpi_pci, - .ident = "Acer TravelMate 36x Laptop", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Acer"), - DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), - }, - }, - {} -}; - -#endif /* __i386__ */ - -/* - * acpi_boot_table_init() and acpi_boot_init() - * called from setup_arch(), always. - * 1. checksums all tables - * 2. enumerates lapics - * 3. enumerates io-apics - * - * acpi_table_init() is separate to allow reading SRAT without - * other side effects. - * - * side effects of acpi_boot_init: - * acpi_lapic = 1 if LAPIC found - * acpi_ioapic = 1 if IOAPIC found - * if (acpi_lapic && acpi_ioapic) smp_found_config = 1; - * if acpi_blacklisted() acpi_disabled = 1; - * acpi_irq_model=... - * ... - * - * return value: (currently ignored) - * 0: success - * !0: failure - */ - -int __init acpi_boot_table_init(void) -{ - int error; - -#ifdef __i386__ - dmi_check_system(acpi_dmi_table); -#endif - - /* - * If acpi_disabled, bail out - * One exception: acpi=ht continues far enough to enumerate LAPICs - */ - if (acpi_disabled && !acpi_ht) - return 1; - - /* - * Initialize the ACPI boot-time table parser. - */ - error = acpi_table_init(); - if (error) { - disable_acpi(); - return error; - } - - acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); - - /* - * blacklist may disable ACPI entirely - */ - error = acpi_blacklisted(); - if (error) { - if (acpi_force) { - printk(KERN_WARNING PREFIX "acpi=force override\n"); - } else { - printk(KERN_WARNING PREFIX "Disabling ACPI support\n"); - disable_acpi(); - return error; - } - } - - return 0; -} - -int __init acpi_boot_init(void) -{ - /* - * If acpi_disabled, bail out - * One exception: acpi=ht continues far enough to enumerate LAPICs - */ - if (acpi_disabled && !acpi_ht) - return 1; - - acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); - - /* - * set sci_int and PM timer address - */ - acpi_table_parse(ACPI_SIG_FADT, acpi_parse_fadt); - - /* - * Process the Multiple APIC Description Table (MADT), if present - */ - acpi_process_madt(); - - acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet); - - return 0; -} - -static int __init parse_acpi(char *arg) -{ - if (!arg) - return -EINVAL; - - /* "acpi=off" disables both ACPI table parsing and interpreter */ - if (strcmp(arg, "off") == 0) { - disable_acpi(); - } - /* acpi=force to over-ride black-list */ - else if (strcmp(arg, "force") == 0) { - acpi_force = 1; - acpi_ht = 1; - acpi_disabled = 0; - } - /* acpi=strict disables out-of-spec workarounds */ - else if (strcmp(arg, "strict") == 0) { - acpi_strict = 1; - } - /* Limit ACPI just to boot-time to enable HT */ - else if (strcmp(arg, "ht") == 0) { - if (!acpi_force) - disable_acpi(); - acpi_ht = 1; - } - /* "acpi=noirq" disables ACPI interrupt routing */ - else if (strcmp(arg, "noirq") == 0) { - acpi_noirq_set(); - } else { - /* Core will printk when we return error. */ - return -EINVAL; - } - return 0; -} -early_param("acpi", parse_acpi); - -/* FIXME: Using pci= for an ACPI parameter is a travesty. */ -static int __init parse_pci(char *arg) -{ - if (arg && strcmp(arg, "noacpi") == 0) - acpi_disable_pci(); - return 0; -} -early_param("pci", parse_pci); - -#ifdef CONFIG_X86_IO_APIC -static int __init parse_acpi_skip_timer_override(char *arg) -{ - acpi_skip_timer_override = 1; - return 0; -} -early_param("acpi_skip_timer_override", parse_acpi_skip_timer_override); - -static int __init parse_acpi_use_timer_override(char *arg) -{ - acpi_use_timer_override = 1; - return 0; -} -early_param("acpi_use_timer_override", parse_acpi_use_timer_override); -#endif /* CONFIG_X86_IO_APIC */ - -static int __init setup_acpi_sci(char *s) -{ - if (!s) - return -EINVAL; - if (!strcmp(s, "edge")) - acpi_sci_flags = ACPI_MADT_TRIGGER_EDGE | - (acpi_sci_flags & ~ACPI_MADT_TRIGGER_MASK); - else if (!strcmp(s, "level")) - acpi_sci_flags = ACPI_MADT_TRIGGER_LEVEL | - (acpi_sci_flags & ~ACPI_MADT_TRIGGER_MASK); - else if (!strcmp(s, "high")) - acpi_sci_flags = ACPI_MADT_POLARITY_ACTIVE_HIGH | - (acpi_sci_flags & ~ACPI_MADT_POLARITY_MASK); - else if (!strcmp(s, "low")) - acpi_sci_flags = ACPI_MADT_POLARITY_ACTIVE_LOW | - (acpi_sci_flags & ~ACPI_MADT_POLARITY_MASK); - else - return -EINVAL; - return 0; -} -early_param("acpi_sci", setup_acpi_sci); - -int __acpi_acquire_global_lock(unsigned int *lock) -{ - unsigned int old, new, val; - do { - old = *lock; - new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1)); - val = cmpxchg(lock, old, new); - } while (unlikely (val != old)); - return (new < 3) ? -1 : 0; -} - -int __acpi_release_global_lock(unsigned int *lock) -{ - unsigned int old, new, val; - do { - old = *lock; - new = old & ~0x3; - val = cmpxchg(lock, old, new); - } while (unlikely (val != old)); - return old & 0x1; -} diff --git a/arch/i386/kernel/acpi/cstate.c b/arch/i386/kernel/acpi/cstate.c deleted file mode 100644 index 2d39f55d29a..00000000000 --- a/arch/i386/kernel/acpi/cstate.c +++ /dev/null @@ -1,164 +0,0 @@ -/* - * arch/i386/kernel/acpi/cstate.c - * - * Copyright (C) 2005 Intel Corporation - * Venkatesh Pallipadi - * - Added _PDC for SMP C-states on Intel CPUs - */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -/* - * Initialize bm_flags based on the CPU cache properties - * On SMP it depends on cache configuration - * - When cache is not shared among all CPUs, we flush cache - * before entering C3. - * - When cache is shared among all CPUs, we use bm_check - * mechanism as in UP case - * - * This routine is called only after all the CPUs are online - */ -void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, - unsigned int cpu) -{ - struct cpuinfo_x86 *c = cpu_data + cpu; - - flags->bm_check = 0; - if (num_online_cpus() == 1) - flags->bm_check = 1; - else if (c->x86_vendor == X86_VENDOR_INTEL) { - /* - * Today all CPUs that support C3 share cache. - * TBD: This needs to look at cache shared map, once - * multi-core detection patch makes to the base. - */ - flags->bm_check = 1; - } -} -EXPORT_SYMBOL(acpi_processor_power_init_bm_check); - -/* The code below handles cstate entry with monitor-mwait pair on Intel*/ - -struct cstate_entry { - struct { - unsigned int eax; - unsigned int ecx; - } states[ACPI_PROCESSOR_MAX_POWER]; -}; -static struct cstate_entry *cpu_cstate_entry; /* per CPU ptr */ - -static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; - -#define MWAIT_SUBSTATE_MASK (0xf) -#define MWAIT_SUBSTATE_SIZE (4) - -#define CPUID_MWAIT_LEAF (5) -#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1) -#define CPUID5_ECX_INTERRUPT_BREAK (0x2) - -#define MWAIT_ECX_INTERRUPT_BREAK (0x1) - -#define NATIVE_CSTATE_BEYOND_HALT (2) - -int acpi_processor_ffh_cstate_probe(unsigned int cpu, - struct acpi_processor_cx *cx, struct acpi_power_register *reg) -{ - struct cstate_entry *percpu_entry; - struct cpuinfo_x86 *c = cpu_data + cpu; - - cpumask_t saved_mask; - int retval; - unsigned int eax, ebx, ecx, edx; - unsigned int edx_part; - unsigned int cstate_type; /* C-state type and not ACPI C-state type */ - unsigned int num_cstate_subtype; - - if (!cpu_cstate_entry || c->cpuid_level < CPUID_MWAIT_LEAF ) - return -1; - - if (reg->bit_offset != NATIVE_CSTATE_BEYOND_HALT) - return -1; - - percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu); - percpu_entry->states[cx->index].eax = 0; - percpu_entry->states[cx->index].ecx = 0; - - /* Make sure we are running on right CPU */ - saved_mask = current->cpus_allowed; - retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); - if (retval) - return -1; - - cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); - - /* Check whether this particular cx_type (in CST) is supported or not */ - cstate_type = (cx->address >> MWAIT_SUBSTATE_SIZE) + 1; - edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE); - num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK; - - retval = 0; - if (num_cstate_subtype < (cx->address & MWAIT_SUBSTATE_MASK)) { - retval = -1; - goto out; - } - - /* mwait ecx extensions INTERRUPT_BREAK should be supported for C2/C3 */ - if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || - !(ecx & CPUID5_ECX_INTERRUPT_BREAK)) { - retval = -1; - goto out; - } - percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK; - - /* Use the hint in CST */ - percpu_entry->states[cx->index].eax = cx->address; - - if (!mwait_supported[cstate_type]) { - mwait_supported[cstate_type] = 1; - printk(KERN_DEBUG "Monitor-Mwait will be used to enter C-%d " - "state\n", cx->type); - } - -out: - set_cpus_allowed(current, saved_mask); - return retval; -} -EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); - -void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) -{ - unsigned int cpu = smp_processor_id(); - struct cstate_entry *percpu_entry; - - percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu); - mwait_idle_with_hints(percpu_entry->states[cx->index].eax, - percpu_entry->states[cx->index].ecx); -} -EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_enter); - -static int __init ffh_cstate_init(void) -{ - struct cpuinfo_x86 *c = &boot_cpu_data; - if (c->x86_vendor != X86_VENDOR_INTEL) - return -1; - - cpu_cstate_entry = alloc_percpu(struct cstate_entry); - return 0; -} - -static void __exit ffh_cstate_exit(void) -{ - free_percpu(cpu_cstate_entry); - cpu_cstate_entry = NULL; -} - -arch_initcall(ffh_cstate_init); -__exitcall(ffh_cstate_exit); diff --git a/arch/i386/kernel/acpi/earlyquirk_32.c b/arch/i386/kernel/acpi/earlyquirk_32.c deleted file mode 100644 index 23f78efc577..00000000000 --- a/arch/i386/kernel/acpi/earlyquirk_32.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Do early PCI probing for bug detection when the main PCI subsystem is - * not up yet. - */ -#include -#include -#include -#include - -#include -#include -#include - -#ifdef CONFIG_ACPI - -static int __init nvidia_hpet_check(struct acpi_table_header *header) -{ - return 0; -} -#endif - -static int __init check_bridge(int vendor, int device) -{ -#ifdef CONFIG_ACPI - static int warned; - /* According to Nvidia all timer overrides are bogus unless HPET - is enabled. */ - if (!acpi_use_timer_override && vendor == PCI_VENDOR_ID_NVIDIA) { - if (!warned && acpi_table_parse(ACPI_SIG_HPET, - nvidia_hpet_check)) { - warned = 1; - acpi_skip_timer_override = 1; - printk(KERN_INFO "Nvidia board " - "detected. Ignoring ACPI " - "timer override.\n"); - printk(KERN_INFO "If you got timer trouble " - "try acpi_use_timer_override\n"); - - } - } -#endif - if (vendor == PCI_VENDOR_ID_ATI && timer_over_8254 == 1) { - timer_over_8254 = 0; - printk(KERN_INFO "ATI board detected. Disabling timer routing " - "over 8254.\n"); - } - return 0; -} - -void __init check_acpi_pci(void) -{ - int num, slot, func; - - /* Assume the machine supports type 1. If not it will - always read ffffffff and should not have any side effect. - Actually a few buggy systems can machine check. Allow the user - to disable it by command line option at least -AK */ - if (!early_pci_allowed()) - return; - - /* Poor man's PCI discovery */ - for (num = 0; num < 32; num++) { - for (slot = 0; slot < 32; slot++) { - for (func = 0; func < 8; func++) { - u32 class; - u32 vendor; - class = read_pci_config(num, slot, func, - PCI_CLASS_REVISION); - if (class == 0xffffffff) - break; - - if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) - continue; - - vendor = read_pci_config(num, slot, func, - PCI_VENDOR_ID); - - if (check_bridge(vendor & 0xffff, vendor >> 16)) - return; - } - - } - } -} diff --git a/arch/i386/kernel/acpi/processor.c b/arch/i386/kernel/acpi/processor.c deleted file mode 100644 index b54fded4983..00000000000 --- a/arch/i386/kernel/acpi/processor.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * arch/i386/kernel/acpi/processor.c - * - * Copyright (C) 2005 Intel Corporation - * Venkatesh Pallipadi - * - Added _PDC for platforms with Intel CPUs - */ - -#include -#include -#include -#include - -#include -#include - -static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) -{ - struct acpi_object_list *obj_list; - union acpi_object *obj; - u32 *buf; - - /* allocate and initialize pdc. It will be used later. */ - obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL); - if (!obj_list) { - printk(KERN_ERR "Memory allocation error\n"); - return; - } - - obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL); - if (!obj) { - printk(KERN_ERR "Memory allocation error\n"); - kfree(obj_list); - return; - } - - buf = kmalloc(12, GFP_KERNEL); - if (!buf) { - printk(KERN_ERR "Memory allocation error\n"); - kfree(obj); - kfree(obj_list); - return; - } - - buf[0] = ACPI_PDC_REVISION_ID; - buf[1] = 1; - buf[2] = ACPI_PDC_C_CAPABILITY_SMP; - - if (cpu_has(c, X86_FEATURE_EST)) - buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; - - obj->type = ACPI_TYPE_BUFFER; - obj->buffer.length = 12; - obj->buffer.pointer = (u8 *) buf; - obj_list->count = 1; - obj_list->pointer = obj; - pr->pdc = obj_list; - - return; -} - -/* Initialize _PDC data based on the CPU vendor */ -void arch_acpi_processor_init_pdc(struct acpi_processor *pr) -{ - unsigned int cpu = pr->id; - struct cpuinfo_x86 *c = cpu_data + cpu; - - pr->pdc = NULL; - if (c->x86_vendor == X86_VENDOR_INTEL) - init_intel_pdc(pr, c); - - return; -} - -EXPORT_SYMBOL(arch_acpi_processor_init_pdc); diff --git a/arch/i386/kernel/acpi/sleep_32.c b/arch/i386/kernel/acpi/sleep_32.c deleted file mode 100644 index c42b5ab49de..00000000000 --- a/arch/i386/kernel/acpi/sleep_32.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * sleep.c - x86-specific ACPI sleep support. - * - * Copyright (C) 2001-2003 Patrick Mochel - * Copyright (C) 2001-2003 Pavel Machek - */ - -#include -#include -#include -#include - -#include - -/* address in low memory of the wakeup routine. */ -unsigned long acpi_wakeup_address = 0; -unsigned long acpi_realmode_flags; -extern char wakeup_start, wakeup_end; - -extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); - -/** - * acpi_save_state_mem - save kernel state - * - * Create an identity mapped page table and copy the wakeup routine to - * low memory. - */ -int acpi_save_state_mem(void) -{ - if (!acpi_wakeup_address) - return 1; - memcpy((void *)acpi_wakeup_address, &wakeup_start, - &wakeup_end - &wakeup_start); - acpi_copy_wakeup_routine(acpi_wakeup_address); - - return 0; -} - -/* - * acpi_restore_state - undo effects of acpi_save_state_mem - */ -void acpi_restore_state_mem(void) -{ -} - -/** - * acpi_reserve_bootmem - do _very_ early ACPI initialisation - * - * We allocate a page from the first 1MB of memory for the wakeup - * routine for when we come back from a sleep state. The - * runtime allocator allows specification of <16MB pages, but not - * <1MB pages. - */ -void __init acpi_reserve_bootmem(void) -{ - if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) { - printk(KERN_ERR - "ACPI: Wakeup code way too big, S3 disabled.\n"); - return; - } - - acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); - if (!acpi_wakeup_address) - printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); -} - -static int __init acpi_sleep_setup(char *str) -{ - while ((str != NULL) && (*str != '\0')) { - if (strncmp(str, "s3_bios", 7) == 0) - acpi_realmode_flags |= 1; - if (strncmp(str, "s3_mode", 7) == 0) - acpi_realmode_flags |= 2; - if (strncmp(str, "s3_beep", 7) == 0) - acpi_realmode_flags |= 4; - str = strchr(str, ','); - if (str != NULL) - str += strspn(str, ", \t"); - } - return 1; -} - -__setup("acpi_sleep=", acpi_sleep_setup); - -/* Ouch, we want to delete this. We already have better version in userspace, in - s2ram from suspend.sf.net project */ -static __init int reset_videomode_after_s3(struct dmi_system_id *d) -{ - acpi_realmode_flags |= 2; - return 0; -} - -static __initdata struct dmi_system_id acpisleep_dmi_table[] = { - { /* Reset video mode after returning from ACPI S3 sleep */ - .callback = reset_videomode_after_s3, - .ident = "Toshiba Satellite 4030cdt", - .matches = { - DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), - }, - }, - {} -}; - -static int __init acpisleep_dmi_init(void) -{ - dmi_check_system(acpisleep_dmi_table); - return 0; -} - -core_initcall(acpisleep_dmi_init); diff --git a/arch/i386/kernel/acpi/wakeup_32.S b/arch/i386/kernel/acpi/wakeup_32.S deleted file mode 100644 index f22ba8534d2..00000000000 --- a/arch/i386/kernel/acpi/wakeup_32.S +++ /dev/null @@ -1,321 +0,0 @@ -.text -#include -#include -#include - -# -# wakeup_code runs in real mode, and at unknown address (determined at run-time). -# Therefore it must only use relative jumps/calls. -# -# Do we need to deal with A20? It is okay: ACPI specs says A20 must be enabled -# -# If physical address of wakeup_code is 0x12345, BIOS should call us with -# cs = 0x1234, eip = 0x05 -# - -#define BEEP \ - inb $97, %al; \ - outb %al, $0x80; \ - movb $3, %al; \ - outb %al, $97; \ - outb %al, $0x80; \ - movb $-74, %al; \ - outb %al, $67; \ - outb %al, $0x80; \ - movb $-119, %al; \ - outb %al, $66; \ - outb %al, $0x80; \ - movb $15, %al; \ - outb %al, $66; - -ALIGN - .align 4096 -ENTRY(wakeup_start) -wakeup_code: - wakeup_code_start = . - .code16 - - movw $0xb800, %ax - movw %ax,%fs - movw $0x0e00 + 'L', %fs:(0x10) - - cli - cld - - # setup data segment - movw %cs, %ax - movw %ax, %ds # Make ds:0 point to wakeup_start - movw %ax, %ss - - testl $4, realmode_flags - wakeup_code - jz 1f - BEEP -1: - mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board - movw $0x0e00 + 'S', %fs:(0x12) - - pushl $0 # Kill any dangerous flags - popfl - - movl real_magic - wakeup_code, %eax - cmpl $0x12345678, %eax - jne bogus_real_magic - - testl $1, realmode_flags - wakeup_code - jz 1f - lcall $0xc000,$3 - movw %cs, %ax - movw %ax, %ds # Bios might have played with that - movw %ax, %ss -1: - - testl $2, realmode_flags - wakeup_code - jz 1f - mov video_mode - wakeup_code, %ax - call mode_set -1: - - # set up page table - movl $swsusp_pg_dir-__PAGE_OFFSET, %eax - movl %eax, %cr3 - - testl $1, real_efer_save_restore - wakeup_code - jz 4f - # restore efer setting - movl real_save_efer_edx - wakeup_code, %edx - movl real_save_efer_eax - wakeup_code, %eax - mov $0xc0000080, %ecx - wrmsr -4: - # make sure %cr4 is set correctly (features, etc) - movl real_save_cr4 - wakeup_code, %eax - movl %eax, %cr4 - movw $0xb800, %ax - movw %ax,%fs - movw $0x0e00 + 'i', %fs:(0x12) - - # need a gdt -- use lgdtl to force 32-bit operands, in case - # the GDT is located past 16 megabytes. - lgdtl real_save_gdt - wakeup_code - - movl real_save_cr0 - wakeup_code, %eax - movl %eax, %cr0 - jmp 1f -1: - movw $0x0e00 + 'n', %fs:(0x14) - - movl real_magic - wakeup_code, %eax - cmpl $0x12345678, %eax - jne bogus_real_magic - - testl $8, realmode_flags - wakeup_code - jz 1f - BEEP -1: - ljmpl $__KERNEL_CS, $wakeup_pmode_return - -real_save_gdt: .word 0 - .long 0 -real_save_cr0: .long 0 -real_save_cr3: .long 0 -real_save_cr4: .long 0 -real_magic: .long 0 -video_mode: .long 0 -realmode_flags: .long 0 -beep_flags: .long 0 -real_efer_save_restore: .long 0 -real_save_efer_edx: .long 0 -real_save_efer_eax: .long 0 - -bogus_real_magic: - movw $0x0e00 + 'B', %fs:(0x12) - jmp bogus_real_magic - -/* This code uses an extended set of video mode numbers. These include: - * Aliases for standard modes - * NORMAL_VGA (-1) - * EXTENDED_VGA (-2) - * ASK_VGA (-3) - * Video modes numbered by menu position -- NOT RECOMMENDED because of lack - * of compatibility when extending the table. These are between 0x00 and 0xff. - */ -#define VIDEO_FIRST_MENU 0x0000 - -/* Standard BIOS video modes (BIOS number + 0x0100) */ -#define VIDEO_FIRST_BIOS 0x0100 - -/* VESA BIOS video modes (VESA number + 0x0200) */ -#define VIDEO_FIRST_VESA 0x0200 - -/* Video7 special modes (BIOS number + 0x0900) */ -#define VIDEO_FIRST_V7 0x0900 - -# Setting of user mode (AX=mode ID) => CF=success - -# For now, we only handle VESA modes (0x0200..0x03ff). To handle other -# modes, we should probably compile in the video code from the boot -# directory. -mode_set: - movw %ax, %bx - subb $VIDEO_FIRST_VESA>>8, %bh - cmpb $2, %bh - jb check_vesa - -setbad: - clc - ret - -check_vesa: - orw $0x4000, %bx # Use linear frame buffer - movw $0x4f02, %ax # VESA BIOS mode set call - int $0x10 - cmpw $0x004f, %ax # AL=4f if implemented - jnz setbad # AH=0 if OK - - stc - ret - - .code32 - ALIGN - -.org 0x800 -wakeup_stack_begin: # Stack grows down - -.org 0xff0 # Just below end of page -wakeup_stack: -ENTRY(wakeup_end) - -.org 0x1000 - -wakeup_pmode_return: - movw $__KERNEL_DS, %ax - movw %ax, %ss - movw %ax, %ds - movw %ax, %es - movw %ax, %fs - movw %ax, %gs - movw $0x0e00 + 'u', 0xb8016 - - # reload the gdt, as we need the full 32 bit address - lgdt saved_gdt - lidt saved_idt - lldt saved_ldt - ljmp $(__KERNEL_CS),$1f -1: - movl %cr3, %eax - movl %eax, %cr3 - wbinvd - - # and restore the stack ... but you need gdt for this to work - movl saved_context_esp, %esp - - movl %cs:saved_magic, %eax - cmpl $0x12345678, %eax - jne bogus_magic - - # jump to place where we left off - movl saved_eip,%eax - jmp *%eax - -bogus_magic: - movw $0x0e00 + 'B', 0xb8018 - jmp bogus_magic - - -## -# acpi_copy_wakeup_routine -# -# Copy the above routine to low memory. -# -# Parameters: -# %eax: place to copy wakeup routine to -# -# Returned address is location of code in low memory (past data and stack) -# -ENTRY(acpi_copy_wakeup_routine) - - pushl %ebx - sgdt saved_gdt - sidt saved_idt - sldt saved_ldt - str saved_tss - - movl nx_enabled, %edx - movl %edx, real_efer_save_restore - wakeup_start (%eax) - testl $1, real_efer_save_restore - wakeup_start (%eax) - jz 2f - # save efer setting - pushl %eax - movl %eax, %ebx - mov $0xc0000080, %ecx - rdmsr - movl %edx, real_save_efer_edx - wakeup_start (%ebx) - movl %eax, real_save_efer_eax - wakeup_start (%ebx) - popl %eax -2: - - movl %cr3, %edx - movl %edx, real_save_cr3 - wakeup_start (%eax) - movl %cr4, %edx - movl %edx, real_save_cr4 - wakeup_start (%eax) - movl %cr0, %edx - movl %edx, real_save_cr0 - wakeup_start (%eax) - sgdt real_save_gdt - wakeup_start (%eax) - - movl saved_videomode, %edx - movl %edx, video_mode - wakeup_start (%eax) - movl acpi_realmode_flags, %edx - movl %edx, realmode_flags - wakeup_start (%eax) - movl $0x12345678, real_magic - wakeup_start (%eax) - movl $0x12345678, saved_magic - popl %ebx - ret - -save_registers: - leal 4(%esp), %eax - movl %eax, saved_context_esp - movl %ebx, saved_context_ebx - movl %ebp, saved_context_ebp - movl %esi, saved_context_esi - movl %edi, saved_context_edi - pushfl ; popl saved_context_eflags - - movl $ret_point, saved_eip - ret - - -restore_registers: - movl saved_context_ebp, %ebp - movl saved_context_ebx, %ebx - movl saved_context_esi, %esi - movl saved_context_edi, %edi - pushl saved_context_eflags ; popfl - ret - -ENTRY(do_suspend_lowlevel) - call save_processor_state - call save_registers - pushl $3 - call acpi_enter_sleep_state - addl $4, %esp - -# In case of S3 failure, we'll emerge here. Jump -# to ret_point to recover - jmp ret_point - .p2align 4,,7 -ret_point: - call restore_registers - call restore_processor_state - ret - -.data -ALIGN -ENTRY(saved_magic) .long 0 -ENTRY(saved_eip) .long 0 - -# saved registers -saved_gdt: .long 0,0 -saved_idt: .long 0,0 -saved_ldt: .long 0 -saved_tss: .long 0 - diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile new file mode 100644 index 00000000000..dd4eb7cef2b --- /dev/null +++ b/arch/x86/kernel/acpi/Makefile @@ -0,0 +1,5 @@ +ifeq ($(CONFIG_X86_32),y) +include ${srctree}/arch/x86/kernel/acpi/Makefile_32 +else +include ${srctree}/arch/x86_64/kernel/acpi/Makefile_64 +endif diff --git a/arch/x86/kernel/acpi/Makefile_32 b/arch/x86/kernel/acpi/Makefile_32 new file mode 100644 index 00000000000..a4852a2e919 --- /dev/null +++ b/arch/x86/kernel/acpi/Makefile_32 @@ -0,0 +1,10 @@ +obj-$(CONFIG_ACPI) += boot.o +ifneq ($(CONFIG_PCI),) +obj-$(CONFIG_X86_IO_APIC) += earlyquirk_32.o +endif +obj-$(CONFIG_ACPI_SLEEP) += sleep_32.o wakeup_32.o + +ifneq ($(CONFIG_ACPI_PROCESSOR),) +obj-y += cstate.o processor.o +endif + diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c new file mode 100644 index 00000000000..cacdd883bf2 --- /dev/null +++ b/arch/x86/kernel/acpi/boot.c @@ -0,0 +1,1326 @@ +/* + * boot.c - Architecture-Specific Low-Level ACPI Boot Support + * + * Copyright (C) 2001, 2002 Paul Diefenbaugh + * Copyright (C) 2001 Jun Nakajima + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static int __initdata acpi_force = 0; + +#ifdef CONFIG_ACPI +int acpi_disabled = 0; +#else +int acpi_disabled = 1; +#endif +EXPORT_SYMBOL(acpi_disabled); + +#ifdef CONFIG_X86_64 + +#include + +static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } + + +#else /* X86 */ + +#ifdef CONFIG_X86_LOCAL_APIC +#include +#include +#endif /* CONFIG_X86_LOCAL_APIC */ + +#endif /* X86 */ + +#define BAD_MADT_ENTRY(entry, end) ( \ + (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ + ((struct acpi_subtable_header *)entry)->length < sizeof(*entry)) + +#define PREFIX "ACPI: " + +int acpi_noirq; /* skip ACPI IRQ initialization */ +int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */ +int acpi_ht __initdata = 1; /* enable HT */ + +int acpi_lapic; +int acpi_ioapic; +int acpi_strict; +EXPORT_SYMBOL(acpi_strict); + +u8 acpi_sci_flags __initdata; +int acpi_sci_override_gsi __initdata; +int acpi_skip_timer_override __initdata; +int acpi_use_timer_override __initdata; + +#ifdef CONFIG_X86_LOCAL_APIC +static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; +#endif + +#ifndef __HAVE_ARCH_CMPXCHG +#warning ACPI uses CMPXCHG, i486 and later hardware +#endif + +/* -------------------------------------------------------------------------- + Boot-time Configuration + -------------------------------------------------------------------------- */ + +/* + * The default interrupt routing model is PIC (8259). This gets + * overriden if IOAPICs are enumerated (below). + */ +enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; + +#ifdef CONFIG_X86_64 + +/* rely on all ACPI tables being in the direct mapping */ +char *__acpi_map_table(unsigned long phys_addr, unsigned long size) +{ + if (!phys_addr || !size) + return NULL; + + if (phys_addr+size <= (end_pfn_map << PAGE_SHIFT) + PAGE_SIZE) + return __va(phys_addr); + + return NULL; +} + +#else + +/* + * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, + * to map the target physical address. The problem is that set_fixmap() + * provides a single page, and it is possible that the page is not + * sufficient. + * By using this area, we can map up to MAX_IO_APICS pages temporarily, + * i.e. until the next __va_range() call. + * + * Important Safety Note: The fixed I/O APIC page numbers are *subtracted* + * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and + * count idx down while incrementing the phys address. + */ +char *__acpi_map_table(unsigned long phys, unsigned long size) +{ + unsigned long base, offset, mapped_size; + int idx; + + if (phys + size < 8 * 1024 * 1024) + return __va(phys); + + offset = phys & (PAGE_SIZE - 1); + mapped_size = PAGE_SIZE - offset; + set_fixmap(FIX_ACPI_END, phys); + base = fix_to_virt(FIX_ACPI_END); + + /* + * Most cases can be covered by the below. + */ + idx = FIX_ACPI_END; + while (mapped_size < size) { + if (--idx < FIX_ACPI_BEGIN) + return NULL; /* cannot handle this */ + phys += PAGE_SIZE; + set_fixmap(idx, phys); + mapped_size += PAGE_SIZE; + } + + return ((unsigned char *)base + offset); +} +#endif + +#ifdef CONFIG_PCI_MMCONFIG +/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ +struct acpi_mcfg_allocation *pci_mmcfg_config; +int pci_mmcfg_config_num; + +int __init acpi_parse_mcfg(struct acpi_table_header *header) +{ + struct acpi_table_mcfg *mcfg; + unsigned long i; + int config_size; + + if (!header) + return -EINVAL; + + mcfg = (struct acpi_table_mcfg *)header; + + /* how many config structures do we have */ + pci_mmcfg_config_num = 0; + i = header->length - sizeof(struct acpi_table_mcfg); + while (i >= sizeof(struct acpi_mcfg_allocation)) { + ++pci_mmcfg_config_num; + i -= sizeof(struct acpi_mcfg_allocation); + }; + if (pci_mmcfg_config_num == 0) { + printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); + return -ENODEV; + } + + config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config); + pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL); + if (!pci_mmcfg_config) { + printk(KERN_WARNING PREFIX + "No memory for MCFG config tables\n"); + return -ENOMEM; + } + + memcpy(pci_mmcfg_config, &mcfg[1], config_size); + for (i = 0; i < pci_mmcfg_config_num; ++i) { + if (pci_mmcfg_config[i].address > 0xFFFFFFFF) { + printk(KERN_ERR PREFIX + "MMCONFIG not in low 4GB of memory\n"); + kfree(pci_mmcfg_config); + pci_mmcfg_config_num = 0; + return -ENODEV; + } + } + + return 0; +} +#endif /* CONFIG_PCI_MMCONFIG */ + +#ifdef CONFIG_X86_LOCAL_APIC +static int __init acpi_parse_madt(struct acpi_table_header *table) +{ + struct acpi_table_madt *madt = NULL; + + if (!cpu_has_apic) + return -EINVAL; + + madt = (struct acpi_table_madt *)table; + if (!madt) { + printk(KERN_WARNING PREFIX "Unable to map MADT\n"); + return -ENODEV; + } + + if (madt->address) { + acpi_lapic_addr = (u64) madt->address; + + printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n", + madt->address); + } + + acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id); + + return 0; +} + +static int __init +acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) +{ + struct acpi_madt_local_apic *processor = NULL; + + processor = (struct acpi_madt_local_apic *)header; + + if (BAD_MADT_ENTRY(processor, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + /* + * We need to register disabled CPU as well to permit + * counting disabled CPUs. This allows us to size + * cpus_possible_map more accurately, to permit + * to not preallocating memory for all NR_CPUS + * when we use CPU hotplug. + */ + mp_register_lapic(processor->id, /* APIC ID */ + processor->lapic_flags & ACPI_MADT_ENABLED); /* Enabled? */ + + return 0; +} + +static int __init +acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, + const unsigned long end) +{ + struct acpi_madt_local_apic_override *lapic_addr_ovr = NULL; + + lapic_addr_ovr = (struct acpi_madt_local_apic_override *)header; + + if (BAD_MADT_ENTRY(lapic_addr_ovr, end)) + return -EINVAL; + + acpi_lapic_addr = lapic_addr_ovr->address; + + return 0; +} + +static int __init +acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end) +{ + struct acpi_madt_local_apic_nmi *lapic_nmi = NULL; + + lapic_nmi = (struct acpi_madt_local_apic_nmi *)header; + + if (BAD_MADT_ENTRY(lapic_nmi, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + if (lapic_nmi->lint != 1) + printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); + + return 0; +} + +#endif /*CONFIG_X86_LOCAL_APIC */ + +#ifdef CONFIG_X86_IO_APIC + +static int __init +acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) +{ + struct acpi_madt_io_apic *ioapic = NULL; + + ioapic = (struct acpi_madt_io_apic *)header; + + if (BAD_MADT_ENTRY(ioapic, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + mp_register_ioapic(ioapic->id, + ioapic->address, ioapic->global_irq_base); + + return 0; +} + +/* + * Parse Interrupt Source Override for the ACPI SCI + */ +static void __init acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) +{ + if (trigger == 0) /* compatible SCI trigger is level */ + trigger = 3; + + if (polarity == 0) /* compatible SCI polarity is low */ + polarity = 3; + + /* Command-line over-ride via acpi_sci= */ + if (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) + trigger = (acpi_sci_flags & ACPI_MADT_TRIGGER_MASK) >> 2; + + if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK) + polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; + + /* + * mp_config_acpi_legacy_irqs() already setup IRQs < 16 + * If GSI is < 16, this will update its flags, + * else it will create a new mp_irqs[] entry. + */ + mp_override_legacy_irq(gsi, polarity, trigger, gsi); + + /* + * stash over-ride to indicate we've been here + * and for later update of acpi_gbl_FADT + */ + acpi_sci_override_gsi = gsi; + return; +} + +static int __init +acpi_parse_int_src_ovr(struct acpi_subtable_header * header, + const unsigned long end) +{ + struct acpi_madt_interrupt_override *intsrc = NULL; + + intsrc = (struct acpi_madt_interrupt_override *)header; + + if (BAD_MADT_ENTRY(intsrc, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { + acpi_sci_ioapic_setup(intsrc->global_irq, + intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, + (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2); + return 0; + } + + if (acpi_skip_timer_override && + intsrc->source_irq == 0 && intsrc->global_irq == 2) { + printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); + return 0; + } + + mp_override_legacy_irq(intsrc->source_irq, + intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, + (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2, + intsrc->global_irq); + + return 0; +} + +static int __init +acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end) +{ + struct acpi_madt_nmi_source *nmi_src = NULL; + + nmi_src = (struct acpi_madt_nmi_source *)header; + + if (BAD_MADT_ENTRY(nmi_src, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + /* TBD: Support nimsrc entries? */ + + return 0; +} + +#endif /* CONFIG_X86_IO_APIC */ + +/* + * acpi_pic_sci_set_trigger() + * + * use ELCR to set PIC-mode trigger type for SCI + * + * If a PIC-mode SCI is not recognized or gives spurious IRQ7's + * it may require Edge Trigger -- use "acpi_sci=edge" + * + * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers + * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge. + * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0) + * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0) + */ + +void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) +{ + unsigned int mask = 1 << irq; + unsigned int old, new; + + /* Real old ELCR mask */ + old = inb(0x4d0) | (inb(0x4d1) << 8); + + /* + * If we use ACPI to set PCI irq's, then we should clear ELCR + * since we will set it correctly as we enable the PCI irq + * routing. + */ + new = acpi_noirq ? old : 0; + + /* + * Update SCI information in the ELCR, it isn't in the PCI + * routing tables.. + */ + switch (trigger) { + case 1: /* Edge - clear */ + new &= ~mask; + break; + case 3: /* Level - set */ + new |= mask; + break; + } + + if (old == new) + return; + + printk(PREFIX "setting ELCR to %04x (from %04x)\n", new, old); + outb(new, 0x4d0); + outb(new >> 8, 0x4d1); +} + +int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) +{ + *irq = gsi; + return 0; +} + +/* + * success: return IRQ number (>=0) + * failure: return < 0 + */ +int acpi_register_gsi(u32 gsi, int triggering, int polarity) +{ + unsigned int irq; + unsigned int plat_gsi = gsi; + +#ifdef CONFIG_PCI + /* + * Make sure all (legacy) PCI IRQs are set as level-triggered. + */ + if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { + extern void eisa_set_level_irq(unsigned int irq); + + if (triggering == ACPI_LEVEL_SENSITIVE) + eisa_set_level_irq(gsi); + } +#endif + +#ifdef CONFIG_X86_IO_APIC + if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) { + plat_gsi = mp_register_gsi(gsi, triggering, polarity); + } +#endif + acpi_gsi_to_irq(plat_gsi, &irq); + return irq; +} + +EXPORT_SYMBOL(acpi_register_gsi); + +/* + * ACPI based hotplug support for CPU + */ +#ifdef CONFIG_ACPI_HOTPLUG_CPU +int acpi_map_lsapic(acpi_handle handle, int *pcpu) +{ + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + union acpi_object *obj; + struct acpi_madt_local_apic *lapic; + cpumask_t tmp_map, new_map; + u8 physid; + int cpu; + + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) + return -EINVAL; + + if (!buffer.length || !buffer.pointer) + return -EINVAL; + + obj = buffer.pointer; + if (obj->type != ACPI_TYPE_BUFFER || + obj->buffer.length < sizeof(*lapic)) { + kfree(buffer.pointer); + return -EINVAL; + } + + lapic = (struct acpi_madt_local_apic *)obj->buffer.pointer; + + if (lapic->header.type != ACPI_MADT_TYPE_LOCAL_APIC || + !(lapic->lapic_flags & ACPI_MADT_ENABLED)) { + kfree(buffer.pointer); + return -EINVAL; + } + + physid = lapic->id; + + kfree(buffer.pointer); + buffer.length = ACPI_ALLOCATE_BUFFER; + buffer.pointer = NULL; + + tmp_map = cpu_present_map; + mp_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED); + + /* + * If mp_register_lapic successfully generates a new logical cpu + * number, then the following will get us exactly what was mapped + */ + cpus_andnot(new_map, cpu_present_map, tmp_map); + if (cpus_empty(new_map)) { + printk ("Unable to map lapic to logical cpu number\n"); + return -EINVAL; + } + + cpu = first_cpu(new_map); + + *pcpu = cpu; + return 0; +} + +EXPORT_SYMBOL(acpi_map_lsapic); + +int acpi_unmap_lsapic(int cpu) +{ + x86_cpu_to_apicid[cpu] = -1; + cpu_clear(cpu, cpu_present_map); + num_processors--; + + return (0); +} + +EXPORT_SYMBOL(acpi_unmap_lsapic); +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ + +int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) +{ + /* TBD */ + return -EINVAL; +} + +EXPORT_SYMBOL(acpi_register_ioapic); + +int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base) +{ + /* TBD */ + return -EINVAL; +} + +EXPORT_SYMBOL(acpi_unregister_ioapic); + +static unsigned long __init +acpi_scan_rsdp(unsigned long start, unsigned long length) +{ + unsigned long offset = 0; + unsigned long sig_len = sizeof("RSD PTR ") - 1; + + /* + * Scan all 16-byte boundaries of the physical memory region for the + * RSDP signature. + */ + for (offset = 0; offset < length; offset += 16) { + if (strncmp((char *)(phys_to_virt(start) + offset), "RSD PTR ", sig_len)) + continue; + return (start + offset); + } + + return 0; +} + +static int __init acpi_parse_sbf(struct acpi_table_header *table) +{ + struct acpi_table_boot *sb; + + sb = (struct acpi_table_boot *)table; + if (!sb) { + printk(KERN_WARNING PREFIX "Unable to map SBF\n"); + return -ENODEV; + } + + sbf_port = sb->cmos_index; /* Save CMOS port */ + + return 0; +} + +#ifdef CONFIG_HPET_TIMER +#include + +static struct __initdata resource *hpet_res; + +static int __init acpi_parse_hpet(struct acpi_table_header *table) +{ + struct acpi_table_hpet *hpet_tbl; + + hpet_tbl = (struct acpi_table_hpet *)table; + if (!hpet_tbl) { + printk(KERN_WARNING PREFIX "Unable to map HPET\n"); + return -ENODEV; + } + + if (hpet_tbl->address.space_id != ACPI_SPACE_MEM) { + printk(KERN_WARNING PREFIX "HPET timers must be located in " + "memory.\n"); + return -1; + } + + hpet_address = hpet_tbl->address.address; + printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", + hpet_tbl->id, hpet_address); + + /* + * Allocate and initialize the HPET firmware resource for adding into + * the resource tree during the lateinit timeframe. + */ +#define HPET_RESOURCE_NAME_SIZE 9 + hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE); + + if (!hpet_res) + return 0; + + memset(hpet_res, 0, sizeof(*hpet_res)); + hpet_res->name = (void *)&hpet_res[1]; + hpet_res->flags = IORESOURCE_MEM; + snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, "HPET %u", + hpet_tbl->sequence); + + hpet_res->start = hpet_address; + hpet_res->end = hpet_address + (1 * 1024) - 1; + + return 0; +} + +/* + * hpet_insert_resource inserts the HPET resources used into the resource + * tree. + */ +static __init int hpet_insert_resource(void) +{ + if (!hpet_res) + return 1; + + return insert_resource(&iomem_resource, hpet_res); +} + +late_initcall(hpet_insert_resource); + +#else +#define acpi_parse_hpet NULL +#endif + +static int __init acpi_parse_fadt(struct acpi_table_header *table) +{ + +#ifdef CONFIG_X86_PM_TIMER + /* detect the location of the ACPI PM Timer */ + if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) { + /* FADT rev. 2 */ + if (acpi_gbl_FADT.xpm_timer_block.space_id != + ACPI_ADR_SPACE_SYSTEM_IO) + return 0; + + pmtmr_ioport = acpi_gbl_FADT.xpm_timer_block.address; + /* + * "X" fields are optional extensions to the original V1.0 + * fields, so we must selectively expand V1.0 fields if the + * corresponding X field is zero. + */ + if (!pmtmr_ioport) + pmtmr_ioport = acpi_gbl_FADT.pm_timer_block; + } else { + /* FADT rev. 1 */ + pmtmr_ioport = acpi_gbl_FADT.pm_timer_block; + } + if (pmtmr_ioport) + printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", + pmtmr_ioport); +#endif + return 0; +} + +unsigned long __init acpi_find_rsdp(void) +{ + unsigned long rsdp_phys = 0; + + if (efi_enabled) { + if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) + return efi.acpi20; + else if (efi.acpi != EFI_INVALID_TABLE_ADDR) + return efi.acpi; + } + /* + * Scan memory looking for the RSDP signature. First search EBDA (low + * memory) paragraphs and then search upper memory (E0000-FFFFF). + */ + rsdp_phys = acpi_scan_rsdp(0, 0x400); + if (!rsdp_phys) + rsdp_phys = acpi_scan_rsdp(0xE0000, 0x20000); + + return rsdp_phys; +} + +#ifdef CONFIG_X86_LOCAL_APIC +/* + * Parse LAPIC entries in MADT + * returns 0 on success, < 0 on error + */ +static int __init acpi_parse_madt_lapic_entries(void) +{ + int count; + + if (!cpu_has_apic) + return -ENODEV; + + /* + * Note that the LAPIC address is obtained from the MADT (32-bit value) + * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). + */ + + count = + acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, + acpi_parse_lapic_addr_ovr, 0); + if (count < 0) { + printk(KERN_ERR PREFIX + "Error parsing LAPIC address override entry\n"); + return count; + } + + mp_register_lapic_address(acpi_lapic_addr); + + count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, acpi_parse_lapic, + MAX_APICS); + if (!count) { + printk(KERN_ERR PREFIX "No LAPIC entries present\n"); + /* TBD: Cleanup to allow fallback to MPS */ + return -ENODEV; + } else if (count < 0) { + printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n"); + /* TBD: Cleanup to allow fallback to MPS */ + return count; + } + + count = + acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0); + if (count < 0) { + printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); + /* TBD: Cleanup to allow fallback to MPS */ + return count; + } + return 0; +} +#endif /* CONFIG_X86_LOCAL_APIC */ + +#ifdef CONFIG_X86_IO_APIC +/* + * Parse IOAPIC related entries in MADT + * returns 0 on success, < 0 on error + */ +static int __init acpi_parse_madt_ioapic_entries(void) +{ + int count; + + /* + * ACPI interpreter is required to complete interrupt setup, + * so if it is off, don't enumerate the io-apics with ACPI. + * If MPS is present, it will handle them, + * otherwise the system will stay in PIC mode + */ + if (acpi_disabled || acpi_noirq) { + return -ENODEV; + } + + if (!cpu_has_apic) + return -ENODEV; + + /* + * if "noapic" boot option, don't look for IO-APICs + */ + if (skip_ioapic_setup) { + printk(KERN_INFO PREFIX "Skipping IOAPIC probe " + "due to 'noapic' option.\n"); + return -ENODEV; + } + + count = + acpi_table_parse_madt(ACPI_MADT_TYPE_IO_APIC, acpi_parse_ioapic, + MAX_IO_APICS); + if (!count) { + printk(KERN_ERR PREFIX "No IOAPIC entries present\n"); + return -ENODEV; + } else if (count < 0) { + printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n"); + return count; + } + + count = + acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, + NR_IRQ_VECTORS); + if (count < 0) { + printk(KERN_ERR PREFIX + "Error parsing interrupt source overrides entry\n"); + /* TBD: Cleanup to allow fallback to MPS */ + return count; + } + + /* + * If BIOS did not supply an INT_SRC_OVR for the SCI + * pretend we got one so we can set the SCI flags. + */ + if (!acpi_sci_override_gsi) + acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0); + + /* Fill in identity legacy mapings where no override */ + mp_config_acpi_legacy_irqs(); + + count = + acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src, + NR_IRQ_VECTORS); + if (count < 0) { + printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); + /* TBD: Cleanup to allow fallback to MPS */ + return count; + } + + return 0; +} +#else +static inline int acpi_parse_madt_ioapic_entries(void) +{ + return -1; +} +#endif /* !CONFIG_X86_IO_APIC */ + +static void __init acpi_process_madt(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + int error; + + if (!acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) { + + /* + * Parse MADT LAPIC entries + */ + error = acpi_parse_madt_lapic_entries(); + if (!error) { + acpi_lapic = 1; + +#ifdef CONFIG_X86_GENERICARCH + generic_bigsmp_probe(); +#endif + /* + * Parse MADT IO-APIC entries + */ + error = acpi_parse_madt_ioapic_entries(); + if (!error) { + acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; + acpi_irq_balance_set(NULL); + acpi_ioapic = 1; + + smp_found_config = 1; + setup_apic_routing(); + } + } + if (error == -EINVAL) { + /* + * Dell Precision Workstation 410, 610 come here. + */ + printk(KERN_ERR PREFIX + "Invalid BIOS MADT, disabling ACPI\n"); + disable_acpi(); + } + } +#endif + return; +} + +#ifdef __i386__ + +static int __init disable_acpi_irq(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s detected: force use of acpi=noirq\n", + d->ident); + acpi_noirq_set(); + } + return 0; +} + +static int __init disable_acpi_pci(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s detected: force use of pci=noacpi\n", + d->ident); + acpi_disable_pci(); + } + return 0; +} + +static int __init dmi_disable_acpi(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s detected: acpi off\n", d->ident); + disable_acpi(); + } else { + printk(KERN_NOTICE + "Warning: DMI blacklist says broken, but acpi forced\n"); + } + return 0; +} + +/* + * Limit ACPI to CPU enumeration for HT + */ +static int __init force_acpi_ht(struct dmi_system_id *d) +{ + if (!acpi_force) { + printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", + d->ident); + disable_acpi(); + acpi_ht = 1; + } else { + printk(KERN_NOTICE + "Warning: acpi=force overrules DMI blacklist: acpi=ht\n"); + } + return 0; +} + +/* + * If your system is blacklisted here, but you find that acpi=force + * works for you, please contact acpi-devel@sourceforge.net + */ +static struct dmi_system_id __initdata acpi_dmi_table[] = { + /* + * Boxes that need ACPI disabled + */ + { + .callback = dmi_disable_acpi, + .ident = "IBM Thinkpad", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "2629H1G"), + }, + }, + + /* + * Boxes that need acpi=ht + */ + { + .callback = force_acpi_ht, + .ident = "FSC Primergy T850", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "HP VISUALIZE NT Workstation", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "Compaq Workstation W8000", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), + DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ASUS P4B266", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "P4B266"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ASUS P2B-DS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ASUS CUR-DLS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "ABIT i440BX-W83977", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ABIT "), + DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM Bladecenter", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM eServer xSeries 360", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM eserver xSeries 330", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"), + }, + }, + { + .callback = force_acpi_ht, + .ident = "IBM eserver xSeries 440", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"), + }, + }, + + /* + * Boxes that need ACPI PCI IRQ routing disabled + */ + { + .callback = disable_acpi_irq, + .ident = "ASUS A7V", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC"), + DMI_MATCH(DMI_BOARD_NAME, ""), + /* newer BIOS, Revision 1011, does work */ + DMI_MATCH(DMI_BIOS_VERSION, + "ASUS A7V ACPI BIOS Revision 1007"), + }, + }, + { + /* + * Latest BIOS for IBM 600E (1.16) has bad pcinum + * for LPC bridge, which is needed for the PCI + * interrupt links to work. DSDT fix is in bug 5966. + * 2645, 2646 model numbers are shared with 600/600E/600X + */ + .callback = disable_acpi_irq, + .ident = "IBM Thinkpad 600 Series 2645", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "2645"), + }, + }, + { + .callback = disable_acpi_irq, + .ident = "IBM Thinkpad 600 Series 2646", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), + DMI_MATCH(DMI_BOARD_NAME, "2646"), + }, + }, + /* + * Boxes that need ACPI PCI IRQ routing and PCI scan disabled + */ + { /* _BBN 0 bug */ + .callback = disable_acpi_pci, + .ident = "ASUS PR-DLS", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "PR-DLS"), + DMI_MATCH(DMI_BIOS_VERSION, + "ASUS PR-DLS ACPI BIOS Revision 1010"), + DMI_MATCH(DMI_BIOS_DATE, "03/21/2003") + }, + }, + { + .callback = disable_acpi_pci, + .ident = "Acer TravelMate 36x Laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), + }, + }, + {} +}; + +#endif /* __i386__ */ + +/* + * acpi_boot_table_init() and acpi_boot_init() + * called from setup_arch(), always. + * 1. checksums all tables + * 2. enumerates lapics + * 3. enumerates io-apics + * + * acpi_table_init() is separate to allow reading SRAT without + * other side effects. + * + * side effects of acpi_boot_init: + * acpi_lapic = 1 if LAPIC found + * acpi_ioapic = 1 if IOAPIC found + * if (acpi_lapic && acpi_ioapic) smp_found_config = 1; + * if acpi_blacklisted() acpi_disabled = 1; + * acpi_irq_model=... + * ... + * + * return value: (currently ignored) + * 0: success + * !0: failure + */ + +int __init acpi_boot_table_init(void) +{ + int error; + +#ifdef __i386__ + dmi_check_system(acpi_dmi_table); +#endif + + /* + * If acpi_disabled, bail out + * One exception: acpi=ht continues far enough to enumerate LAPICs + */ + if (acpi_disabled && !acpi_ht) + return 1; + + /* + * Initialize the ACPI boot-time table parser. + */ + error = acpi_table_init(); + if (error) { + disable_acpi(); + return error; + } + + acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); + + /* + * blacklist may disable ACPI entirely + */ + error = acpi_blacklisted(); + if (error) { + if (acpi_force) { + printk(KERN_WARNING PREFIX "acpi=force override\n"); + } else { + printk(KERN_WARNING PREFIX "Disabling ACPI support\n"); + disable_acpi(); + return error; + } + } + + return 0; +} + +int __init acpi_boot_init(void) +{ + /* + * If acpi_disabled, bail out + * One exception: acpi=ht continues far enough to enumerate LAPICs + */ + if (acpi_disabled && !acpi_ht) + return 1; + + acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); + + /* + * set sci_int and PM timer address + */ + acpi_table_parse(ACPI_SIG_FADT, acpi_parse_fadt); + + /* + * Process the Multiple APIC Description Table (MADT), if present + */ + acpi_process_madt(); + + acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet); + + return 0; +} + +static int __init parse_acpi(char *arg) +{ + if (!arg) + return -EINVAL; + + /* "acpi=off" disables both ACPI table parsing and interpreter */ + if (strcmp(arg, "off") == 0) { + disable_acpi(); + } + /* acpi=force to over-ride black-list */ + else if (strcmp(arg, "force") == 0) { + acpi_force = 1; + acpi_ht = 1; + acpi_disabled = 0; + } + /* acpi=strict disables out-of-spec workarounds */ + else if (strcmp(arg, "strict") == 0) { + acpi_strict = 1; + } + /* Limit ACPI just to boot-time to enable HT */ + else if (strcmp(arg, "ht") == 0) { + if (!acpi_force) + disable_acpi(); + acpi_ht = 1; + } + /* "acpi=noirq" disables ACPI interrupt routing */ + else if (strcmp(arg, "noirq") == 0) { + acpi_noirq_set(); + } else { + /* Core will printk when we return error. */ + return -EINVAL; + } + return 0; +} +early_param("acpi", parse_acpi); + +/* FIXME: Using pci= for an ACPI parameter is a travesty. */ +static int __init parse_pci(char *arg) +{ + if (arg && strcmp(arg, "noacpi") == 0) + acpi_disable_pci(); + return 0; +} +early_param("pci", parse_pci); + +#ifdef CONFIG_X86_IO_APIC +static int __init parse_acpi_skip_timer_override(char *arg) +{ + acpi_skip_timer_override = 1; + return 0; +} +early_param("acpi_skip_timer_override", parse_acpi_skip_timer_override); + +static int __init parse_acpi_use_timer_override(char *arg) +{ + acpi_use_timer_override = 1; + return 0; +} +early_param("acpi_use_timer_override", parse_acpi_use_timer_override); +#endif /* CONFIG_X86_IO_APIC */ + +static int __init setup_acpi_sci(char *s) +{ + if (!s) + return -EINVAL; + if (!strcmp(s, "edge")) + acpi_sci_flags = ACPI_MADT_TRIGGER_EDGE | + (acpi_sci_flags & ~ACPI_MADT_TRIGGER_MASK); + else if (!strcmp(s, "level")) + acpi_sci_flags = ACPI_MADT_TRIGGER_LEVEL | + (acpi_sci_flags & ~ACPI_MADT_TRIGGER_MASK); + else if (!strcmp(s, "high")) + acpi_sci_flags = ACPI_MADT_POLARITY_ACTIVE_HIGH | + (acpi_sci_flags & ~ACPI_MADT_POLARITY_MASK); + else if (!strcmp(s, "low")) + acpi_sci_flags = ACPI_MADT_POLARITY_ACTIVE_LOW | + (acpi_sci_flags & ~ACPI_MADT_POLARITY_MASK); + else + return -EINVAL; + return 0; +} +early_param("acpi_sci", setup_acpi_sci); + +int __acpi_acquire_global_lock(unsigned int *lock) +{ + unsigned int old, new, val; + do { + old = *lock; + new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1)); + val = cmpxchg(lock, old, new); + } while (unlikely (val != old)); + return (new < 3) ? -1 : 0; +} + +int __acpi_release_global_lock(unsigned int *lock) +{ + unsigned int old, new, val; + do { + old = *lock; + new = old & ~0x3; + val = cmpxchg(lock, old, new); + } while (unlikely (val != old)); + return old & 0x1; +} diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c new file mode 100644 index 00000000000..2d39f55d29a --- /dev/null +++ b/arch/x86/kernel/acpi/cstate.c @@ -0,0 +1,164 @@ +/* + * arch/i386/kernel/acpi/cstate.c + * + * Copyright (C) 2005 Intel Corporation + * Venkatesh Pallipadi + * - Added _PDC for SMP C-states on Intel CPUs + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * Initialize bm_flags based on the CPU cache properties + * On SMP it depends on cache configuration + * - When cache is not shared among all CPUs, we flush cache + * before entering C3. + * - When cache is shared among all CPUs, we use bm_check + * mechanism as in UP case + * + * This routine is called only after all the CPUs are online + */ +void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, + unsigned int cpu) +{ + struct cpuinfo_x86 *c = cpu_data + cpu; + + flags->bm_check = 0; + if (num_online_cpus() == 1) + flags->bm_check = 1; + else if (c->x86_vendor == X86_VENDOR_INTEL) { + /* + * Today all CPUs that support C3 share cache. + * TBD: This needs to look at cache shared map, once + * multi-core detection patch makes to the base. + */ + flags->bm_check = 1; + } +} +EXPORT_SYMBOL(acpi_processor_power_init_bm_check); + +/* The code below handles cstate entry with monitor-mwait pair on Intel*/ + +struct cstate_entry { + struct { + unsigned int eax; + unsigned int ecx; + } states[ACPI_PROCESSOR_MAX_POWER]; +}; +static struct cstate_entry *cpu_cstate_entry; /* per CPU ptr */ + +static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; + +#define MWAIT_SUBSTATE_MASK (0xf) +#define MWAIT_SUBSTATE_SIZE (4) + +#define CPUID_MWAIT_LEAF (5) +#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1) +#define CPUID5_ECX_INTERRUPT_BREAK (0x2) + +#define MWAIT_ECX_INTERRUPT_BREAK (0x1) + +#define NATIVE_CSTATE_BEYOND_HALT (2) + +int acpi_processor_ffh_cstate_probe(unsigned int cpu, + struct acpi_processor_cx *cx, struct acpi_power_register *reg) +{ + struct cstate_entry *percpu_entry; + struct cpuinfo_x86 *c = cpu_data + cpu; + + cpumask_t saved_mask; + int retval; + unsigned int eax, ebx, ecx, edx; + unsigned int edx_part; + unsigned int cstate_type; /* C-state type and not ACPI C-state type */ + unsigned int num_cstate_subtype; + + if (!cpu_cstate_entry || c->cpuid_level < CPUID_MWAIT_LEAF ) + return -1; + + if (reg->bit_offset != NATIVE_CSTATE_BEYOND_HALT) + return -1; + + percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu); + percpu_entry->states[cx->index].eax = 0; + percpu_entry->states[cx->index].ecx = 0; + + /* Make sure we are running on right CPU */ + saved_mask = current->cpus_allowed; + retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (retval) + return -1; + + cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); + + /* Check whether this particular cx_type (in CST) is supported or not */ + cstate_type = (cx->address >> MWAIT_SUBSTATE_SIZE) + 1; + edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE); + num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK; + + retval = 0; + if (num_cstate_subtype < (cx->address & MWAIT_SUBSTATE_MASK)) { + retval = -1; + goto out; + } + + /* mwait ecx extensions INTERRUPT_BREAK should be supported for C2/C3 */ + if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || + !(ecx & CPUID5_ECX_INTERRUPT_BREAK)) { + retval = -1; + goto out; + } + percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK; + + /* Use the hint in CST */ + percpu_entry->states[cx->index].eax = cx->address; + + if (!mwait_supported[cstate_type]) { + mwait_supported[cstate_type] = 1; + printk(KERN_DEBUG "Monitor-Mwait will be used to enter C-%d " + "state\n", cx->type); + } + +out: + set_cpus_allowed(current, saved_mask); + return retval; +} +EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); + +void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) +{ + unsigned int cpu = smp_processor_id(); + struct cstate_entry *percpu_entry; + + percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu); + mwait_idle_with_hints(percpu_entry->states[cx->index].eax, + percpu_entry->states[cx->index].ecx); +} +EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_enter); + +static int __init ffh_cstate_init(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + if (c->x86_vendor != X86_VENDOR_INTEL) + return -1; + + cpu_cstate_entry = alloc_percpu(struct cstate_entry); + return 0; +} + +static void __exit ffh_cstate_exit(void) +{ + free_percpu(cpu_cstate_entry); + cpu_cstate_entry = NULL; +} + +arch_initcall(ffh_cstate_init); +__exitcall(ffh_cstate_exit); diff --git a/arch/x86/kernel/acpi/earlyquirk_32.c b/arch/x86/kernel/acpi/earlyquirk_32.c new file mode 100644 index 00000000000..23f78efc577 --- /dev/null +++ b/arch/x86/kernel/acpi/earlyquirk_32.c @@ -0,0 +1,84 @@ +/* + * Do early PCI probing for bug detection when the main PCI subsystem is + * not up yet. + */ +#include +#include +#include +#include + +#include +#include +#include + +#ifdef CONFIG_ACPI + +static int __init nvidia_hpet_check(struct acpi_table_header *header) +{ + return 0; +} +#endif + +static int __init check_bridge(int vendor, int device) +{ +#ifdef CONFIG_ACPI + static int warned; + /* According to Nvidia all timer overrides are bogus unless HPET + is enabled. */ + if (!acpi_use_timer_override && vendor == PCI_VENDOR_ID_NVIDIA) { + if (!warned && acpi_table_parse(ACPI_SIG_HPET, + nvidia_hpet_check)) { + warned = 1; + acpi_skip_timer_override = 1; + printk(KERN_INFO "Nvidia board " + "detected. Ignoring ACPI " + "timer override.\n"); + printk(KERN_INFO "If you got timer trouble " + "try acpi_use_timer_override\n"); + + } + } +#endif + if (vendor == PCI_VENDOR_ID_ATI && timer_over_8254 == 1) { + timer_over_8254 = 0; + printk(KERN_INFO "ATI board detected. Disabling timer routing " + "over 8254.\n"); + } + return 0; +} + +void __init check_acpi_pci(void) +{ + int num, slot, func; + + /* Assume the machine supports type 1. If not it will + always read ffffffff and should not have any side effect. + Actually a few buggy systems can machine check. Allow the user + to disable it by command line option at least -AK */ + if (!early_pci_allowed()) + return; + + /* Poor man's PCI discovery */ + for (num = 0; num < 32; num++) { + for (slot = 0; slot < 32; slot++) { + for (func = 0; func < 8; func++) { + u32 class; + u32 vendor; + class = read_pci_config(num, slot, func, + PCI_CLASS_REVISION); + if (class == 0xffffffff) + break; + + if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) + continue; + + vendor = read_pci_config(num, slot, func, + PCI_VENDOR_ID); + + if (check_bridge(vendor & 0xffff, vendor >> 16)) + return; + } + + } + } +} diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c new file mode 100644 index 00000000000..b54fded4983 --- /dev/null +++ b/arch/x86/kernel/acpi/processor.c @@ -0,0 +1,75 @@ +/* + * arch/i386/kernel/acpi/processor.c + * + * Copyright (C) 2005 Intel Corporation + * Venkatesh Pallipadi + * - Added _PDC for platforms with Intel CPUs + */ + +#include +#include +#include +#include + +#include +#include + +static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) +{ + struct acpi_object_list *obj_list; + union acpi_object *obj; + u32 *buf; + + /* allocate and initialize pdc. It will be used later. */ + obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL); + if (!obj_list) { + printk(KERN_ERR "Memory allocation error\n"); + return; + } + + obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL); + if (!obj) { + printk(KERN_ERR "Memory allocation error\n"); + kfree(obj_list); + return; + } + + buf = kmalloc(12, GFP_KERNEL); + if (!buf) { + printk(KERN_ERR "Memory allocation error\n"); + kfree(obj); + kfree(obj_list); + return; + } + + buf[0] = ACPI_PDC_REVISION_ID; + buf[1] = 1; + buf[2] = ACPI_PDC_C_CAPABILITY_SMP; + + if (cpu_has(c, X86_FEATURE_EST)) + buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; + + obj->type = ACPI_TYPE_BUFFER; + obj->buffer.length = 12; + obj->buffer.pointer = (u8 *) buf; + obj_list->count = 1; + obj_list->pointer = obj; + pr->pdc = obj_list; + + return; +} + +/* Initialize _PDC data based on the CPU vendor */ +void arch_acpi_processor_init_pdc(struct acpi_processor *pr) +{ + unsigned int cpu = pr->id; + struct cpuinfo_x86 *c = cpu_data + cpu; + + pr->pdc = NULL; + if (c->x86_vendor == X86_VENDOR_INTEL) + init_intel_pdc(pr, c); + + return; +} + +EXPORT_SYMBOL(arch_acpi_processor_init_pdc); diff --git a/arch/x86/kernel/acpi/sleep_32.c b/arch/x86/kernel/acpi/sleep_32.c new file mode 100644 index 00000000000..c42b5ab49de --- /dev/null +++ b/arch/x86/kernel/acpi/sleep_32.c @@ -0,0 +1,110 @@ +/* + * sleep.c - x86-specific ACPI sleep support. + * + * Copyright (C) 2001-2003 Patrick Mochel + * Copyright (C) 2001-2003 Pavel Machek + */ + +#include +#include +#include +#include + +#include + +/* address in low memory of the wakeup routine. */ +unsigned long acpi_wakeup_address = 0; +unsigned long acpi_realmode_flags; +extern char wakeup_start, wakeup_end; + +extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); + +/** + * acpi_save_state_mem - save kernel state + * + * Create an identity mapped page table and copy the wakeup routine to + * low memory. + */ +int acpi_save_state_mem(void) +{ + if (!acpi_wakeup_address) + return 1; + memcpy((void *)acpi_wakeup_address, &wakeup_start, + &wakeup_end - &wakeup_start); + acpi_copy_wakeup_routine(acpi_wakeup_address); + + return 0; +} + +/* + * acpi_restore_state - undo effects of acpi_save_state_mem + */ +void acpi_restore_state_mem(void) +{ +} + +/** + * acpi_reserve_bootmem - do _very_ early ACPI initialisation + * + * We allocate a page from the first 1MB of memory for the wakeup + * routine for when we come back from a sleep state. The + * runtime allocator allows specification of <16MB pages, but not + * <1MB pages. + */ +void __init acpi_reserve_bootmem(void) +{ + if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) { + printk(KERN_ERR + "ACPI: Wakeup code way too big, S3 disabled.\n"); + return; + } + + acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); + if (!acpi_wakeup_address) + printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); +} + +static int __init acpi_sleep_setup(char *str) +{ + while ((str != NULL) && (*str != '\0')) { + if (strncmp(str, "s3_bios", 7) == 0) + acpi_realmode_flags |= 1; + if (strncmp(str, "s3_mode", 7) == 0) + acpi_realmode_flags |= 2; + if (strncmp(str, "s3_beep", 7) == 0) + acpi_realmode_flags |= 4; + str = strchr(str, ','); + if (str != NULL) + str += strspn(str, ", \t"); + } + return 1; +} + +__setup("acpi_sleep=", acpi_sleep_setup); + +/* Ouch, we want to delete this. We already have better version in userspace, in + s2ram from suspend.sf.net project */ +static __init int reset_videomode_after_s3(struct dmi_system_id *d) +{ + acpi_realmode_flags |= 2; + return 0; +} + +static __initdata struct dmi_system_id acpisleep_dmi_table[] = { + { /* Reset video mode after returning from ACPI S3 sleep */ + .callback = reset_videomode_after_s3, + .ident = "Toshiba Satellite 4030cdt", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), + }, + }, + {} +}; + +static int __init acpisleep_dmi_init(void) +{ + dmi_check_system(acpisleep_dmi_table); + return 0; +} + +core_initcall(acpisleep_dmi_init); diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S new file mode 100644 index 00000000000..f22ba8534d2 --- /dev/null +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -0,0 +1,321 @@ +.text +#include +#include +#include + +# +# wakeup_code runs in real mode, and at unknown address (determined at run-time). +# Therefore it must only use relative jumps/calls. +# +# Do we need to deal with A20? It is okay: ACPI specs says A20 must be enabled +# +# If physical address of wakeup_code is 0x12345, BIOS should call us with +# cs = 0x1234, eip = 0x05 +# + +#define BEEP \ + inb $97, %al; \ + outb %al, $0x80; \ + movb $3, %al; \ + outb %al, $97; \ + outb %al, $0x80; \ + movb $-74, %al; \ + outb %al, $67; \ + outb %al, $0x80; \ + movb $-119, %al; \ + outb %al, $66; \ + outb %al, $0x80; \ + movb $15, %al; \ + outb %al, $66; + +ALIGN + .align 4096 +ENTRY(wakeup_start) +wakeup_code: + wakeup_code_start = . + .code16 + + movw $0xb800, %ax + movw %ax,%fs + movw $0x0e00 + 'L', %fs:(0x10) + + cli + cld + + # setup data segment + movw %cs, %ax + movw %ax, %ds # Make ds:0 point to wakeup_start + movw %ax, %ss + + testl $4, realmode_flags - wakeup_code + jz 1f + BEEP +1: + mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board + movw $0x0e00 + 'S', %fs:(0x12) + + pushl $0 # Kill any dangerous flags + popfl + + movl real_magic - wakeup_code, %eax + cmpl $0x12345678, %eax + jne bogus_real_magic + + testl $1, realmode_flags - wakeup_code + jz 1f + lcall $0xc000,$3 + movw %cs, %ax + movw %ax, %ds # Bios might have played with that + movw %ax, %ss +1: + + testl $2, realmode_flags - wakeup_code + jz 1f + mov video_mode - wakeup_code, %ax + call mode_set +1: + + # set up page table + movl $swsusp_pg_dir-__PAGE_OFFSET, %eax + movl %eax, %cr3 + + testl $1, real_efer_save_restore - wakeup_code + jz 4f + # restore efer setting + movl real_save_efer_edx - wakeup_code, %edx + movl real_save_efer_eax - wakeup_code, %eax + mov $0xc0000080, %ecx + wrmsr +4: + # make sure %cr4 is set correctly (features, etc) + movl real_save_cr4 - wakeup_code, %eax + movl %eax, %cr4 + movw $0xb800, %ax + movw %ax,%fs + movw $0x0e00 + 'i', %fs:(0x12) + + # need a gdt -- use lgdtl to force 32-bit operands, in case + # the GDT is located past 16 megabytes. + lgdtl real_save_gdt - wakeup_code + + movl real_save_cr0 - wakeup_code, %eax + movl %eax, %cr0 + jmp 1f +1: + movw $0x0e00 + 'n', %fs:(0x14) + + movl real_magic - wakeup_code, %eax + cmpl $0x12345678, %eax + jne bogus_real_magic + + testl $8, realmode_flags - wakeup_code + jz 1f + BEEP +1: + ljmpl $__KERNEL_CS, $wakeup_pmode_return + +real_save_gdt: .word 0 + .long 0 +real_save_cr0: .long 0 +real_save_cr3: .long 0 +real_save_cr4: .long 0 +real_magic: .long 0 +video_mode: .long 0 +realmode_flags: .long 0 +beep_flags: .long 0 +real_efer_save_restore: .long 0 +real_save_efer_edx: .long 0 +real_save_efer_eax: .long 0 + +bogus_real_magic: + movw $0x0e00 + 'B', %fs:(0x12) + jmp bogus_real_magic + +/* This code uses an extended set of video mode numbers. These include: + * Aliases for standard modes + * NORMAL_VGA (-1) + * EXTENDED_VGA (-2) + * ASK_VGA (-3) + * Video modes numbered by menu position -- NOT RECOMMENDED because of lack + * of compatibility when extending the table. These are between 0x00 and 0xff. + */ +#define VIDEO_FIRST_MENU 0x0000 + +/* Standard BIOS video modes (BIOS number + 0x0100) */ +#define VIDEO_FIRST_BIOS 0x0100 + +/* VESA BIOS video modes (VESA number + 0x0200) */ +#define VIDEO_FIRST_VESA 0x0200 + +/* Video7 special modes (BIOS number + 0x0900) */ +#define VIDEO_FIRST_V7 0x0900 + +# Setting of user mode (AX=mode ID) => CF=success + +# For now, we only handle VESA modes (0x0200..0x03ff). To handle other +# modes, we should probably compile in the video code from the boot +# directory. +mode_set: + movw %ax, %bx + subb $VIDEO_FIRST_VESA>>8, %bh + cmpb $2, %bh + jb check_vesa + +setbad: + clc + ret + +check_vesa: + orw $0x4000, %bx # Use linear frame buffer + movw $0x4f02, %ax # VESA BIOS mode set call + int $0x10 + cmpw $0x004f, %ax # AL=4f if implemented + jnz setbad # AH=0 if OK + + stc + ret + + .code32 + ALIGN + +.org 0x800 +wakeup_stack_begin: # Stack grows down + +.org 0xff0 # Just below end of page +wakeup_stack: +ENTRY(wakeup_end) + +.org 0x1000 + +wakeup_pmode_return: + movw $__KERNEL_DS, %ax + movw %ax, %ss + movw %ax, %ds + movw %ax, %es + movw %ax, %fs + movw %ax, %gs + movw $0x0e00 + 'u', 0xb8016 + + # reload the gdt, as we need the full 32 bit address + lgdt saved_gdt + lidt saved_idt + lldt saved_ldt + ljmp $(__KERNEL_CS),$1f +1: + movl %cr3, %eax + movl %eax, %cr3 + wbinvd + + # and restore the stack ... but you need gdt for this to work + movl saved_context_esp, %esp + + movl %cs:saved_magic, %eax + cmpl $0x12345678, %eax + jne bogus_magic + + # jump to place where we left off + movl saved_eip,%eax + jmp *%eax + +bogus_magic: + movw $0x0e00 + 'B', 0xb8018 + jmp bogus_magic + + +## +# acpi_copy_wakeup_routine +# +# Copy the above routine to low memory. +# +# Parameters: +# %eax: place to copy wakeup routine to +# +# Returned address is location of code in low memory (past data and stack) +# +ENTRY(acpi_copy_wakeup_routine) + + pushl %ebx + sgdt saved_gdt + sidt saved_idt + sldt saved_ldt + str saved_tss + + movl nx_enabled, %edx + movl %edx, real_efer_save_restore - wakeup_start (%eax) + testl $1, real_efer_save_restore - wakeup_start (%eax) + jz 2f + # save efer setting + pushl %eax + movl %eax, %ebx + mov $0xc0000080, %ecx + rdmsr + movl %edx, real_save_efer_edx - wakeup_start (%ebx) + movl %eax, real_save_efer_eax - wakeup_start (%ebx) + popl %eax +2: + + movl %cr3, %edx + movl %edx, real_save_cr3 - wakeup_start (%eax) + movl %cr4, %edx + movl %edx, real_save_cr4 - wakeup_start (%eax) + movl %cr0, %edx + movl %edx, real_save_cr0 - wakeup_start (%eax) + sgdt real_save_gdt - wakeup_start (%eax) + + movl saved_videomode, %edx + movl %edx, video_mode - wakeup_start (%eax) + movl acpi_realmode_flags, %edx + movl %edx, realmode_flags - wakeup_start (%eax) + movl $0x12345678, real_magic - wakeup_start (%eax) + movl $0x12345678, saved_magic + popl %ebx + ret + +save_registers: + leal 4(%esp), %eax + movl %eax, saved_context_esp + movl %ebx, saved_context_ebx + movl %ebp, saved_context_ebp + movl %esi, saved_context_esi + movl %edi, saved_context_edi + pushfl ; popl saved_context_eflags + + movl $ret_point, saved_eip + ret + + +restore_registers: + movl saved_context_ebp, %ebp + movl saved_context_ebx, %ebx + movl saved_context_esi, %esi + movl saved_context_edi, %edi + pushl saved_context_eflags ; popfl + ret + +ENTRY(do_suspend_lowlevel) + call save_processor_state + call save_registers + pushl $3 + call acpi_enter_sleep_state + addl $4, %esp + +# In case of S3 failure, we'll emerge here. Jump +# to ret_point to recover + jmp ret_point + .p2align 4,,7 +ret_point: + call restore_registers + call restore_processor_state + ret + +.data +ALIGN +ENTRY(saved_magic) .long 0 +ENTRY(saved_eip) .long 0 + +# saved registers +saved_gdt: .long 0,0 +saved_idt: .long 0,0 +saved_ldt: .long 0 +saved_tss: .long 0 + diff --git a/arch/x86_64/kernel/acpi/Makefile b/arch/x86_64/kernel/acpi/Makefile index 6e00bfeb59a..dd4eb7cef2b 100644 --- a/arch/x86_64/kernel/acpi/Makefile +++ b/arch/x86_64/kernel/acpi/Makefile @@ -1,5 +1,5 @@ ifeq ($(CONFIG_X86_32),y) -include ${srctree}/arch/i386/kernel/acpi/Makefile_32 +include ${srctree}/arch/x86/kernel/acpi/Makefile_32 else include ${srctree}/arch/x86_64/kernel/acpi/Makefile_64 endif diff --git a/arch/x86_64/kernel/acpi/Makefile_64 b/arch/x86_64/kernel/acpi/Makefile_64 index c24fe95b0d9..fc377c87e0b 100644 --- a/arch/x86_64/kernel/acpi/Makefile_64 +++ b/arch/x86_64/kernel/acpi/Makefile_64 @@ -1,9 +1,9 @@ obj-y := boot.o -boot-y := ../../../i386/kernel/acpi/boot.o +boot-y := ../../../x86/kernel/acpi/boot.o obj-$(CONFIG_ACPI_SLEEP) += sleep_64.o wakeup_64.o ifneq ($(CONFIG_ACPI_PROCESSOR),) obj-y += processor.o -processor-y := ../../../i386/kernel/acpi/processor.o ../../../i386/kernel/acpi/cstate.o +processor-y := ../../../x86/kernel/acpi/processor.o ../../../x86/kernel/acpi/cstate.o endif -- cgit v1.2.3-70-g09d2 From c18db0d7e299791c73d4dbe5ae7905b2ab8ba332 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:25 +0200 Subject: i386: move kernel/cpu/mcheck Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/kernel/cpu/Makefile | 2 +- arch/i386/kernel/cpu/mcheck/Makefile | 2 - arch/i386/kernel/cpu/mcheck/k7.c | 102 ------------ arch/i386/kernel/cpu/mcheck/mce.c | 90 ----------- arch/i386/kernel/cpu/mcheck/mce.h | 14 -- arch/i386/kernel/cpu/mcheck/non-fatal.c | 91 ----------- arch/i386/kernel/cpu/mcheck/p4.c | 253 ------------------------------ arch/i386/kernel/cpu/mcheck/p5.c | 53 ------- arch/i386/kernel/cpu/mcheck/p6.c | 119 -------------- arch/i386/kernel/cpu/mcheck/therm_throt.c | 186 ---------------------- arch/i386/kernel/cpu/mcheck/winchip.c | 36 ----- arch/x86/kernel/cpu/mcheck/Makefile | 2 + arch/x86/kernel/cpu/mcheck/k7.c | 102 ++++++++++++ arch/x86/kernel/cpu/mcheck/mce.c | 90 +++++++++++ arch/x86/kernel/cpu/mcheck/mce.h | 14 ++ arch/x86/kernel/cpu/mcheck/non-fatal.c | 91 +++++++++++ arch/x86/kernel/cpu/mcheck/p4.c | 253 ++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/mcheck/p5.c | 53 +++++++ arch/x86/kernel/cpu/mcheck/p6.c | 119 ++++++++++++++ arch/x86/kernel/cpu/mcheck/therm_throt.c | 186 ++++++++++++++++++++++ arch/x86/kernel/cpu/mcheck/winchip.c | 36 +++++ arch/x86_64/kernel/Makefile_64 | 2 +- 22 files changed, 948 insertions(+), 948 deletions(-) delete mode 100644 arch/i386/kernel/cpu/mcheck/Makefile delete mode 100644 arch/i386/kernel/cpu/mcheck/k7.c delete mode 100644 arch/i386/kernel/cpu/mcheck/mce.c delete mode 100644 arch/i386/kernel/cpu/mcheck/mce.h delete mode 100644 arch/i386/kernel/cpu/mcheck/non-fatal.c delete mode 100644 arch/i386/kernel/cpu/mcheck/p4.c delete mode 100644 arch/i386/kernel/cpu/mcheck/p5.c delete mode 100644 arch/i386/kernel/cpu/mcheck/p6.c delete mode 100644 arch/i386/kernel/cpu/mcheck/therm_throt.c delete mode 100644 arch/i386/kernel/cpu/mcheck/winchip.c create mode 100644 arch/x86/kernel/cpu/mcheck/Makefile create mode 100644 arch/x86/kernel/cpu/mcheck/k7.c create mode 100644 arch/x86/kernel/cpu/mcheck/mce.c create mode 100644 arch/x86/kernel/cpu/mcheck/mce.h create mode 100644 arch/x86/kernel/cpu/mcheck/non-fatal.c create mode 100644 arch/x86/kernel/cpu/mcheck/p4.c create mode 100644 arch/x86/kernel/cpu/mcheck/p5.c create mode 100644 arch/x86/kernel/cpu/mcheck/p6.c create mode 100644 arch/x86/kernel/cpu/mcheck/therm_throt.c create mode 100644 arch/x86/kernel/cpu/mcheck/winchip.c (limited to 'arch/x86') diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile index 778396c78d6..09effc02e35 100644 --- a/arch/i386/kernel/cpu/Makefile +++ b/arch/i386/kernel/cpu/Makefile @@ -12,7 +12,7 @@ obj-y += intel.o intel_cacheinfo.o addon_cpuid_features.o obj-y += nexgen.o obj-y += umc.o -obj-$(CONFIG_X86_MCE) += mcheck/ +obj-$(CONFIG_X86_MCE) += ../../../x86/kernel/cpu/mcheck/ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ diff --git a/arch/i386/kernel/cpu/mcheck/Makefile b/arch/i386/kernel/cpu/mcheck/Makefile deleted file mode 100644 index f1ebe1c1c17..00000000000 --- a/arch/i386/kernel/cpu/mcheck/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o -obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c deleted file mode 100644 index eef63e3630c..00000000000 --- a/arch/i386/kernel/cpu/mcheck/k7.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Athlon/Hammer specific Machine Check Exception Reporting - * (C) Copyright 2002 Dave Jones - */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "mce.h" - -/* Machine Check Handler For AMD Athlon/Duron */ -static fastcall void k7_machine_check(struct pt_regs * regs, long error_code) -{ - int recover=1; - u32 alow, ahigh, high, low; - u32 mcgstl, mcgsth; - int i; - - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); - if (mcgstl & (1<<0)) /* Recoverable ? */ - recover=0; - - printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", - smp_processor_id(), mcgsth, mcgstl); - - for (i=1; i, Dave Jones - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "mce.h" - -int mce_disabled = 0; -int nr_mce_banks; - -EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ - -/* Handle unconfigured int18 (should never happen) */ -static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_code) -{ - printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id()); -} - -/* Call the installed machine check handler for this CPU setup. */ -void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; - -/* This has to be run for each processor */ -void mcheck_init(struct cpuinfo_x86 *c) -{ - if (mce_disabled==1) - return; - - switch (c->x86_vendor) { - case X86_VENDOR_AMD: - amd_mcheck_init(c); - break; - - case X86_VENDOR_INTEL: - if (c->x86==5) - intel_p5_mcheck_init(c); - if (c->x86==6) - intel_p6_mcheck_init(c); - if (c->x86==15) - intel_p4_mcheck_init(c); - break; - - case X86_VENDOR_CENTAUR: - if (c->x86==5) - winchip_mcheck_init(c); - break; - - default: - break; - } -} - -static unsigned long old_cr4 __initdata; - -void __init stop_mce(void) -{ - old_cr4 = read_cr4(); - clear_in_cr4(X86_CR4_MCE); -} - -void __init restart_mce(void) -{ - if (old_cr4 & X86_CR4_MCE) - set_in_cr4(X86_CR4_MCE); -} - -static int __init mcheck_disable(char *str) -{ - mce_disabled = 1; - return 1; -} - -static int __init mcheck_enable(char *str) -{ - mce_disabled = -1; - return 1; -} - -__setup("nomce", mcheck_disable); -__setup("mce", mcheck_enable); diff --git a/arch/i386/kernel/cpu/mcheck/mce.h b/arch/i386/kernel/cpu/mcheck/mce.h deleted file mode 100644 index 81fb6e2d35f..00000000000 --- a/arch/i386/kernel/cpu/mcheck/mce.h +++ /dev/null @@ -1,14 +0,0 @@ -#include -#include - -void amd_mcheck_init(struct cpuinfo_x86 *c); -void intel_p4_mcheck_init(struct cpuinfo_x86 *c); -void intel_p5_mcheck_init(struct cpuinfo_x86 *c); -void intel_p6_mcheck_init(struct cpuinfo_x86 *c); -void winchip_mcheck_init(struct cpuinfo_x86 *c); - -/* Call the installed machine check handler for this CPU setup. */ -extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code); - -extern int nr_mce_banks; - diff --git a/arch/i386/kernel/cpu/mcheck/non-fatal.c b/arch/i386/kernel/cpu/mcheck/non-fatal.c deleted file mode 100644 index bf39409b383..00000000000 --- a/arch/i386/kernel/cpu/mcheck/non-fatal.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Non Fatal Machine Check Exception Reporting - * - * (C) Copyright 2002 Dave Jones. - * - * This file contains routines to check for non-fatal MCEs every 15s - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "mce.h" - -static int firstbank; - -#define MCE_RATE 15*HZ /* timer rate is 15s */ - -static void mce_checkregs (void *info) -{ - u32 low, high; - int i; - - for (i=firstbank; i -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -#include "mce.h" - -/* as supported by the P4/Xeon family */ -struct intel_mce_extended_msrs { - u32 eax; - u32 ebx; - u32 ecx; - u32 edx; - u32 esi; - u32 edi; - u32 ebp; - u32 esp; - u32 eflags; - u32 eip; - /* u32 *reserved[]; */ -}; - -static int mce_num_extended_msrs = 0; - - -#ifdef CONFIG_X86_MCE_P4THERMAL -static void unexpected_thermal_interrupt(struct pt_regs *regs) -{ - printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", - smp_processor_id()); - add_taint(TAINT_MACHINE_CHECK); -} - -/* P4/Xeon Thermal transition interrupt handler */ -static void intel_thermal_interrupt(struct pt_regs *regs) -{ - __u64 msr_val; - - ack_APIC_irq(); - - rdmsrl(MSR_IA32_THERM_STATUS, msr_val); - therm_throt_process(msr_val & 0x1); -} - -/* Thermal interrupt handler for this CPU setup */ -static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt; - -fastcall void smp_thermal_interrupt(struct pt_regs *regs) -{ - irq_enter(); - vendor_thermal_interrupt(regs); - irq_exit(); -} - -/* P4/Xeon Thermal regulation detect and init */ -static void intel_init_thermal(struct cpuinfo_x86 *c) -{ - u32 l, h; - unsigned int cpu = smp_processor_id(); - - /* Thermal monitoring */ - if (!cpu_has(c, X86_FEATURE_ACPI)) - return; /* -ENODEV */ - - /* Clock modulation */ - if (!cpu_has(c, X86_FEATURE_ACC)) - return; /* -ENODEV */ - - /* first check if its enabled already, in which case there might - * be some SMM goo which handles it, so we can't even put a handler - * since it might be delivered via SMI already -zwanem. - */ - rdmsr (MSR_IA32_MISC_ENABLE, l, h); - h = apic_read(APIC_LVTTHMR); - if ((l & (1<<3)) && (h & APIC_DM_SMI)) { - printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", - cpu); - return; /* -EBUSY */ - } - - /* check whether a vector already exists, temporarily masked? */ - if (h & APIC_VECTOR_MASK) { - printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " - "installed\n", - cpu, (h & APIC_VECTOR_MASK)); - return; /* -EBUSY */ - } - - /* The temperature transition interrupt handler setup */ - h = THERMAL_APIC_VECTOR; /* our delivery vector */ - h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ - apic_write_around(APIC_LVTTHMR, h); - - rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); - wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); - - /* ok we're good to go... */ - vendor_thermal_interrupt = intel_thermal_interrupt; - - rdmsr (MSR_IA32_MISC_ENABLE, l, h); - wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); - - l = apic_read (APIC_LVTTHMR); - apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); - - /* enable thermal throttle processing */ - atomic_set(&therm_throt_en, 1); - return; -} -#endif /* CONFIG_X86_MCE_P4THERMAL */ - - -/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ -static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) -{ - u32 h; - - rdmsr (MSR_IA32_MCG_EAX, r->eax, h); - rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); - rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); - rdmsr (MSR_IA32_MCG_EDX, r->edx, h); - rdmsr (MSR_IA32_MCG_ESI, r->esi, h); - rdmsr (MSR_IA32_MCG_EDI, r->edi, h); - rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); - rdmsr (MSR_IA32_MCG_ESP, r->esp, h); - rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); - rdmsr (MSR_IA32_MCG_EIP, r->eip, h); -} - -static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) -{ - int recover=1; - u32 alow, ahigh, high, low; - u32 mcgstl, mcgsth; - int i; - - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); - if (mcgstl & (1<<0)) /* Recoverable ? */ - recover=0; - - printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", - smp_processor_id(), mcgsth, mcgstl); - - if (mce_num_extended_msrs > 0) { - struct intel_mce_extended_msrs dbg; - intel_get_extended_msrs(&dbg); - printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", - smp_processor_id(), dbg.eip, dbg.eflags); - printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", - dbg.eax, dbg.ebx, dbg.ecx, dbg.edx); - printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", - dbg.esi, dbg.edi, dbg.ebp, dbg.esp); - } - - for (i=0; i> 16) & 0xff; - printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" - " available\n", - smp_processor_id(), mce_num_extended_msrs); - -#ifdef CONFIG_X86_MCE_P4THERMAL - /* Check for P4/Xeon Thermal monitor */ - intel_init_thermal(c); -#endif - } -} diff --git a/arch/i386/kernel/cpu/mcheck/p5.c b/arch/i386/kernel/cpu/mcheck/p5.c deleted file mode 100644 index 94bc43d950c..00000000000 --- a/arch/i386/kernel/cpu/mcheck/p5.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * P5 specific Machine Check Exception Reporting - * (C) Copyright 2002 Alan Cox - */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "mce.h" - -/* Machine check handler for Pentium class Intel */ -static fastcall void pentium_machine_check(struct pt_regs * regs, long error_code) -{ - u32 loaddr, hi, lotype; - rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); - rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); - printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype); - if(lotype&(1<<5)) - printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id()); - add_taint(TAINT_MACHINE_CHECK); -} - -/* Set up machine check reporting for processors with Intel style MCE */ -void intel_p5_mcheck_init(struct cpuinfo_x86 *c) -{ - u32 l, h; - - /*Check for MCE support */ - if( !cpu_has(c, X86_FEATURE_MCE) ) - return; - - /* Default P5 to off as its often misconnected */ - if(mce_disabled != -1) - return; - machine_check_vector = pentium_machine_check; - wmb(); - - /* Read registers before enabling */ - rdmsr(MSR_IA32_P5_MC_ADDR, l, h); - rdmsr(MSR_IA32_P5_MC_TYPE, l, h); - printk(KERN_INFO "Intel old style machine check architecture supported.\n"); - - /* Enable MCE */ - set_in_cr4(X86_CR4_MCE); - printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id()); -} diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c deleted file mode 100644 index deeae42ce19..00000000000 --- a/arch/i386/kernel/cpu/mcheck/p6.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - * P6 specific Machine Check Exception Reporting - * (C) Copyright 2002 Alan Cox - */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "mce.h" - -/* Machine Check Handler For PII/PIII */ -static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) -{ - int recover=1; - u32 alow, ahigh, high, low; - u32 mcgstl, mcgsth; - int i; - - rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); - if (mcgstl & (1<<0)) /* Recoverable ? */ - recover=0; - - printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", - smp_processor_id(), mcgsth, mcgstl); - - for (i=0; i -#include -#include -#include -#include -#include -#include - -/* How long to wait between reporting thermal events */ -#define CHECK_INTERVAL (300 * HZ) - -static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; -static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); -atomic_t therm_throt_en = ATOMIC_INIT(0); - -#ifdef CONFIG_SYSFS -#define define_therm_throt_sysdev_one_ro(_name) \ - static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) - -#define define_therm_throt_sysdev_show_func(name) \ -static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ - char *buf) \ -{ \ - unsigned int cpu = dev->id; \ - ssize_t ret; \ - \ - preempt_disable(); /* CPU hotplug */ \ - if (cpu_online(cpu)) \ - ret = sprintf(buf, "%lu\n", \ - per_cpu(thermal_throttle_##name, cpu)); \ - else \ - ret = 0; \ - preempt_enable(); \ - \ - return ret; \ -} - -define_therm_throt_sysdev_show_func(count); -define_therm_throt_sysdev_one_ro(count); - -static struct attribute *thermal_throttle_attrs[] = { - &attr_count.attr, - NULL -}; - -static struct attribute_group thermal_throttle_attr_group = { - .attrs = thermal_throttle_attrs, - .name = "thermal_throttle" -}; -#endif /* CONFIG_SYSFS */ - -/*** - * therm_throt_process - Process thermal throttling event from interrupt - * @curr: Whether the condition is current or not (boolean), since the - * thermal interrupt normally gets called both when the thermal - * event begins and once the event has ended. - * - * This function is called by the thermal interrupt after the - * IRQ has been acknowledged. - * - * It will take care of rate limiting and printing messages to the syslog. - * - * Returns: 0 : Event should NOT be further logged, i.e. still in - * "timeout" from previous log message. - * 1 : Event should be logged further, and a message has been - * printed to the syslog. - */ -int therm_throt_process(int curr) -{ - unsigned int cpu = smp_processor_id(); - __u64 tmp_jiffs = get_jiffies_64(); - - if (curr) - __get_cpu_var(thermal_throttle_count)++; - - if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) - return 0; - - __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; - - /* if we just entered the thermal event */ - if (curr) { - printk(KERN_CRIT "CPU%d: Temperature above threshold, " - "cpu clock throttled (total events = %lu)\n", cpu, - __get_cpu_var(thermal_throttle_count)); - - add_taint(TAINT_MACHINE_CHECK); - } else { - printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); - } - - return 1; -} - -#ifdef CONFIG_SYSFS -/* Add/Remove thermal_throttle interface for CPU device */ -static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) -{ - return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); -} - -static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) -{ - return sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); -} - -/* Mutex protecting device creation against CPU hotplug */ -static DEFINE_MUTEX(therm_cpu_lock); - -/* Get notified when a cpu comes on/off. Be hotplug friendly. */ -static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; - int err; - - sys_dev = get_cpu_sysdev(cpu); - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - mutex_lock(&therm_cpu_lock); - err = thermal_throttle_add_dev(sys_dev); - mutex_unlock(&therm_cpu_lock); - WARN_ON(err); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - mutex_lock(&therm_cpu_lock); - thermal_throttle_remove_dev(sys_dev); - mutex_unlock(&therm_cpu_lock); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block thermal_throttle_cpu_notifier = -{ - .notifier_call = thermal_throttle_cpu_callback, -}; - -static __init int thermal_throttle_init_device(void) -{ - unsigned int cpu = 0; - int err; - - if (!atomic_read(&therm_throt_en)) - return 0; - - register_hotcpu_notifier(&thermal_throttle_cpu_notifier); - -#ifdef CONFIG_HOTPLUG_CPU - mutex_lock(&therm_cpu_lock); -#endif - /* connect live CPUs to sysfs */ - for_each_online_cpu(cpu) { - err = thermal_throttle_add_dev(get_cpu_sysdev(cpu)); - WARN_ON(err); - } -#ifdef CONFIG_HOTPLUG_CPU - mutex_unlock(&therm_cpu_lock); -#endif - - return 0; -} - -device_initcall(thermal_throttle_init_device); -#endif /* CONFIG_SYSFS */ diff --git a/arch/i386/kernel/cpu/mcheck/winchip.c b/arch/i386/kernel/cpu/mcheck/winchip.c deleted file mode 100644 index 9e424b6c293..00000000000 --- a/arch/i386/kernel/cpu/mcheck/winchip.c +++ /dev/null @@ -1,36 +0,0 @@ -/* - * IDT Winchip specific Machine Check Exception Reporting - * (C) Copyright 2002 Alan Cox - */ - -#include -#include -#include -#include - -#include -#include -#include - -#include "mce.h" - -/* Machine check handler for WinChip C6 */ -static fastcall void winchip_machine_check(struct pt_regs * regs, long error_code) -{ - printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); - add_taint(TAINT_MACHINE_CHECK); -} - -/* Set up machine check reporting on the Winchip C6 series */ -void winchip_mcheck_init(struct cpuinfo_x86 *c) -{ - u32 lo, hi; - machine_check_vector = winchip_machine_check; - wmb(); - rdmsr(MSR_IDT_FCR1, lo, hi); - lo|= (1<<2); /* Enable EIERRINT (int 18 MCE) */ - lo&= ~(1<<4); /* Enable MCE */ - wrmsr(MSR_IDT_FCR1, lo, hi); - set_in_cr4(X86_CR4_MCE); - printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n"); -} diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile new file mode 100644 index 00000000000..f1ebe1c1c17 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -0,0 +1,2 @@ +obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o +obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c new file mode 100644 index 00000000000..eef63e3630c --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/k7.c @@ -0,0 +1,102 @@ +/* + * Athlon/Hammer specific Machine Check Exception Reporting + * (C) Copyright 2002 Dave Jones + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mce.h" + +/* Machine Check Handler For AMD Athlon/Duron */ +static fastcall void k7_machine_check(struct pt_regs * regs, long error_code) +{ + int recover=1; + u32 alow, ahigh, high, low; + u32 mcgstl, mcgsth; + int i; + + rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + if (mcgstl & (1<<0)) /* Recoverable ? */ + recover=0; + + printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", + smp_processor_id(), mcgsth, mcgstl); + + for (i=1; i, Dave Jones + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mce.h" + +int mce_disabled = 0; +int nr_mce_banks; + +EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ + +/* Handle unconfigured int18 (should never happen) */ +static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_code) +{ + printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id()); +} + +/* Call the installed machine check handler for this CPU setup. */ +void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; + +/* This has to be run for each processor */ +void mcheck_init(struct cpuinfo_x86 *c) +{ + if (mce_disabled==1) + return; + + switch (c->x86_vendor) { + case X86_VENDOR_AMD: + amd_mcheck_init(c); + break; + + case X86_VENDOR_INTEL: + if (c->x86==5) + intel_p5_mcheck_init(c); + if (c->x86==6) + intel_p6_mcheck_init(c); + if (c->x86==15) + intel_p4_mcheck_init(c); + break; + + case X86_VENDOR_CENTAUR: + if (c->x86==5) + winchip_mcheck_init(c); + break; + + default: + break; + } +} + +static unsigned long old_cr4 __initdata; + +void __init stop_mce(void) +{ + old_cr4 = read_cr4(); + clear_in_cr4(X86_CR4_MCE); +} + +void __init restart_mce(void) +{ + if (old_cr4 & X86_CR4_MCE) + set_in_cr4(X86_CR4_MCE); +} + +static int __init mcheck_disable(char *str) +{ + mce_disabled = 1; + return 1; +} + +static int __init mcheck_enable(char *str) +{ + mce_disabled = -1; + return 1; +} + +__setup("nomce", mcheck_disable); +__setup("mce", mcheck_enable); diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h new file mode 100644 index 00000000000..81fb6e2d35f --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce.h @@ -0,0 +1,14 @@ +#include +#include + +void amd_mcheck_init(struct cpuinfo_x86 *c); +void intel_p4_mcheck_init(struct cpuinfo_x86 *c); +void intel_p5_mcheck_init(struct cpuinfo_x86 *c); +void intel_p6_mcheck_init(struct cpuinfo_x86 *c); +void winchip_mcheck_init(struct cpuinfo_x86 *c); + +/* Call the installed machine check handler for this CPU setup. */ +extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code); + +extern int nr_mce_banks; + diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c new file mode 100644 index 00000000000..bf39409b383 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c @@ -0,0 +1,91 @@ +/* + * Non Fatal Machine Check Exception Reporting + * + * (C) Copyright 2002 Dave Jones. + * + * This file contains routines to check for non-fatal MCEs every 15s + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mce.h" + +static int firstbank; + +#define MCE_RATE 15*HZ /* timer rate is 15s */ + +static void mce_checkregs (void *info) +{ + u32 low, high; + int i; + + for (i=firstbank; i +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "mce.h" + +/* as supported by the P4/Xeon family */ +struct intel_mce_extended_msrs { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; + u32 esi; + u32 edi; + u32 ebp; + u32 esp; + u32 eflags; + u32 eip; + /* u32 *reserved[]; */ +}; + +static int mce_num_extended_msrs = 0; + + +#ifdef CONFIG_X86_MCE_P4THERMAL +static void unexpected_thermal_interrupt(struct pt_regs *regs) +{ + printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", + smp_processor_id()); + add_taint(TAINT_MACHINE_CHECK); +} + +/* P4/Xeon Thermal transition interrupt handler */ +static void intel_thermal_interrupt(struct pt_regs *regs) +{ + __u64 msr_val; + + ack_APIC_irq(); + + rdmsrl(MSR_IA32_THERM_STATUS, msr_val); + therm_throt_process(msr_val & 0x1); +} + +/* Thermal interrupt handler for this CPU setup */ +static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt; + +fastcall void smp_thermal_interrupt(struct pt_regs *regs) +{ + irq_enter(); + vendor_thermal_interrupt(regs); + irq_exit(); +} + +/* P4/Xeon Thermal regulation detect and init */ +static void intel_init_thermal(struct cpuinfo_x86 *c) +{ + u32 l, h; + unsigned int cpu = smp_processor_id(); + + /* Thermal monitoring */ + if (!cpu_has(c, X86_FEATURE_ACPI)) + return; /* -ENODEV */ + + /* Clock modulation */ + if (!cpu_has(c, X86_FEATURE_ACC)) + return; /* -ENODEV */ + + /* first check if its enabled already, in which case there might + * be some SMM goo which handles it, so we can't even put a handler + * since it might be delivered via SMI already -zwanem. + */ + rdmsr (MSR_IA32_MISC_ENABLE, l, h); + h = apic_read(APIC_LVTTHMR); + if ((l & (1<<3)) && (h & APIC_DM_SMI)) { + printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", + cpu); + return; /* -EBUSY */ + } + + /* check whether a vector already exists, temporarily masked? */ + if (h & APIC_VECTOR_MASK) { + printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " + "installed\n", + cpu, (h & APIC_VECTOR_MASK)); + return; /* -EBUSY */ + } + + /* The temperature transition interrupt handler setup */ + h = THERMAL_APIC_VECTOR; /* our delivery vector */ + h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ + apic_write_around(APIC_LVTTHMR, h); + + rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); + wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); + + /* ok we're good to go... */ + vendor_thermal_interrupt = intel_thermal_interrupt; + + rdmsr (MSR_IA32_MISC_ENABLE, l, h); + wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); + + l = apic_read (APIC_LVTTHMR); + apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); + printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); + + /* enable thermal throttle processing */ + atomic_set(&therm_throt_en, 1); + return; +} +#endif /* CONFIG_X86_MCE_P4THERMAL */ + + +/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ +static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) +{ + u32 h; + + rdmsr (MSR_IA32_MCG_EAX, r->eax, h); + rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); + rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); + rdmsr (MSR_IA32_MCG_EDX, r->edx, h); + rdmsr (MSR_IA32_MCG_ESI, r->esi, h); + rdmsr (MSR_IA32_MCG_EDI, r->edi, h); + rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); + rdmsr (MSR_IA32_MCG_ESP, r->esp, h); + rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); + rdmsr (MSR_IA32_MCG_EIP, r->eip, h); +} + +static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) +{ + int recover=1; + u32 alow, ahigh, high, low; + u32 mcgstl, mcgsth; + int i; + + rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + if (mcgstl & (1<<0)) /* Recoverable ? */ + recover=0; + + printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", + smp_processor_id(), mcgsth, mcgstl); + + if (mce_num_extended_msrs > 0) { + struct intel_mce_extended_msrs dbg; + intel_get_extended_msrs(&dbg); + printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", + smp_processor_id(), dbg.eip, dbg.eflags); + printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", + dbg.eax, dbg.ebx, dbg.ecx, dbg.edx); + printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", + dbg.esi, dbg.edi, dbg.ebp, dbg.esp); + } + + for (i=0; i> 16) & 0xff; + printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" + " available\n", + smp_processor_id(), mce_num_extended_msrs); + +#ifdef CONFIG_X86_MCE_P4THERMAL + /* Check for P4/Xeon Thermal monitor */ + intel_init_thermal(c); +#endif + } +} diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c new file mode 100644 index 00000000000..94bc43d950c --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -0,0 +1,53 @@ +/* + * P5 specific Machine Check Exception Reporting + * (C) Copyright 2002 Alan Cox + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mce.h" + +/* Machine check handler for Pentium class Intel */ +static fastcall void pentium_machine_check(struct pt_regs * regs, long error_code) +{ + u32 loaddr, hi, lotype; + rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); + rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); + printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype); + if(lotype&(1<<5)) + printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id()); + add_taint(TAINT_MACHINE_CHECK); +} + +/* Set up machine check reporting for processors with Intel style MCE */ +void intel_p5_mcheck_init(struct cpuinfo_x86 *c) +{ + u32 l, h; + + /*Check for MCE support */ + if( !cpu_has(c, X86_FEATURE_MCE) ) + return; + + /* Default P5 to off as its often misconnected */ + if(mce_disabled != -1) + return; + machine_check_vector = pentium_machine_check; + wmb(); + + /* Read registers before enabling */ + rdmsr(MSR_IA32_P5_MC_ADDR, l, h); + rdmsr(MSR_IA32_P5_MC_TYPE, l, h); + printk(KERN_INFO "Intel old style machine check architecture supported.\n"); + + /* Enable MCE */ + set_in_cr4(X86_CR4_MCE); + printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id()); +} diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c new file mode 100644 index 00000000000..deeae42ce19 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/p6.c @@ -0,0 +1,119 @@ +/* + * P6 specific Machine Check Exception Reporting + * (C) Copyright 2002 Alan Cox + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mce.h" + +/* Machine Check Handler For PII/PIII */ +static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) +{ + int recover=1; + u32 alow, ahigh, high, low; + u32 mcgstl, mcgsth; + int i; + + rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + if (mcgstl & (1<<0)) /* Recoverable ? */ + recover=0; + + printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", + smp_processor_id(), mcgsth, mcgstl); + + for (i=0; i +#include +#include +#include +#include +#include +#include + +/* How long to wait between reporting thermal events */ +#define CHECK_INTERVAL (300 * HZ) + +static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; +static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); +atomic_t therm_throt_en = ATOMIC_INIT(0); + +#ifdef CONFIG_SYSFS +#define define_therm_throt_sysdev_one_ro(_name) \ + static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) + +#define define_therm_throt_sysdev_show_func(name) \ +static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ + char *buf) \ +{ \ + unsigned int cpu = dev->id; \ + ssize_t ret; \ + \ + preempt_disable(); /* CPU hotplug */ \ + if (cpu_online(cpu)) \ + ret = sprintf(buf, "%lu\n", \ + per_cpu(thermal_throttle_##name, cpu)); \ + else \ + ret = 0; \ + preempt_enable(); \ + \ + return ret; \ +} + +define_therm_throt_sysdev_show_func(count); +define_therm_throt_sysdev_one_ro(count); + +static struct attribute *thermal_throttle_attrs[] = { + &attr_count.attr, + NULL +}; + +static struct attribute_group thermal_throttle_attr_group = { + .attrs = thermal_throttle_attrs, + .name = "thermal_throttle" +}; +#endif /* CONFIG_SYSFS */ + +/*** + * therm_throt_process - Process thermal throttling event from interrupt + * @curr: Whether the condition is current or not (boolean), since the + * thermal interrupt normally gets called both when the thermal + * event begins and once the event has ended. + * + * This function is called by the thermal interrupt after the + * IRQ has been acknowledged. + * + * It will take care of rate limiting and printing messages to the syslog. + * + * Returns: 0 : Event should NOT be further logged, i.e. still in + * "timeout" from previous log message. + * 1 : Event should be logged further, and a message has been + * printed to the syslog. + */ +int therm_throt_process(int curr) +{ + unsigned int cpu = smp_processor_id(); + __u64 tmp_jiffs = get_jiffies_64(); + + if (curr) + __get_cpu_var(thermal_throttle_count)++; + + if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) + return 0; + + __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; + + /* if we just entered the thermal event */ + if (curr) { + printk(KERN_CRIT "CPU%d: Temperature above threshold, " + "cpu clock throttled (total events = %lu)\n", cpu, + __get_cpu_var(thermal_throttle_count)); + + add_taint(TAINT_MACHINE_CHECK); + } else { + printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); + } + + return 1; +} + +#ifdef CONFIG_SYSFS +/* Add/Remove thermal_throttle interface for CPU device */ +static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) +{ + return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); +} + +static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) +{ + return sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); +} + +/* Mutex protecting device creation against CPU hotplug */ +static DEFINE_MUTEX(therm_cpu_lock); + +/* Get notified when a cpu comes on/off. Be hotplug friendly. */ +static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + int err; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + mutex_lock(&therm_cpu_lock); + err = thermal_throttle_add_dev(sys_dev); + mutex_unlock(&therm_cpu_lock); + WARN_ON(err); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + mutex_lock(&therm_cpu_lock); + thermal_throttle_remove_dev(sys_dev); + mutex_unlock(&therm_cpu_lock); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block thermal_throttle_cpu_notifier = +{ + .notifier_call = thermal_throttle_cpu_callback, +}; + +static __init int thermal_throttle_init_device(void) +{ + unsigned int cpu = 0; + int err; + + if (!atomic_read(&therm_throt_en)) + return 0; + + register_hotcpu_notifier(&thermal_throttle_cpu_notifier); + +#ifdef CONFIG_HOTPLUG_CPU + mutex_lock(&therm_cpu_lock); +#endif + /* connect live CPUs to sysfs */ + for_each_online_cpu(cpu) { + err = thermal_throttle_add_dev(get_cpu_sysdev(cpu)); + WARN_ON(err); + } +#ifdef CONFIG_HOTPLUG_CPU + mutex_unlock(&therm_cpu_lock); +#endif + + return 0; +} + +device_initcall(thermal_throttle_init_device); +#endif /* CONFIG_SYSFS */ diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c new file mode 100644 index 00000000000..9e424b6c293 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -0,0 +1,36 @@ +/* + * IDT Winchip specific Machine Check Exception Reporting + * (C) Copyright 2002 Alan Cox + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "mce.h" + +/* Machine check handler for WinChip C6 */ +static fastcall void winchip_machine_check(struct pt_regs * regs, long error_code) +{ + printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); + add_taint(TAINT_MACHINE_CHECK); +} + +/* Set up machine check reporting on the Winchip C6 series */ +void winchip_mcheck_init(struct cpuinfo_x86 *c) +{ + u32 lo, hi; + machine_check_vector = winchip_machine_check; + wmb(); + rdmsr(MSR_IDT_FCR1, lo, hi); + lo|= (1<<2); /* Enable EIERRINT (int 18 MCE) */ + lo&= ~(1<<4); /* Enable MCE */ + wrmsr(MSR_IDT_FCR1, lo, hi); + set_in_cr4(X86_CR4_MCE); + printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n"); +} diff --git a/arch/x86_64/kernel/Makefile_64 b/arch/x86_64/kernel/Makefile_64 index 8253617af12..1c9de796fa1 100644 --- a/arch/x86_64/kernel/Makefile_64 +++ b/arch/x86_64/kernel/Makefile_64 @@ -48,7 +48,7 @@ obj-y += pcspeaker.o CFLAGS_vsyscall_64.o := $(PROFILING) -g0 -therm_throt-y += ../../i386/kernel/cpu/mcheck/therm_throt.o +therm_throt-y += ../../x86/kernel/cpu/mcheck/therm_throt.o bootflag-y += ../../i386/kernel/bootflag.o cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o topology-y += ../../i386/kernel/topology.o -- cgit v1.2.3-70-g09d2 From ee580dc91efd83e6b55955e7261e8ad2a0e08d1a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:27 +0200 Subject: i386: move kernel/cpu/cpufreq Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Kconfig | 2 +- arch/i386/kernel/cpu/Makefile | 2 +- arch/i386/kernel/cpu/cpufreq/Kconfig | 250 ---- arch/i386/kernel/cpu/cpufreq/Makefile | 16 - arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c | 799 ------------ arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c | 441 ------- arch/i386/kernel/cpu/cpufreq/e_powersaver.c | 334 ----- arch/i386/kernel/cpu/cpufreq/elanfreq.c | 309 ----- arch/i386/kernel/cpu/cpufreq/gx-suspmod.c | 495 -------- arch/i386/kernel/cpu/cpufreq/longhaul.c | 1024 ---------------- arch/i386/kernel/cpu/cpufreq/longhaul.h | 353 ------ arch/i386/kernel/cpu/cpufreq/longrun.c | 325 ----- arch/i386/kernel/cpu/cpufreq/p4-clockmod.c | 316 ----- arch/i386/kernel/cpu/cpufreq/powernow-k6.c | 256 ---- arch/i386/kernel/cpu/cpufreq/powernow-k7.c | 703 ----------- arch/i386/kernel/cpu/cpufreq/powernow-k7.h | 44 - arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 1363 --------------------- arch/i386/kernel/cpu/cpufreq/powernow-k8.h | 232 ---- arch/i386/kernel/cpu/cpufreq/sc520_freq.c | 191 --- arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c | 634 ---------- arch/i386/kernel/cpu/cpufreq/speedstep-ich.c | 440 ------- arch/i386/kernel/cpu/cpufreq/speedstep-lib.c | 444 ------- arch/i386/kernel/cpu/cpufreq/speedstep-lib.h | 49 - arch/i386/kernel/cpu/cpufreq/speedstep-smi.c | 424 ------- arch/x86/kernel/cpu/cpufreq/Kconfig | 250 ++++ arch/x86/kernel/cpu/cpufreq/Makefile | 16 + arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 799 ++++++++++++ arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c | 441 +++++++ arch/x86/kernel/cpu/cpufreq/e_powersaver.c | 334 +++++ arch/x86/kernel/cpu/cpufreq/elanfreq.c | 309 +++++ arch/x86/kernel/cpu/cpufreq/gx-suspmod.c | 495 ++++++++ arch/x86/kernel/cpu/cpufreq/longhaul.c | 1024 ++++++++++++++++ arch/x86/kernel/cpu/cpufreq/longhaul.h | 353 ++++++ arch/x86/kernel/cpu/cpufreq/longrun.c | 325 +++++ arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | 316 +++++ arch/x86/kernel/cpu/cpufreq/powernow-k6.c | 256 ++++ arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 703 +++++++++++ arch/x86/kernel/cpu/cpufreq/powernow-k7.h | 44 + arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 1363 +++++++++++++++++++++ arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 232 ++++ arch/x86/kernel/cpu/cpufreq/sc520_freq.c | 191 +++ arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 634 ++++++++++ arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 440 +++++++ arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | 444 +++++++ arch/x86/kernel/cpu/cpufreq/speedstep-lib.h | 49 + arch/x86/kernel/cpu/cpufreq/speedstep-smi.c | 424 +++++++ arch/x86_64/kernel/Makefile_64 | 2 +- 47 files changed, 9445 insertions(+), 9445 deletions(-) delete mode 100644 arch/i386/kernel/cpu/cpufreq/Kconfig delete mode 100644 arch/i386/kernel/cpu/cpufreq/Makefile delete mode 100644 arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/e_powersaver.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/elanfreq.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/gx-suspmod.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/longhaul.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/longhaul.h delete mode 100644 arch/i386/kernel/cpu/cpufreq/longrun.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/p4-clockmod.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/powernow-k6.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/powernow-k7.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/powernow-k7.h delete mode 100644 arch/i386/kernel/cpu/cpufreq/powernow-k8.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/powernow-k8.h delete mode 100644 arch/i386/kernel/cpu/cpufreq/sc520_freq.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/speedstep-ich.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/speedstep-lib.c delete mode 100644 arch/i386/kernel/cpu/cpufreq/speedstep-lib.h delete mode 100644 arch/i386/kernel/cpu/cpufreq/speedstep-smi.c create mode 100644 arch/x86/kernel/cpu/cpufreq/Kconfig create mode 100644 arch/x86/kernel/cpu/cpufreq/Makefile create mode 100644 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c create mode 100644 arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c create mode 100644 arch/x86/kernel/cpu/cpufreq/e_powersaver.c create mode 100644 arch/x86/kernel/cpu/cpufreq/elanfreq.c create mode 100644 arch/x86/kernel/cpu/cpufreq/gx-suspmod.c create mode 100644 arch/x86/kernel/cpu/cpufreq/longhaul.c create mode 100644 arch/x86/kernel/cpu/cpufreq/longhaul.h create mode 100644 arch/x86/kernel/cpu/cpufreq/longrun.c create mode 100644 arch/x86/kernel/cpu/cpufreq/p4-clockmod.c create mode 100644 arch/x86/kernel/cpu/cpufreq/powernow-k6.c create mode 100644 arch/x86/kernel/cpu/cpufreq/powernow-k7.c create mode 100644 arch/x86/kernel/cpu/cpufreq/powernow-k7.h create mode 100644 arch/x86/kernel/cpu/cpufreq/powernow-k8.c create mode 100644 arch/x86/kernel/cpu/cpufreq/powernow-k8.h create mode 100644 arch/x86/kernel/cpu/cpufreq/sc520_freq.c create mode 100644 arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c create mode 100644 arch/x86/kernel/cpu/cpufreq/speedstep-ich.c create mode 100644 arch/x86/kernel/cpu/cpufreq/speedstep-lib.c create mode 100644 arch/x86/kernel/cpu/cpufreq/speedstep-lib.h create mode 100644 arch/x86/kernel/cpu/cpufreq/speedstep-smi.c (limited to 'arch/x86') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 97b64d7d6bf..fc86d41d252 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -1067,7 +1067,7 @@ config APM_REAL_MODE_POWER_OFF endif # APM -source "arch/i386/kernel/cpu/cpufreq/Kconfig" +source "arch/x86/kernel/cpu/cpufreq/Kconfig" endmenu diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile index 09effc02e35..8d9ce0232ad 100644 --- a/arch/i386/kernel/cpu/Makefile +++ b/arch/i386/kernel/cpu/Makefile @@ -15,6 +15,6 @@ obj-y += umc.o obj-$(CONFIG_X86_MCE) += ../../../x86/kernel/cpu/mcheck/ obj-$(CONFIG_MTRR) += mtrr/ -obj-$(CONFIG_CPU_FREQ) += cpufreq/ +obj-$(CONFIG_CPU_FREQ) += ../../../x86/kernel/cpu/cpufreq/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig deleted file mode 100644 index d8c6f132dc7..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/Kconfig +++ /dev/null @@ -1,250 +0,0 @@ -# -# CPU Frequency scaling -# - -menu "CPU Frequency scaling" - -source "drivers/cpufreq/Kconfig" - -if CPU_FREQ - -comment "CPUFreq processor drivers" - -config X86_ACPI_CPUFREQ - tristate "ACPI Processor P-States driver" - select CPU_FREQ_TABLE - depends on ACPI_PROCESSOR - help - This driver adds a CPUFreq driver which utilizes the ACPI - Processor Performance States. - This driver also supports Intel Enhanced Speedstep. - - For details, take a look at . - - If in doubt, say N. - -config ELAN_CPUFREQ - tristate "AMD Elan SC400 and SC410" - select CPU_FREQ_TABLE - depends on X86_ELAN - ---help--- - This adds the CPUFreq driver for AMD Elan SC400 and SC410 - processors. - - You need to specify the processor maximum speed as boot - parameter: elanfreq=maxspeed (in kHz) or as module - parameter "max_freq". - - For details, take a look at . - - If in doubt, say N. - -config SC520_CPUFREQ - tristate "AMD Elan SC520" - select CPU_FREQ_TABLE - depends on X86_ELAN - ---help--- - This adds the CPUFreq driver for AMD Elan SC520 processor. - - For details, take a look at . - - If in doubt, say N. - - -config X86_POWERNOW_K6 - tristate "AMD Mobile K6-2/K6-3 PowerNow!" - select CPU_FREQ_TABLE - help - This adds the CPUFreq driver for mobile AMD K6-2+ and mobile - AMD K6-3+ processors. - - For details, take a look at . - - If in doubt, say N. - -config X86_POWERNOW_K7 - tristate "AMD Mobile Athlon/Duron PowerNow!" - select CPU_FREQ_TABLE - help - This adds the CPUFreq driver for mobile AMD K7 mobile processors. - - For details, take a look at . - - If in doubt, say N. - -config X86_POWERNOW_K7_ACPI - bool - depends on X86_POWERNOW_K7 && ACPI_PROCESSOR - depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m) - default y - -config X86_POWERNOW_K8 - tristate "AMD Opteron/Athlon64 PowerNow!" - select CPU_FREQ_TABLE - depends on EXPERIMENTAL - help - This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors. - - For details, take a look at . - - If in doubt, say N. - -config X86_POWERNOW_K8_ACPI - bool "ACPI Support" - select ACPI_PROCESSOR - depends on ACPI && X86_POWERNOW_K8 - default y - help - This provides access to the K8s Processor Performance States via ACPI. - This driver is probably required for CPUFreq to work with multi-socket and - SMP systems. It is not required on at least some single-socket yet - multi-core systems, even if SMP is enabled. - - It is safe to say Y here. - -config X86_GX_SUSPMOD - tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" - depends on PCI - help - This add the CPUFreq driver for NatSemi Geode processors which - support suspend modulation. - - For details, take a look at . - - If in doubt, say N. - -config X86_SPEEDSTEP_CENTRINO - tristate "Intel Enhanced SpeedStep" - select CPU_FREQ_TABLE - select X86_SPEEDSTEP_CENTRINO_TABLE - help - This adds the CPUFreq driver for Enhanced SpeedStep enabled - mobile CPUs. This means Intel Pentium M (Centrino) CPUs. However, - you also need to say Y to "Use ACPI tables to decode..." below - [which might imply enabling ACPI] if you want to use this driver - on non-Banias CPUs. - - For details, take a look at . - - If in doubt, say N. - -config X86_SPEEDSTEP_CENTRINO_TABLE - bool "Built-in tables for Banias CPUs" - depends on X86_SPEEDSTEP_CENTRINO - default y - help - Use built-in tables for Banias CPUs if ACPI encoding - is not available. - - If in doubt, say N. - -config X86_SPEEDSTEP_ICH - tristate "Intel Speedstep on ICH-M chipsets (ioport interface)" - select CPU_FREQ_TABLE - help - This adds the CPUFreq driver for certain mobile Intel Pentium III - (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all - mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2, - ICH3 or ICH4 southbridge. - - For details, take a look at . - - If in doubt, say N. - -config X86_SPEEDSTEP_SMI - tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)" - select CPU_FREQ_TABLE - depends on EXPERIMENTAL - help - This adds the CPUFreq driver for certain mobile Intel Pentium III - (Coppermine), all mobile Intel Pentium III-M (Tualatin) - on systems which have an Intel 440BX/ZX/MX southbridge. - - For details, take a look at . - - If in doubt, say N. - -config X86_P4_CLOCKMOD - tristate "Intel Pentium 4 clock modulation" - select CPU_FREQ_TABLE - help - This adds the CPUFreq driver for Intel Pentium 4 / XEON - processors. - - For details, take a look at . - - If in doubt, say N. - -config X86_CPUFREQ_NFORCE2 - tristate "nVidia nForce2 FSB changing" - depends on EXPERIMENTAL - help - This adds the CPUFreq driver for FSB changing on nVidia nForce2 - platforms. - - For details, take a look at . - - If in doubt, say N. - -config X86_LONGRUN - tristate "Transmeta LongRun" - help - This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors - which support LongRun. - - For details, take a look at . - - If in doubt, say N. - -config X86_LONGHAUL - tristate "VIA Cyrix III Longhaul" - select CPU_FREQ_TABLE - depends on ACPI_PROCESSOR - help - This adds the CPUFreq driver for VIA Samuel/CyrixIII, - VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T - processors. - - For details, take a look at . - - If in doubt, say N. - -config X86_E_POWERSAVER - tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)" - select CPU_FREQ_TABLE - depends on EXPERIMENTAL - help - This adds the CPUFreq driver for VIA C7 processors. - - If in doubt, say N. - -comment "shared options" - -config X86_ACPI_CPUFREQ_PROC_INTF - bool "/proc/acpi/processor/../performance interface (deprecated)" - depends on PROC_FS - depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI - help - This enables the deprecated /proc/acpi/processor/../performance - interface. While it is helpful for debugging, the generic, - cross-architecture cpufreq interfaces should be used. - - If in doubt, say N. - -config X86_SPEEDSTEP_LIB - tristate - default X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD - -config X86_SPEEDSTEP_RELAXED_CAP_CHECK - bool "Relaxed speedstep capability checks" - depends on (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH) - help - Don't perform all checks for a speedstep capable system which would - normally be done. Some ancient or strange systems, though speedstep - capable, don't always indicate that they are speedstep capable. This - option lets the probing code bypass some of those checks if the - parameter "relaxed_check=1" is passed to the module. - -endif # CPU_FREQ - -endmenu diff --git a/arch/i386/kernel/cpu/cpufreq/Makefile b/arch/i386/kernel/cpu/cpufreq/Makefile deleted file mode 100644 index 560f7760dae..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o -obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o -obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o -obj-$(CONFIG_X86_LONGHAUL) += longhaul.o -obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o -obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o -obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o -obj-$(CONFIG_X86_LONGRUN) += longrun.o -obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o -obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o -obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o -obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o -obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o -obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o -obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o -obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c deleted file mode 100644 index 705e13a3078..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ /dev/null @@ -1,799 +0,0 @@ -/* - * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.4 $) - * - * Copyright (C) 2001, 2002 Andy Grover - * Copyright (C) 2001, 2002 Paul Diefenbaugh - * Copyright (C) 2002 - 2004 Dominik Brodowski - * Copyright (C) 2006 Denis Sadykov - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) - -MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); -MODULE_DESCRIPTION("ACPI Processor P-States Driver"); -MODULE_LICENSE("GPL"); - -enum { - UNDEFINED_CAPABLE = 0, - SYSTEM_INTEL_MSR_CAPABLE, - SYSTEM_IO_CAPABLE, -}; - -#define INTEL_MSR_RANGE (0xffff) -#define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1) - -struct acpi_cpufreq_data { - struct acpi_processor_performance *acpi_data; - struct cpufreq_frequency_table *freq_table; - unsigned int max_freq; - unsigned int resume; - unsigned int cpu_feature; -}; - -static struct acpi_cpufreq_data *drv_data[NR_CPUS]; -/* acpi_perf_data is a pointer to percpu data. */ -static struct acpi_processor_performance *acpi_perf_data; - -static struct cpufreq_driver acpi_cpufreq_driver; - -static unsigned int acpi_pstate_strict; - -static int check_est_cpu(unsigned int cpuid) -{ - struct cpuinfo_x86 *cpu = &cpu_data[cpuid]; - - if (cpu->x86_vendor != X86_VENDOR_INTEL || - !cpu_has(cpu, X86_FEATURE_EST)) - return 0; - - return 1; -} - -static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) -{ - struct acpi_processor_performance *perf; - int i; - - perf = data->acpi_data; - - for (i=0; istate_count; i++) { - if (value == perf->states[i].status) - return data->freq_table[i].frequency; - } - return 0; -} - -static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data) -{ - int i; - struct acpi_processor_performance *perf; - - msr &= INTEL_MSR_RANGE; - perf = data->acpi_data; - - for (i=0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { - if (msr == perf->states[data->freq_table[i].index].status) - return data->freq_table[i].frequency; - } - return data->freq_table[0].frequency; -} - -static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data) -{ - switch (data->cpu_feature) { - case SYSTEM_INTEL_MSR_CAPABLE: - return extract_msr(val, data); - case SYSTEM_IO_CAPABLE: - return extract_io(val, data); - default: - return 0; - } -} - -struct msr_addr { - u32 reg; -}; - -struct io_addr { - u16 port; - u8 bit_width; -}; - -typedef union { - struct msr_addr msr; - struct io_addr io; -} drv_addr_union; - -struct drv_cmd { - unsigned int type; - cpumask_t mask; - drv_addr_union addr; - u32 val; -}; - -static void do_drv_read(struct drv_cmd *cmd) -{ - u32 h; - - switch (cmd->type) { - case SYSTEM_INTEL_MSR_CAPABLE: - rdmsr(cmd->addr.msr.reg, cmd->val, h); - break; - case SYSTEM_IO_CAPABLE: - acpi_os_read_port((acpi_io_address)cmd->addr.io.port, - &cmd->val, - (u32)cmd->addr.io.bit_width); - break; - default: - break; - } -} - -static void do_drv_write(struct drv_cmd *cmd) -{ - u32 lo, hi; - - switch (cmd->type) { - case SYSTEM_INTEL_MSR_CAPABLE: - rdmsr(cmd->addr.msr.reg, lo, hi); - lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE); - wrmsr(cmd->addr.msr.reg, lo, hi); - break; - case SYSTEM_IO_CAPABLE: - acpi_os_write_port((acpi_io_address)cmd->addr.io.port, - cmd->val, - (u32)cmd->addr.io.bit_width); - break; - default: - break; - } -} - -static void drv_read(struct drv_cmd *cmd) -{ - cpumask_t saved_mask = current->cpus_allowed; - cmd->val = 0; - - set_cpus_allowed(current, cmd->mask); - do_drv_read(cmd); - set_cpus_allowed(current, saved_mask); -} - -static void drv_write(struct drv_cmd *cmd) -{ - cpumask_t saved_mask = current->cpus_allowed; - unsigned int i; - - for_each_cpu_mask(i, cmd->mask) { - set_cpus_allowed(current, cpumask_of_cpu(i)); - do_drv_write(cmd); - } - - set_cpus_allowed(current, saved_mask); - return; -} - -static u32 get_cur_val(cpumask_t mask) -{ - struct acpi_processor_performance *perf; - struct drv_cmd cmd; - - if (unlikely(cpus_empty(mask))) - return 0; - - switch (drv_data[first_cpu(mask)]->cpu_feature) { - case SYSTEM_INTEL_MSR_CAPABLE: - cmd.type = SYSTEM_INTEL_MSR_CAPABLE; - cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; - break; - case SYSTEM_IO_CAPABLE: - cmd.type = SYSTEM_IO_CAPABLE; - perf = drv_data[first_cpu(mask)]->acpi_data; - cmd.addr.io.port = perf->control_register.address; - cmd.addr.io.bit_width = perf->control_register.bit_width; - break; - default: - return 0; - } - - cmd.mask = mask; - - drv_read(&cmd); - - dprintk("get_cur_val = %u\n", cmd.val); - - return cmd.val; -} - -/* - * Return the measured active (C0) frequency on this CPU since last call - * to this function. - * Input: cpu number - * Return: Average CPU frequency in terms of max frequency (zero on error) - * - * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance - * over a period of time, while CPU is in C0 state. - * IA32_MPERF counts at the rate of max advertised frequency - * IA32_APERF counts at the rate of actual CPU frequency - * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and - * no meaning should be associated with absolute values of these MSRs. - */ -static unsigned int get_measured_perf(unsigned int cpu) -{ - union { - struct { - u32 lo; - u32 hi; - } split; - u64 whole; - } aperf_cur, mperf_cur; - - cpumask_t saved_mask; - unsigned int perf_percent; - unsigned int retval; - - saved_mask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - if (get_cpu() != cpu) { - /* We were not able to run on requested processor */ - put_cpu(); - return 0; - } - - rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi); - rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi); - - wrmsr(MSR_IA32_APERF, 0,0); - wrmsr(MSR_IA32_MPERF, 0,0); - -#ifdef __i386__ - /* - * We dont want to do 64 bit divide with 32 bit kernel - * Get an approximate value. Return failure in case we cannot get - * an approximate value. - */ - if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) { - int shift_count; - u32 h; - - h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi); - shift_count = fls(h); - - aperf_cur.whole >>= shift_count; - mperf_cur.whole >>= shift_count; - } - - if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) { - int shift_count = 7; - aperf_cur.split.lo >>= shift_count; - mperf_cur.split.lo >>= shift_count; - } - - if (aperf_cur.split.lo && mperf_cur.split.lo) - perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo; - else - perf_percent = 0; - -#else - if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) { - int shift_count = 7; - aperf_cur.whole >>= shift_count; - mperf_cur.whole >>= shift_count; - } - - if (aperf_cur.whole && mperf_cur.whole) - perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole; - else - perf_percent = 0; - -#endif - - retval = drv_data[cpu]->max_freq * perf_percent / 100; - - put_cpu(); - set_cpus_allowed(current, saved_mask); - - dprintk("cpu %d: performance percent %d\n", cpu, perf_percent); - return retval; -} - -static unsigned int get_cur_freq_on_cpu(unsigned int cpu) -{ - struct acpi_cpufreq_data *data = drv_data[cpu]; - unsigned int freq; - - dprintk("get_cur_freq_on_cpu (%d)\n", cpu); - - if (unlikely(data == NULL || - data->acpi_data == NULL || data->freq_table == NULL)) { - return 0; - } - - freq = extract_freq(get_cur_val(cpumask_of_cpu(cpu)), data); - dprintk("cur freq = %u\n", freq); - - return freq; -} - -static unsigned int check_freqs(cpumask_t mask, unsigned int freq, - struct acpi_cpufreq_data *data) -{ - unsigned int cur_freq; - unsigned int i; - - for (i=0; i<100; i++) { - cur_freq = extract_freq(get_cur_val(mask), data); - if (cur_freq == freq) - return 1; - udelay(10); - } - return 0; -} - -static int acpi_cpufreq_target(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) -{ - struct acpi_cpufreq_data *data = drv_data[policy->cpu]; - struct acpi_processor_performance *perf; - struct cpufreq_freqs freqs; - cpumask_t online_policy_cpus; - struct drv_cmd cmd; - unsigned int next_state = 0; /* Index into freq_table */ - unsigned int next_perf_state = 0; /* Index into perf table */ - unsigned int i; - int result = 0; - - dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); - - if (unlikely(data == NULL || - data->acpi_data == NULL || data->freq_table == NULL)) { - return -ENODEV; - } - - perf = data->acpi_data; - result = cpufreq_frequency_table_target(policy, - data->freq_table, - target_freq, - relation, &next_state); - if (unlikely(result)) - return -ENODEV; - -#ifdef CONFIG_HOTPLUG_CPU - /* cpufreq holds the hotplug lock, so we are safe from here on */ - cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); -#else - online_policy_cpus = policy->cpus; -#endif - - next_perf_state = data->freq_table[next_state].index; - if (perf->state == next_perf_state) { - if (unlikely(data->resume)) { - dprintk("Called after resume, resetting to P%d\n", - next_perf_state); - data->resume = 0; - } else { - dprintk("Already at target state (P%d)\n", - next_perf_state); - return 0; - } - } - - switch (data->cpu_feature) { - case SYSTEM_INTEL_MSR_CAPABLE: - cmd.type = SYSTEM_INTEL_MSR_CAPABLE; - cmd.addr.msr.reg = MSR_IA32_PERF_CTL; - cmd.val = (u32) perf->states[next_perf_state].control; - break; - case SYSTEM_IO_CAPABLE: - cmd.type = SYSTEM_IO_CAPABLE; - cmd.addr.io.port = perf->control_register.address; - cmd.addr.io.bit_width = perf->control_register.bit_width; - cmd.val = (u32) perf->states[next_perf_state].control; - break; - default: - return -ENODEV; - } - - cpus_clear(cmd.mask); - - if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) - cmd.mask = online_policy_cpus; - else - cpu_set(policy->cpu, cmd.mask); - - freqs.old = perf->states[perf->state].core_frequency * 1000; - freqs.new = data->freq_table[next_state].frequency; - for_each_cpu_mask(i, cmd.mask) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } - - drv_write(&cmd); - - if (acpi_pstate_strict) { - if (!check_freqs(cmd.mask, freqs.new, data)) { - dprintk("acpi_cpufreq_target failed (%d)\n", - policy->cpu); - return -EAGAIN; - } - } - - for_each_cpu_mask(i, cmd.mask) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - perf->state = next_perf_state; - - return result; -} - -static int acpi_cpufreq_verify(struct cpufreq_policy *policy) -{ - struct acpi_cpufreq_data *data = drv_data[policy->cpu]; - - dprintk("acpi_cpufreq_verify\n"); - - return cpufreq_frequency_table_verify(policy, data->freq_table); -} - -static unsigned long -acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) -{ - struct acpi_processor_performance *perf = data->acpi_data; - - if (cpu_khz) { - /* search the closest match to cpu_khz */ - unsigned int i; - unsigned long freq; - unsigned long freqn = perf->states[0].core_frequency * 1000; - - for (i=0; i<(perf->state_count-1); i++) { - freq = freqn; - freqn = perf->states[i+1].core_frequency * 1000; - if ((2 * cpu_khz) > (freqn + freq)) { - perf->state = i; - return freq; - } - } - perf->state = perf->state_count-1; - return freqn; - } else { - /* assume CPU is at P0... */ - perf->state = 0; - return perf->states[0].core_frequency * 1000; - } -} - -/* - * acpi_cpufreq_early_init - initialize ACPI P-States library - * - * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c) - * in order to determine correct frequency and voltage pairings. We can - * do _PDC and _PSD and find out the processor dependency for the - * actual init that will happen later... - */ -static int __init acpi_cpufreq_early_init(void) -{ - dprintk("acpi_cpufreq_early_init\n"); - - acpi_perf_data = alloc_percpu(struct acpi_processor_performance); - if (!acpi_perf_data) { - dprintk("Memory allocation error for acpi_perf_data.\n"); - return -ENOMEM; - } - - /* Do initialization in ACPI core */ - acpi_processor_preregister_performance(acpi_perf_data); - return 0; -} - -#ifdef CONFIG_SMP -/* - * Some BIOSes do SW_ANY coordination internally, either set it up in hw - * or do it in BIOS firmware and won't inform about it to OS. If not - * detected, this has a side effect of making CPU run at a different speed - * than OS intended it to run at. Detect it and handle it cleanly. - */ -static int bios_with_sw_any_bug; - -static int sw_any_bug_found(struct dmi_system_id *d) -{ - bios_with_sw_any_bug = 1; - return 0; -} - -static struct dmi_system_id sw_any_bug_dmi_table[] = { - { - .callback = sw_any_bug_found, - .ident = "Supermicro Server X6DLP", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"), - DMI_MATCH(DMI_BIOS_VERSION, "080010"), - DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"), - }, - }, - { } -}; -#endif - -static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) -{ - unsigned int i; - unsigned int valid_states = 0; - unsigned int cpu = policy->cpu; - struct acpi_cpufreq_data *data; - unsigned int result = 0; - struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; - struct acpi_processor_performance *perf; - - dprintk("acpi_cpufreq_cpu_init\n"); - - data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); - if (!data) - return -ENOMEM; - - data->acpi_data = percpu_ptr(acpi_perf_data, cpu); - drv_data[cpu] = data; - - if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) - acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; - - result = acpi_processor_register_performance(data->acpi_data, cpu); - if (result) - goto err_free; - - perf = data->acpi_data; - policy->shared_type = perf->shared_type; - - /* - * Will let policy->cpus know about dependency only when software - * coordination is required. - */ - if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || - policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { - policy->cpus = perf->shared_cpu_map; - } - -#ifdef CONFIG_SMP - dmi_check_system(sw_any_bug_dmi_table); - if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) { - policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; - policy->cpus = cpu_core_map[cpu]; - } -#endif - - /* capability check */ - if (perf->state_count <= 1) { - dprintk("No P-States\n"); - result = -ENODEV; - goto err_unreg; - } - - if (perf->control_register.space_id != perf->status_register.space_id) { - result = -ENODEV; - goto err_unreg; - } - - switch (perf->control_register.space_id) { - case ACPI_ADR_SPACE_SYSTEM_IO: - dprintk("SYSTEM IO addr space\n"); - data->cpu_feature = SYSTEM_IO_CAPABLE; - break; - case ACPI_ADR_SPACE_FIXED_HARDWARE: - dprintk("HARDWARE addr space\n"); - if (!check_est_cpu(cpu)) { - result = -ENODEV; - goto err_unreg; - } - data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; - break; - default: - dprintk("Unknown addr space %d\n", - (u32) (perf->control_register.space_id)); - result = -ENODEV; - goto err_unreg; - } - - data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * - (perf->state_count+1), GFP_KERNEL); - if (!data->freq_table) { - result = -ENOMEM; - goto err_unreg; - } - - /* detect transition latency */ - policy->cpuinfo.transition_latency = 0; - for (i=0; istate_count; i++) { - if ((perf->states[i].transition_latency * 1000) > - policy->cpuinfo.transition_latency) - policy->cpuinfo.transition_latency = - perf->states[i].transition_latency * 1000; - } - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - - data->max_freq = perf->states[0].core_frequency * 1000; - /* table init */ - for (i=0; istate_count; i++) { - if (i>0 && perf->states[i].core_frequency >= - data->freq_table[valid_states-1].frequency / 1000) - continue; - - data->freq_table[valid_states].index = i; - data->freq_table[valid_states].frequency = - perf->states[i].core_frequency * 1000; - valid_states++; - } - data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END; - perf->state = 0; - - result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table); - if (result) - goto err_freqfree; - - switch (perf->control_register.space_id) { - case ACPI_ADR_SPACE_SYSTEM_IO: - /* Current speed is unknown and not detectable by IO port */ - policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu); - break; - case ACPI_ADR_SPACE_FIXED_HARDWARE: - acpi_cpufreq_driver.get = get_cur_freq_on_cpu; - policy->cur = get_cur_freq_on_cpu(cpu); - break; - default: - break; - } - - /* notify BIOS that we exist */ - acpi_processor_notify_smm(THIS_MODULE); - - /* Check for APERF/MPERF support in hardware */ - if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) { - unsigned int ecx; - ecx = cpuid_ecx(6); - if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) - acpi_cpufreq_driver.getavg = get_measured_perf; - } - - dprintk("CPU%u - ACPI performance management activated.\n", cpu); - for (i = 0; i < perf->state_count; i++) - dprintk(" %cP%d: %d MHz, %d mW, %d uS\n", - (i == perf->state ? '*' : ' '), i, - (u32) perf->states[i].core_frequency, - (u32) perf->states[i].power, - (u32) perf->states[i].transition_latency); - - cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu); - - /* - * the first call to ->target() should result in us actually - * writing something to the appropriate registers. - */ - data->resume = 1; - - return result; - -err_freqfree: - kfree(data->freq_table); -err_unreg: - acpi_processor_unregister_performance(perf, cpu); -err_free: - kfree(data); - drv_data[cpu] = NULL; - - return result; -} - -static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) -{ - struct acpi_cpufreq_data *data = drv_data[policy->cpu]; - - dprintk("acpi_cpufreq_cpu_exit\n"); - - if (data) { - cpufreq_frequency_table_put_attr(policy->cpu); - drv_data[policy->cpu] = NULL; - acpi_processor_unregister_performance(data->acpi_data, - policy->cpu); - kfree(data); - } - - return 0; -} - -static int acpi_cpufreq_resume(struct cpufreq_policy *policy) -{ - struct acpi_cpufreq_data *data = drv_data[policy->cpu]; - - dprintk("acpi_cpufreq_resume\n"); - - data->resume = 1; - - return 0; -} - -static struct freq_attr *acpi_cpufreq_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver acpi_cpufreq_driver = { - .verify = acpi_cpufreq_verify, - .target = acpi_cpufreq_target, - .init = acpi_cpufreq_cpu_init, - .exit = acpi_cpufreq_cpu_exit, - .resume = acpi_cpufreq_resume, - .name = "acpi-cpufreq", - .owner = THIS_MODULE, - .attr = acpi_cpufreq_attr, -}; - -static int __init acpi_cpufreq_init(void) -{ - int ret; - - dprintk("acpi_cpufreq_init\n"); - - ret = acpi_cpufreq_early_init(); - if (ret) - return ret; - - return cpufreq_register_driver(&acpi_cpufreq_driver); -} - -static void __exit acpi_cpufreq_exit(void) -{ - dprintk("acpi_cpufreq_exit\n"); - - cpufreq_unregister_driver(&acpi_cpufreq_driver); - - free_percpu(acpi_perf_data); - - return; -} - -module_param(acpi_pstate_strict, uint, 0644); -MODULE_PARM_DESC(acpi_pstate_strict, - "value 0 or non-zero. non-zero -> strict ACPI checks are " - "performed during frequency changes."); - -late_initcall(acpi_cpufreq_init); -module_exit(acpi_cpufreq_exit); - -MODULE_ALIAS("acpi"); diff --git a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c deleted file mode 100644 index 66acd503991..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ /dev/null @@ -1,441 +0,0 @@ -/* - * (C) 2004-2006 Sebastian Witt - * - * Licensed under the terms of the GNU GPL License version 2. - * Based upon reverse engineered information - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - -#include -#include -#include -#include -#include -#include -#include - -#define NFORCE2_XTAL 25 -#define NFORCE2_BOOTFSB 0x48 -#define NFORCE2_PLLENABLE 0xa8 -#define NFORCE2_PLLREG 0xa4 -#define NFORCE2_PLLADR 0xa0 -#define NFORCE2_PLL(mul, div) (0x100000 | (mul << 8) | div) - -#define NFORCE2_MIN_FSB 50 -#define NFORCE2_SAFE_DISTANCE 50 - -/* Delay in ms between FSB changes */ -//#define NFORCE2_DELAY 10 - -/* nforce2_chipset: - * FSB is changed using the chipset - */ -static struct pci_dev *nforce2_chipset_dev; - -/* fid: - * multiplier * 10 - */ -static int fid = 0; - -/* min_fsb, max_fsb: - * minimum and maximum FSB (= FSB at boot time) - */ -static int min_fsb = 0; -static int max_fsb = 0; - -MODULE_AUTHOR("Sebastian Witt "); -MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver"); -MODULE_LICENSE("GPL"); - -module_param(fid, int, 0444); -module_param(min_fsb, int, 0444); - -MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); -MODULE_PARM_DESC(min_fsb, - "Minimum FSB to use, if not defined: current FSB - 50"); - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg) - -/** - * nforce2_calc_fsb - calculate FSB - * @pll: PLL value - * - * Calculates FSB from PLL value - */ -static int nforce2_calc_fsb(int pll) -{ - unsigned char mul, div; - - mul = (pll >> 8) & 0xff; - div = pll & 0xff; - - if (div > 0) - return NFORCE2_XTAL * mul / div; - - return 0; -} - -/** - * nforce2_calc_pll - calculate PLL value - * @fsb: FSB - * - * Calculate PLL value for given FSB - */ -static int nforce2_calc_pll(unsigned int fsb) -{ - unsigned char xmul, xdiv; - unsigned char mul = 0, div = 0; - int tried = 0; - - /* Try to calculate multiplier and divider up to 4 times */ - while (((mul == 0) || (div == 0)) && (tried <= 3)) { - for (xdiv = 2; xdiv <= 0x80; xdiv++) - for (xmul = 1; xmul <= 0xfe; xmul++) - if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) == - fsb + tried) { - mul = xmul; - div = xdiv; - } - tried++; - } - - if ((mul == 0) || (div == 0)) - return -1; - - return NFORCE2_PLL(mul, div); -} - -/** - * nforce2_write_pll - write PLL value to chipset - * @pll: PLL value - * - * Writes new FSB PLL value to chipset - */ -static void nforce2_write_pll(int pll) -{ - int temp; - - /* Set the pll addr. to 0x00 */ - pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, 0); - - /* Now write the value in all 64 registers */ - for (temp = 0; temp <= 0x3f; temp++) - pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, pll); - - return; -} - -/** - * nforce2_fsb_read - Read FSB - * - * Read FSB from chipset - * If bootfsb != 0, return FSB at boot-time - */ -static unsigned int nforce2_fsb_read(int bootfsb) -{ - struct pci_dev *nforce2_sub5; - u32 fsb, temp = 0; - - /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ - nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, - 0x01EF,PCI_ANY_ID,PCI_ANY_ID,NULL); - if (!nforce2_sub5) - return 0; - - pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb); - fsb /= 1000000; - - /* Check if PLL register is already set */ - pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); - - if(bootfsb || !temp) - return fsb; - - /* Use PLL register FSB value */ - pci_read_config_dword(nforce2_chipset_dev,NFORCE2_PLLREG, &temp); - fsb = nforce2_calc_fsb(temp); - - return fsb; -} - -/** - * nforce2_set_fsb - set new FSB - * @fsb: New FSB - * - * Sets new FSB - */ -static int nforce2_set_fsb(unsigned int fsb) -{ - u32 temp = 0; - unsigned int tfsb; - int diff; - int pll = 0; - - if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) { - printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb); - return -EINVAL; - } - - tfsb = nforce2_fsb_read(0); - if (!tfsb) { - printk(KERN_ERR "cpufreq: Error while reading the FSB\n"); - return -EINVAL; - } - - /* First write? Then set actual value */ - pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); - if (!temp) { - pll = nforce2_calc_pll(tfsb); - - if (pll < 0) - return -EINVAL; - - nforce2_write_pll(pll); - } - - /* Enable write access */ - temp = 0x01; - pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8)temp); - - diff = tfsb - fsb; - - if (!diff) - return 0; - - while ((tfsb != fsb) && (tfsb <= max_fsb) && (tfsb >= min_fsb)) { - if (diff < 0) - tfsb++; - else - tfsb--; - - /* Calculate the PLL reg. value */ - if ((pll = nforce2_calc_pll(tfsb)) == -1) - return -EINVAL; - - nforce2_write_pll(pll); -#ifdef NFORCE2_DELAY - mdelay(NFORCE2_DELAY); -#endif - } - - temp = 0x40; - pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLADR, (u8)temp); - - return 0; -} - -/** - * nforce2_get - get the CPU frequency - * @cpu: CPU number - * - * Returns the CPU frequency - */ -static unsigned int nforce2_get(unsigned int cpu) -{ - if (cpu) - return 0; - return nforce2_fsb_read(0) * fid * 100; -} - -/** - * nforce2_target - set a new CPUFreq policy - * @policy: new policy - * @target_freq: the target frequency - * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) - * - * Sets a new CPUFreq policy. - */ -static int nforce2_target(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) -{ -// unsigned long flags; - struct cpufreq_freqs freqs; - unsigned int target_fsb; - - if ((target_freq > policy->max) || (target_freq < policy->min)) - return -EINVAL; - - target_fsb = target_freq / (fid * 100); - - freqs.old = nforce2_get(policy->cpu); - freqs.new = target_fsb * fid * 100; - freqs.cpu = 0; /* Only one CPU on nForce2 plattforms */ - - if (freqs.old == freqs.new) - return 0; - - dprintk("Old CPU frequency %d kHz, new %d kHz\n", - freqs.old, freqs.new); - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - /* Disable IRQs */ - //local_irq_save(flags); - - if (nforce2_set_fsb(target_fsb) < 0) - printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n", - target_fsb); - else - dprintk("Changed FSB successfully to %d\n", - target_fsb); - - /* Enable IRQs */ - //local_irq_restore(flags); - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - - return 0; -} - -/** - * nforce2_verify - verifies a new CPUFreq policy - * @policy: new policy - */ -static int nforce2_verify(struct cpufreq_policy *policy) -{ - unsigned int fsb_pol_max; - - fsb_pol_max = policy->max / (fid * 100); - - if (policy->min < (fsb_pol_max * fid * 100)) - policy->max = (fsb_pol_max + 1) * fid * 100; - - cpufreq_verify_within_limits(policy, - policy->cpuinfo.min_freq, - policy->cpuinfo.max_freq); - return 0; -} - -static int nforce2_cpu_init(struct cpufreq_policy *policy) -{ - unsigned int fsb; - unsigned int rfid; - - /* capability check */ - if (policy->cpu != 0) - return -ENODEV; - - /* Get current FSB */ - fsb = nforce2_fsb_read(0); - - if (!fsb) - return -EIO; - - /* FIX: Get FID from CPU */ - if (!fid) { - if (!cpu_khz) { - printk(KERN_WARNING - "cpufreq: cpu_khz not set, can't calculate multiplier!\n"); - return -ENODEV; - } - - fid = cpu_khz / (fsb * 100); - rfid = fid % 5; - - if (rfid) { - if (rfid > 2) - fid += 5 - rfid; - else - fid -= rfid; - } - } - - printk(KERN_INFO "cpufreq: FSB currently at %i MHz, FID %d.%d\n", fsb, - fid / 10, fid % 10); - - /* Set maximum FSB to FSB at boot time */ - max_fsb = nforce2_fsb_read(1); - - if(!max_fsb) - return -EIO; - - if (!min_fsb) - min_fsb = max_fsb - NFORCE2_SAFE_DISTANCE; - - if (min_fsb < NFORCE2_MIN_FSB) - min_fsb = NFORCE2_MIN_FSB; - - /* cpuinfo and default policy values */ - policy->cpuinfo.min_freq = min_fsb * fid * 100; - policy->cpuinfo.max_freq = max_fsb * fid * 100; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - policy->cur = nforce2_get(policy->cpu); - policy->min = policy->cpuinfo.min_freq; - policy->max = policy->cpuinfo.max_freq; - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - - return 0; -} - -static int nforce2_cpu_exit(struct cpufreq_policy *policy) -{ - return 0; -} - -static struct cpufreq_driver nforce2_driver = { - .name = "nforce2", - .verify = nforce2_verify, - .target = nforce2_target, - .get = nforce2_get, - .init = nforce2_cpu_init, - .exit = nforce2_cpu_exit, - .owner = THIS_MODULE, -}; - -/** - * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic - * - * Detects nForce2 A2 and C1 stepping - * - */ -static unsigned int nforce2_detect_chipset(void) -{ - nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, - PCI_DEVICE_ID_NVIDIA_NFORCE2, - PCI_ANY_ID, PCI_ANY_ID, NULL); - - if (nforce2_chipset_dev == NULL) - return -ENODEV; - - printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n", - nforce2_chipset_dev->revision); - printk(KERN_INFO - "cpufreq: FSB changing is maybe unstable and can lead to crashes and data loss.\n"); - - return 0; -} - -/** - * nforce2_init - initializes the nForce2 CPUFreq driver - * - * Initializes the nForce2 FSB support. Returns -ENODEV on unsupported - * devices, -EINVAL on problems during initiatization, and zero on - * success. - */ -static int __init nforce2_init(void) -{ - /* TODO: do we need to detect the processor? */ - - /* detect chipset */ - if (nforce2_detect_chipset()) { - printk(KERN_ERR "cpufreq: No nForce2 chipset.\n"); - return -ENODEV; - } - - return cpufreq_register_driver(&nforce2_driver); -} - -/** - * nforce2_exit - unregisters cpufreq module - * - * Unregisters nForce2 FSB change support. - */ -static void __exit nforce2_exit(void) -{ - cpufreq_unregister_driver(&nforce2_driver); -} - -module_init(nforce2_init); -module_exit(nforce2_exit); - diff --git a/arch/i386/kernel/cpu/cpufreq/e_powersaver.c b/arch/i386/kernel/cpu/cpufreq/e_powersaver.c deleted file mode 100644 index f43d98e11cc..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/e_powersaver.c +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Based on documentation provided by Dave Jones. Thanks! - * - * Licensed under the terms of the GNU GPL License version 2. - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#define EPS_BRAND_C7M 0 -#define EPS_BRAND_C7 1 -#define EPS_BRAND_EDEN 2 -#define EPS_BRAND_C3 3 - -struct eps_cpu_data { - u32 fsb; - struct cpufreq_frequency_table freq_table[]; -}; - -static struct eps_cpu_data *eps_cpu[NR_CPUS]; - - -static unsigned int eps_get(unsigned int cpu) -{ - struct eps_cpu_data *centaur; - u32 lo, hi; - - if (cpu) - return 0; - centaur = eps_cpu[cpu]; - if (centaur == NULL) - return 0; - - /* Return current frequency */ - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - return centaur->fsb * ((lo >> 8) & 0xff); -} - -static int eps_set_state(struct eps_cpu_data *centaur, - unsigned int cpu, - u32 dest_state) -{ - struct cpufreq_freqs freqs; - u32 lo, hi; - int err = 0; - int i; - - freqs.old = eps_get(cpu); - freqs.new = centaur->fsb * ((dest_state >> 8) & 0xff); - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - /* Wait while CPU is busy */ - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - i = 0; - while (lo & ((1 << 16) | (1 << 17))) { - udelay(16); - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - i++; - if (unlikely(i > 64)) { - err = -ENODEV; - goto postchange; - } - } - /* Set new multiplier and voltage */ - wrmsr(MSR_IA32_PERF_CTL, dest_state & 0xffff, 0); - /* Wait until transition end */ - i = 0; - do { - udelay(16); - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - i++; - if (unlikely(i > 64)) { - err = -ENODEV; - goto postchange; - } - } while (lo & ((1 << 16) | (1 << 17))); - - /* Return current frequency */ -postchange: - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - freqs.new = centaur->fsb * ((lo >> 8) & 0xff); - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - return err; -} - -static int eps_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - struct eps_cpu_data *centaur; - unsigned int newstate = 0; - unsigned int cpu = policy->cpu; - unsigned int dest_state; - int ret; - - if (unlikely(eps_cpu[cpu] == NULL)) - return -ENODEV; - centaur = eps_cpu[cpu]; - - if (unlikely(cpufreq_frequency_table_target(policy, - &eps_cpu[cpu]->freq_table[0], - target_freq, - relation, - &newstate))) { - return -EINVAL; - } - - /* Make frequency transition */ - dest_state = centaur->freq_table[newstate].index & 0xffff; - ret = eps_set_state(centaur, cpu, dest_state); - if (ret) - printk(KERN_ERR "eps: Timeout!\n"); - return ret; -} - -static int eps_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, - &eps_cpu[policy->cpu]->freq_table[0]); -} - -static int eps_cpu_init(struct cpufreq_policy *policy) -{ - unsigned int i; - u32 lo, hi; - u64 val; - u8 current_multiplier, current_voltage; - u8 max_multiplier, max_voltage; - u8 min_multiplier, min_voltage; - u8 brand; - u32 fsb; - struct eps_cpu_data *centaur; - struct cpufreq_frequency_table *f_table; - int k, step, voltage; - int ret; - int states; - - if (policy->cpu != 0) - return -ENODEV; - - /* Check brand */ - printk("eps: Detected VIA "); - rdmsr(0x1153, lo, hi); - brand = (((lo >> 2) ^ lo) >> 18) & 3; - switch(brand) { - case EPS_BRAND_C7M: - printk("C7-M\n"); - break; - case EPS_BRAND_C7: - printk("C7\n"); - break; - case EPS_BRAND_EDEN: - printk("Eden\n"); - break; - case EPS_BRAND_C3: - printk("C3\n"); - return -ENODEV; - break; - } - /* Enable Enhanced PowerSaver */ - rdmsrl(MSR_IA32_MISC_ENABLE, val); - if (!(val & 1 << 16)) { - val |= 1 << 16; - wrmsrl(MSR_IA32_MISC_ENABLE, val); - /* Can be locked at 0 */ - rdmsrl(MSR_IA32_MISC_ENABLE, val); - if (!(val & 1 << 16)) { - printk("eps: Can't enable Enhanced PowerSaver\n"); - return -ENODEV; - } - } - - /* Print voltage and multiplier */ - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - current_voltage = lo & 0xff; - printk("eps: Current voltage = %dmV\n", current_voltage * 16 + 700); - current_multiplier = (lo >> 8) & 0xff; - printk("eps: Current multiplier = %d\n", current_multiplier); - - /* Print limits */ - max_voltage = hi & 0xff; - printk("eps: Highest voltage = %dmV\n", max_voltage * 16 + 700); - max_multiplier = (hi >> 8) & 0xff; - printk("eps: Highest multiplier = %d\n", max_multiplier); - min_voltage = (hi >> 16) & 0xff; - printk("eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700); - min_multiplier = (hi >> 24) & 0xff; - printk("eps: Lowest multiplier = %d\n", min_multiplier); - - /* Sanity checks */ - if (current_multiplier == 0 || max_multiplier == 0 - || min_multiplier == 0) - return -EINVAL; - if (current_multiplier > max_multiplier - || max_multiplier <= min_multiplier) - return -EINVAL; - if (current_voltage > 0x1c || max_voltage > 0x1c) - return -EINVAL; - if (max_voltage < min_voltage) - return -EINVAL; - - /* Calc FSB speed */ - fsb = cpu_khz / current_multiplier; - /* Calc number of p-states supported */ - if (brand == EPS_BRAND_C7M) - states = max_multiplier - min_multiplier + 1; - else - states = 2; - - /* Allocate private data and frequency table for current cpu */ - centaur = kzalloc(sizeof(struct eps_cpu_data) - + (states + 1) * sizeof(struct cpufreq_frequency_table), - GFP_KERNEL); - if (!centaur) - return -ENOMEM; - eps_cpu[0] = centaur; - - /* Copy basic values */ - centaur->fsb = fsb; - - /* Fill frequency and MSR value table */ - f_table = ¢aur->freq_table[0]; - if (brand != EPS_BRAND_C7M) { - f_table[0].frequency = fsb * min_multiplier; - f_table[0].index = (min_multiplier << 8) | min_voltage; - f_table[1].frequency = fsb * max_multiplier; - f_table[1].index = (max_multiplier << 8) | max_voltage; - f_table[2].frequency = CPUFREQ_TABLE_END; - } else { - k = 0; - step = ((max_voltage - min_voltage) * 256) - / (max_multiplier - min_multiplier); - for (i = min_multiplier; i <= max_multiplier; i++) { - voltage = (k * step) / 256 + min_voltage; - f_table[k].frequency = fsb * i; - f_table[k].index = (i << 8) | voltage; - k++; - } - f_table[k].frequency = CPUFREQ_TABLE_END; - } - - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = 140000; /* 844mV -> 700mV in ns */ - policy->cur = fsb * current_multiplier; - - ret = cpufreq_frequency_table_cpuinfo(policy, ¢aur->freq_table[0]); - if (ret) { - kfree(centaur); - return ret; - } - - cpufreq_frequency_table_get_attr(¢aur->freq_table[0], policy->cpu); - return 0; -} - -static int eps_cpu_exit(struct cpufreq_policy *policy) -{ - unsigned int cpu = policy->cpu; - struct eps_cpu_data *centaur; - u32 lo, hi; - - if (eps_cpu[cpu] == NULL) - return -ENODEV; - centaur = eps_cpu[cpu]; - - /* Get max frequency */ - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - /* Set max frequency */ - eps_set_state(centaur, cpu, hi & 0xffff); - /* Bye */ - cpufreq_frequency_table_put_attr(policy->cpu); - kfree(eps_cpu[cpu]); - eps_cpu[cpu] = NULL; - return 0; -} - -static struct freq_attr* eps_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver eps_driver = { - .verify = eps_verify, - .target = eps_target, - .init = eps_cpu_init, - .exit = eps_cpu_exit, - .get = eps_get, - .name = "e_powersaver", - .owner = THIS_MODULE, - .attr = eps_attr, -}; - -static int __init eps_init(void) -{ - struct cpuinfo_x86 *c = cpu_data; - - /* This driver will work only on Centaur C7 processors with - * Enhanced SpeedStep/PowerSaver registers */ - if (c->x86_vendor != X86_VENDOR_CENTAUR - || c->x86 != 6 || c->x86_model != 10) - return -ENODEV; - if (!cpu_has(c, X86_FEATURE_EST)) - return -ENODEV; - - if (cpufreq_register_driver(&eps_driver)) - return -EINVAL; - return 0; -} - -static void __exit eps_exit(void) -{ - cpufreq_unregister_driver(&eps_driver); -} - -MODULE_AUTHOR("Rafał Bilski "); -MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's."); -MODULE_LICENSE("GPL"); - -module_init(eps_init); -module_exit(eps_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/elanfreq.c b/arch/i386/kernel/cpu/cpufreq/elanfreq.c deleted file mode 100644 index f317276afa7..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/elanfreq.c +++ /dev/null @@ -1,309 +0,0 @@ -/* - * elanfreq: cpufreq driver for the AMD ELAN family - * - * (c) Copyright 2002 Robert Schwebel - * - * Parts of this code are (c) Sven Geggus - * - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * 2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel - * - */ - -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -#define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ -#define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */ - -/* Module parameter */ -static int max_freq; - -struct s_elan_multiplier { - int clock; /* frequency in kHz */ - int val40h; /* PMU Force Mode register */ - int val80h; /* CPU Clock Speed Register */ -}; - -/* - * It is important that the frequencies - * are listed in ascending order here! - */ -struct s_elan_multiplier elan_multiplier[] = { - {1000, 0x02, 0x18}, - {2000, 0x02, 0x10}, - {4000, 0x02, 0x08}, - {8000, 0x00, 0x00}, - {16000, 0x00, 0x02}, - {33000, 0x00, 0x04}, - {66000, 0x01, 0x04}, - {99000, 0x01, 0x05} -}; - -static struct cpufreq_frequency_table elanfreq_table[] = { - {0, 1000}, - {1, 2000}, - {2, 4000}, - {3, 8000}, - {4, 16000}, - {5, 33000}, - {6, 66000}, - {7, 99000}, - {0, CPUFREQ_TABLE_END}, -}; - - -/** - * elanfreq_get_cpu_frequency: determine current cpu speed - * - * Finds out at which frequency the CPU of the Elan SOC runs - * at the moment. Frequencies from 1 to 33 MHz are generated - * the normal way, 66 and 99 MHz are called "Hyperspeed Mode" - * and have the rest of the chip running with 33 MHz. - */ - -static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) -{ - u8 clockspeed_reg; /* Clock Speed Register */ - - local_irq_disable(); - outb_p(0x80,REG_CSCIR); - clockspeed_reg = inb_p(REG_CSCDR); - local_irq_enable(); - - if ((clockspeed_reg & 0xE0) == 0xE0) - return 0; - - /* Are we in CPU clock multiplied mode (66/99 MHz)? */ - if ((clockspeed_reg & 0xE0) == 0xC0) { - if ((clockspeed_reg & 0x01) == 0) - return 66000; - else - return 99000; - } - - /* 33 MHz is not 32 MHz... */ - if ((clockspeed_reg & 0xE0)==0xA0) - return 33000; - - return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000); -} - - -/** - * elanfreq_set_cpu_frequency: Change the CPU core frequency - * @cpu: cpu number - * @freq: frequency in kHz - * - * This function takes a frequency value and changes the CPU frequency - * according to this. Note that the frequency has to be checked by - * elanfreq_validatespeed() for correctness! - * - * There is no return value. - */ - -static void elanfreq_set_cpu_state (unsigned int state) -{ - struct cpufreq_freqs freqs; - - freqs.old = elanfreq_get_cpu_frequency(0); - freqs.new = elan_multiplier[state].clock; - freqs.cpu = 0; /* elanfreq.c is UP only driver */ - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n", - elan_multiplier[state].clock); - - - /* - * Access to the Elan's internal registers is indexed via - * 0x22: Chip Setup & Control Register Index Register (CSCI) - * 0x23: Chip Setup & Control Register Data Register (CSCD) - * - */ - - /* - * 0x40 is the Power Management Unit's Force Mode Register. - * Bit 6 enables Hyperspeed Mode (66/100 MHz core frequency) - */ - - local_irq_disable(); - outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */ - outb_p(0x00,REG_CSCDR); - local_irq_enable(); /* wait till internal pipelines and */ - udelay(1000); /* buffers have cleaned up */ - - local_irq_disable(); - - /* now, set the CPU clock speed register (0x80) */ - outb_p(0x80,REG_CSCIR); - outb_p(elan_multiplier[state].val80h,REG_CSCDR); - - /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */ - outb_p(0x40,REG_CSCIR); - outb_p(elan_multiplier[state].val40h,REG_CSCDR); - udelay(10000); - local_irq_enable(); - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); -}; - - -/** - * elanfreq_validatespeed: test if frequency range is valid - * @policy: the policy to validate - * - * This function checks if a given frequency range in kHz is valid - * for the hardware supported by the driver. - */ - -static int elanfreq_verify (struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]); -} - -static int elanfreq_target (struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = 0; - - if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], target_freq, relation, &newstate)) - return -EINVAL; - - elanfreq_set_cpu_state(newstate); - - return 0; -} - - -/* - * Module init and exit code - */ - -static int elanfreq_cpu_init(struct cpufreq_policy *policy) -{ - struct cpuinfo_x86 *c = cpu_data; - unsigned int i; - int result; - - /* capability check */ - if ((c->x86_vendor != X86_VENDOR_AMD) || - (c->x86 != 4) || (c->x86_model!=10)) - return -ENODEV; - - /* max freq */ - if (!max_freq) - max_freq = elanfreq_get_cpu_frequency(0); - - /* table init */ - for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { - if (elanfreq_table[i].frequency > max_freq) - elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID; - } - - /* cpuinfo and default policy values */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - policy->cur = elanfreq_get_cpu_frequency(0); - - result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table); - if (result) - return (result); - - cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); - return 0; -} - - -static int elanfreq_cpu_exit(struct cpufreq_policy *policy) -{ - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - - -#ifndef MODULE -/** - * elanfreq_setup - elanfreq command line parameter parsing - * - * elanfreq command line parameter. Use: - * elanfreq=66000 - * to set the maximum CPU frequency to 66 MHz. Note that in - * case you do not give this boot parameter, the maximum - * frequency will fall back to _current_ CPU frequency which - * might be lower. If you build this as a module, use the - * max_freq module parameter instead. - */ -static int __init elanfreq_setup(char *str) -{ - max_freq = simple_strtoul(str, &str, 0); - printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n"); - return 1; -} -__setup("elanfreq=", elanfreq_setup); -#endif - - -static struct freq_attr* elanfreq_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - - -static struct cpufreq_driver elanfreq_driver = { - .get = elanfreq_get_cpu_frequency, - .verify = elanfreq_verify, - .target = elanfreq_target, - .init = elanfreq_cpu_init, - .exit = elanfreq_cpu_exit, - .name = "elanfreq", - .owner = THIS_MODULE, - .attr = elanfreq_attr, -}; - - -static int __init elanfreq_init(void) -{ - struct cpuinfo_x86 *c = cpu_data; - - /* Test if we have the right hardware */ - if ((c->x86_vendor != X86_VENDOR_AMD) || - (c->x86 != 4) || (c->x86_model!=10)) { - printk(KERN_INFO "elanfreq: error: no Elan processor found!\n"); - return -ENODEV; - } - return cpufreq_register_driver(&elanfreq_driver); -} - - -static void __exit elanfreq_exit(void) -{ - cpufreq_unregister_driver(&elanfreq_driver); -} - - -module_param (max_freq, int, 0444); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Robert Schwebel , Sven Geggus "); -MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs"); - -module_init(elanfreq_init); -module_exit(elanfreq_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c deleted file mode 100644 index 461dabc4e49..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c +++ /dev/null @@ -1,495 +0,0 @@ -/* - * Cyrix MediaGX and NatSemi Geode Suspend Modulation - * (C) 2002 Zwane Mwaikambo - * (C) 2002 Hiroshi Miura - * All Rights Reserved - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation - * - * The author(s) of this software shall not be held liable for damages - * of any nature resulting due to the use of this software. This - * software is provided AS-IS with no warranties. - * - * Theoritical note: - * - * (see Geode(tm) CS5530 manual (rev.4.1) page.56) - * - * CPU frequency control on NatSemi Geode GX1/GXLV processor and CS55x0 - * are based on Suspend Moduration. - * - * Suspend Modulation works by asserting and de-asserting the SUSP# pin - * to CPU(GX1/GXLV) for configurable durations. When asserting SUSP# - * the CPU enters an idle state. GX1 stops its core clock when SUSP# is - * asserted then power consumption is reduced. - * - * Suspend Modulation's OFF/ON duration are configurable - * with 'Suspend Modulation OFF Count Register' - * and 'Suspend Modulation ON Count Register'. - * These registers are 8bit counters that represent the number of - * 32us intervals which the SUSP# pin is asserted(ON)/de-asserted(OFF) - * to the processor. - * - * These counters define a ratio which is the effective frequency - * of operation of the system. - * - * OFF Count - * F_eff = Fgx * ---------------------- - * OFF Count + ON Count - * - * 0 <= On Count, Off Count <= 255 - * - * From these limits, we can get register values - * - * off_duration + on_duration <= MAX_DURATION - * on_duration = off_duration * (stock_freq - freq) / freq - * - * off_duration = (freq * DURATION) / stock_freq - * on_duration = DURATION - off_duration - * - * - *--------------------------------------------------------------------------- - * - * ChangeLog: - * Dec. 12, 2003 Hiroshi Miura - * - fix on/off register mistake - * - fix cpu_khz calc when it stops cpu modulation. - * - * Dec. 11, 2002 Hiroshi Miura - * - rewrite for Cyrix MediaGX Cx5510/5520 and - * NatSemi Geode Cs5530(A). - * - * Jul. ??, 2002 Zwane Mwaikambo - * - cs5530_mod patch for 2.4.19-rc1. - * - *--------------------------------------------------------------------------- - * - * Todo - * Test on machines with 5510, 5530, 5530A - */ - -/************************************************************************ - * Suspend Modulation - Definitions * - ************************************************************************/ - -#include -#include -#include -#include -#include -#include -#include -#include - -/* PCI config registers, all at F0 */ -#define PCI_PMER1 0x80 /* power management enable register 1 */ -#define PCI_PMER2 0x81 /* power management enable register 2 */ -#define PCI_PMER3 0x82 /* power management enable register 3 */ -#define PCI_IRQTC 0x8c /* irq speedup timer counter register:typical 2 to 4ms */ -#define PCI_VIDTC 0x8d /* video speedup timer counter register: typical 50 to 100ms */ -#define PCI_MODOFF 0x94 /* suspend modulation OFF counter register, 1 = 32us */ -#define PCI_MODON 0x95 /* suspend modulation ON counter register */ -#define PCI_SUSCFG 0x96 /* suspend configuration register */ - -/* PMER1 bits */ -#define GPM (1<<0) /* global power management */ -#define GIT (1<<1) /* globally enable PM device idle timers */ -#define GTR (1<<2) /* globally enable IO traps */ -#define IRQ_SPDUP (1<<3) /* disable clock throttle during interrupt handling */ -#define VID_SPDUP (1<<4) /* disable clock throttle during vga video handling */ - -/* SUSCFG bits */ -#define SUSMOD (1<<0) /* enable/disable suspend modulation */ -/* the belows support only with cs5530 (after rev.1.2)/cs5530A */ -#define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */ - /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */ -#define SUSCFG (1<<2) /* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */ -/* the belows support only with cs5530A */ -#define PWRSVE_ISA (1<<3) /* stop ISA clock */ -#define PWRSVE (1<<4) /* active idle */ - -struct gxfreq_params { - u8 on_duration; - u8 off_duration; - u8 pci_suscfg; - u8 pci_pmer1; - u8 pci_pmer2; - struct pci_dev *cs55x0; -}; - -static struct gxfreq_params *gx_params; -static int stock_freq; - -/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */ -static int pci_busclk = 0; -module_param (pci_busclk, int, 0444); - -/* maximum duration for which the cpu may be suspended - * (32us * MAX_DURATION). If no parameter is given, this defaults - * to 255. - * Note that this leads to a maximum of 8 ms(!) where the CPU clock - * is suspended -- processing power is just 0.39% of what it used to be, - * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */ -static int max_duration = 255; -module_param (max_duration, int, 0444); - -/* For the default policy, we want at least some processing power - * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV) - */ -#define POLICY_MIN_DIV 20 - - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "gx-suspmod", msg) - -/** - * we can detect a core multipiler from dir0_lsb - * from GX1 datasheet p.56, - * MULT[3:0]: - * 0000 = SYSCLK multiplied by 4 (test only) - * 0001 = SYSCLK multiplied by 10 - * 0010 = SYSCLK multiplied by 4 - * 0011 = SYSCLK multiplied by 6 - * 0100 = SYSCLK multiplied by 9 - * 0101 = SYSCLK multiplied by 5 - * 0110 = SYSCLK multiplied by 7 - * 0111 = SYSCLK multiplied by 8 - * of 33.3MHz - **/ -static int gx_freq_mult[16] = { - 4, 10, 4, 6, 9, 5, 7, 8, - 0, 0, 0, 0, 0, 0, 0, 0 -}; - - -/**************************************************************** - * Low Level chipset interface * - ****************************************************************/ -static struct pci_device_id gx_chipset_tbl[] __initdata = { - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID }, - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID }, - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID }, - { 0, }, -}; - -/** - * gx_detect_chipset: - * - **/ -static __init struct pci_dev *gx_detect_chipset(void) -{ - struct pci_dev *gx_pci = NULL; - - /* check if CPU is a MediaGX or a Geode. */ - if ((current_cpu_data.x86_vendor != X86_VENDOR_NSC) && - (current_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) { - dprintk("error: no MediaGX/Geode processor found!\n"); - return NULL; - } - - /* detect which companion chip is used */ - while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) { - if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) - return gx_pci; - } - - dprintk("error: no supported chipset found!\n"); - return NULL; -} - -/** - * gx_get_cpuspeed: - * - * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi Geode CPU runs. - */ -static unsigned int gx_get_cpuspeed(unsigned int cpu) -{ - if ((gx_params->pci_suscfg & SUSMOD) == 0) - return stock_freq; - - return (stock_freq * gx_params->off_duration) - / (gx_params->on_duration + gx_params->off_duration); -} - -/** - * gx_validate_speed: - * determine current cpu speed - * - **/ - -static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off_duration) -{ - unsigned int i; - u8 tmp_on, tmp_off; - int old_tmp_freq = stock_freq; - int tmp_freq; - - *off_duration=1; - *on_duration=0; - - for (i=max_duration; i>0; i--) { - tmp_off = ((khz * i) / stock_freq) & 0xff; - tmp_on = i - tmp_off; - tmp_freq = (stock_freq * tmp_off) / i; - /* if this relation is closer to khz, use this. If it's equal, - * prefer it, too - lower latency */ - if (abs(tmp_freq - khz) <= abs(old_tmp_freq - khz)) { - *on_duration = tmp_on; - *off_duration = tmp_off; - old_tmp_freq = tmp_freq; - } - } - - return old_tmp_freq; -} - - -/** - * gx_set_cpuspeed: - * set cpu speed in khz. - **/ - -static void gx_set_cpuspeed(unsigned int khz) -{ - u8 suscfg, pmer1; - unsigned int new_khz; - unsigned long flags; - struct cpufreq_freqs freqs; - - freqs.cpu = 0; - freqs.old = gx_get_cpuspeed(0); - - new_khz = gx_validate_speed(khz, &gx_params->on_duration, &gx_params->off_duration); - - freqs.new = new_khz; - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - local_irq_save(flags); - - if (new_khz != stock_freq) { /* if new khz == 100% of CPU speed, it is special case */ - switch (gx_params->cs55x0->device) { - case PCI_DEVICE_ID_CYRIX_5530_LEGACY: - pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP; - /* FIXME: need to test other values -- Zwane,Miura */ - pci_write_config_byte(gx_params->cs55x0, PCI_IRQTC, 4); /* typical 2 to 4ms */ - pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */ - pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1); - - if (gx_params->cs55x0->revision < 0x10) { /* CS5530(rev 1.2, 1.3) */ - suscfg = gx_params->pci_suscfg | SUSMOD; - } else { /* CS5530A,B.. */ - suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE; - } - break; - case PCI_DEVICE_ID_CYRIX_5520: - case PCI_DEVICE_ID_CYRIX_5510: - suscfg = gx_params->pci_suscfg | SUSMOD; - break; - default: - local_irq_restore(flags); - dprintk("fatal: try to set unknown chipset.\n"); - return; - } - } else { - suscfg = gx_params->pci_suscfg & ~(SUSMOD); - gx_params->off_duration = 0; - gx_params->on_duration = 0; - dprintk("suspend modulation disabled: cpu runs 100 percent speed.\n"); - } - - pci_write_config_byte(gx_params->cs55x0, PCI_MODOFF, gx_params->off_duration); - pci_write_config_byte(gx_params->cs55x0, PCI_MODON, gx_params->on_duration); - - pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg); - pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg); - - local_irq_restore(flags); - - gx_params->pci_suscfg = suscfg; - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - - dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n", - gx_params->on_duration * 32, gx_params->off_duration * 32); - dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new); -} - -/**************************************************************** - * High level functions * - ****************************************************************/ - -/* - * cpufreq_gx_verify: test if frequency range is valid - * - * This function checks if a given frequency range in kHz is valid - * for the hardware supported by the driver. - */ - -static int cpufreq_gx_verify(struct cpufreq_policy *policy) -{ - unsigned int tmp_freq = 0; - u8 tmp1, tmp2; - - if (!stock_freq || !policy) - return -EINVAL; - - policy->cpu = 0; - cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); - - /* it needs to be assured that at least one supported frequency is - * within policy->min and policy->max. If it is not, policy->max - * needs to be increased until one freuqency is supported. - * policy->min may not be decreased, though. This way we guarantee a - * specific processing capacity. - */ - tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2); - if (tmp_freq < policy->min) - tmp_freq += stock_freq / max_duration; - policy->min = tmp_freq; - if (policy->min > policy->max) - policy->max = tmp_freq; - tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2); - if (tmp_freq > policy->max) - tmp_freq -= stock_freq / max_duration; - policy->max = tmp_freq; - if (policy->max < policy->min) - policy->max = policy->min; - cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); - - return 0; -} - -/* - * cpufreq_gx_target: - * - */ -static int cpufreq_gx_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - u8 tmp1, tmp2; - unsigned int tmp_freq; - - if (!stock_freq || !policy) - return -EINVAL; - - policy->cpu = 0; - - tmp_freq = gx_validate_speed(target_freq, &tmp1, &tmp2); - while (tmp_freq < policy->min) { - tmp_freq += stock_freq / max_duration; - tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2); - } - while (tmp_freq > policy->max) { - tmp_freq -= stock_freq / max_duration; - tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2); - } - - gx_set_cpuspeed(tmp_freq); - - return 0; -} - -static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy) -{ - unsigned int maxfreq, curfreq; - - if (!policy || policy->cpu != 0) - return -ENODEV; - - /* determine maximum frequency */ - if (pci_busclk) { - maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; - } else if (cpu_khz) { - maxfreq = cpu_khz; - } else { - maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; - } - stock_freq = maxfreq; - curfreq = gx_get_cpuspeed(0); - - dprintk("cpu max frequency is %d.\n", maxfreq); - dprintk("cpu current frequency is %dkHz.\n",curfreq); - - /* setup basic struct for cpufreq API */ - policy->cpu = 0; - - if (max_duration < POLICY_MIN_DIV) - policy->min = maxfreq / max_duration; - else - policy->min = maxfreq / POLICY_MIN_DIV; - policy->max = maxfreq; - policy->cur = curfreq; - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.min_freq = maxfreq / max_duration; - policy->cpuinfo.max_freq = maxfreq; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - - return 0; -} - -/* - * cpufreq_gx_init: - * MediaGX/Geode GX initialize cpufreq driver - */ -static struct cpufreq_driver gx_suspmod_driver = { - .get = gx_get_cpuspeed, - .verify = cpufreq_gx_verify, - .target = cpufreq_gx_target, - .init = cpufreq_gx_cpu_init, - .name = "gx-suspmod", - .owner = THIS_MODULE, -}; - -static int __init cpufreq_gx_init(void) -{ - int ret; - struct gxfreq_params *params; - struct pci_dev *gx_pci; - - /* Test if we have the right hardware */ - if ((gx_pci = gx_detect_chipset()) == NULL) - return -ENODEV; - - /* check whether module parameters are sane */ - if (max_duration > 0xff) - max_duration = 0xff; - - dprintk("geode suspend modulation available.\n"); - - params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL); - if (params == NULL) - return -ENOMEM; - - params->cs55x0 = gx_pci; - gx_params = params; - - /* keep cs55x0 configurations */ - pci_read_config_byte(params->cs55x0, PCI_SUSCFG, &(params->pci_suscfg)); - pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1)); - pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2)); - pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration)); - pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration)); - - if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { - kfree(params); - return ret; /* register error! */ - } - - return 0; -} - -static void __exit cpufreq_gx_exit(void) -{ - cpufreq_unregister_driver(&gx_suspmod_driver); - pci_dev_put(gx_params->cs55x0); - kfree(gx_params); -} - -MODULE_AUTHOR ("Hiroshi Miura "); -MODULE_DESCRIPTION ("Cpufreq driver for Cyrix MediaGX and NatSemi Geode"); -MODULE_LICENSE ("GPL"); - -module_init(cpufreq_gx_init); -module_exit(cpufreq_gx_exit); - diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c deleted file mode 100644 index f0cce3c2dc3..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ /dev/null @@ -1,1024 +0,0 @@ -/* - * (C) 2001-2004 Dave Jones. - * (C) 2002 Padraig Brady. - * - * Licensed under the terms of the GNU GPL License version 2. - * Based upon datasheets & sample CPUs kindly provided by VIA. - * - * VIA have currently 3 different versions of Longhaul. - * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147. - * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0. - * Version 2 of longhaul is backward compatible with v1, but adds - * LONGHAUL MSR for purpose of both frequency and voltage scaling. - * Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C). - * Version 3 of longhaul got renamed to Powersaver and redesigned - * to use only the POWERSAVER MSR at 0x110a. - * It is present in Ezra-T (C5M), Nehemiah (C5X) and above. - * It's pretty much the same feature wise to longhaul v2, though - * there is provision for scaling FSB too, but this doesn't work - * too well in practice so we don't even try to use this. - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "longhaul.h" - -#define PFX "longhaul: " - -#define TYPE_LONGHAUL_V1 1 -#define TYPE_LONGHAUL_V2 2 -#define TYPE_POWERSAVER 3 - -#define CPU_SAMUEL 1 -#define CPU_SAMUEL2 2 -#define CPU_EZRA 3 -#define CPU_EZRA_T 4 -#define CPU_NEHEMIAH 5 -#define CPU_NEHEMIAH_C 6 - -/* Flags */ -#define USE_ACPI_C3 (1 << 1) -#define USE_NORTHBRIDGE (1 << 2) - -static int cpu_model; -static unsigned int numscales=16; -static unsigned int fsb; - -static const struct mV_pos *vrm_mV_table; -static const unsigned char *mV_vrm_table; - -static unsigned int highest_speed, lowest_speed; /* kHz */ -static unsigned int minmult, maxmult; -static int can_scale_voltage; -static struct acpi_processor *pr = NULL; -static struct acpi_processor_cx *cx = NULL; -static u32 acpi_regs_addr; -static u8 longhaul_flags; -static unsigned int longhaul_index; - -/* Module parameters */ -static int scale_voltage; -static int disable_acpi_c3; -static int revid_errata; - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) - - -/* Clock ratios multiplied by 10 */ -static int clock_ratio[32]; -static int eblcr_table[32]; -static int longhaul_version; -static struct cpufreq_frequency_table *longhaul_table; - -#ifdef CONFIG_CPU_FREQ_DEBUG -static char speedbuffer[8]; - -static char *print_speed(int speed) -{ - if (speed < 1000) { - snprintf(speedbuffer, sizeof(speedbuffer),"%dMHz", speed); - return speedbuffer; - } - - if (speed%1000 == 0) - snprintf(speedbuffer, sizeof(speedbuffer), - "%dGHz", speed/1000); - else - snprintf(speedbuffer, sizeof(speedbuffer), - "%d.%dGHz", speed/1000, (speed%1000)/100); - - return speedbuffer; -} -#endif - - -static unsigned int calc_speed(int mult) -{ - int khz; - khz = (mult/10)*fsb; - if (mult%10) - khz += fsb/2; - khz *= 1000; - return khz; -} - - -static int longhaul_get_cpu_mult(void) -{ - unsigned long invalue=0,lo, hi; - - rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi); - invalue = (lo & (1<<22|1<<23|1<<24|1<<25)) >>22; - if (longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) { - if (lo & (1<<27)) - invalue+=16; - } - return eblcr_table[invalue]; -} - -/* For processor with BCR2 MSR */ - -static void do_longhaul1(unsigned int clock_ratio_index) -{ - union msr_bcr2 bcr2; - - rdmsrl(MSR_VIA_BCR2, bcr2.val); - /* Enable software clock multiplier */ - bcr2.bits.ESOFTBF = 1; - bcr2.bits.CLOCKMUL = clock_ratio_index & 0xff; - - /* Sync to timer tick */ - safe_halt(); - /* Change frequency on next halt or sleep */ - wrmsrl(MSR_VIA_BCR2, bcr2.val); - /* Invoke transition */ - ACPI_FLUSH_CPU_CACHE(); - halt(); - - /* Disable software clock multiplier */ - local_irq_disable(); - rdmsrl(MSR_VIA_BCR2, bcr2.val); - bcr2.bits.ESOFTBF = 0; - wrmsrl(MSR_VIA_BCR2, bcr2.val); -} - -/* For processor with Longhaul MSR */ - -static void do_powersaver(int cx_address, unsigned int clock_ratio_index, - unsigned int dir) -{ - union msr_longhaul longhaul; - u32 t; - - rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); - /* Setup new frequency */ - if (!revid_errata) - longhaul.bits.RevisionKey = longhaul.bits.RevisionID; - else - longhaul.bits.RevisionKey = 0; - longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf; - longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; - /* Setup new voltage */ - if (can_scale_voltage) - longhaul.bits.SoftVID = (clock_ratio_index >> 8) & 0x1f; - /* Sync to timer tick */ - safe_halt(); - /* Raise voltage if necessary */ - if (can_scale_voltage && dir) { - longhaul.bits.EnableSoftVID = 1; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - /* Change voltage */ - if (!cx_address) { - ACPI_FLUSH_CPU_CACHE(); - halt(); - } else { - ACPI_FLUSH_CPU_CACHE(); - /* Invoke C3 */ - inb(cx_address); - /* Dummy op - must do something useless after P_LVL3 - * read */ - t = inl(acpi_gbl_FADT.xpm_timer_block.address); - } - longhaul.bits.EnableSoftVID = 0; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - } - - /* Change frequency on next halt or sleep */ - longhaul.bits.EnableSoftBusRatio = 1; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - if (!cx_address) { - ACPI_FLUSH_CPU_CACHE(); - halt(); - } else { - ACPI_FLUSH_CPU_CACHE(); - /* Invoke C3 */ - inb(cx_address); - /* Dummy op - must do something useless after P_LVL3 read */ - t = inl(acpi_gbl_FADT.xpm_timer_block.address); - } - /* Disable bus ratio bit */ - longhaul.bits.EnableSoftBusRatio = 0; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - - /* Reduce voltage if necessary */ - if (can_scale_voltage && !dir) { - longhaul.bits.EnableSoftVID = 1; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - /* Change voltage */ - if (!cx_address) { - ACPI_FLUSH_CPU_CACHE(); - halt(); - } else { - ACPI_FLUSH_CPU_CACHE(); - /* Invoke C3 */ - inb(cx_address); - /* Dummy op - must do something useless after P_LVL3 - * read */ - t = inl(acpi_gbl_FADT.xpm_timer_block.address); - } - longhaul.bits.EnableSoftVID = 0; - wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); - } -} - -/** - * longhaul_set_cpu_frequency() - * @clock_ratio_index : bitpattern of the new multiplier. - * - * Sets a new clock ratio. - */ - -static void longhaul_setstate(unsigned int table_index) -{ - unsigned int clock_ratio_index; - int speed, mult; - struct cpufreq_freqs freqs; - unsigned long flags; - unsigned int pic1_mask, pic2_mask; - u16 bm_status = 0; - u32 bm_timeout = 1000; - unsigned int dir = 0; - - clock_ratio_index = longhaul_table[table_index].index; - /* Safety precautions */ - mult = clock_ratio[clock_ratio_index & 0x1f]; - if (mult == -1) - return; - speed = calc_speed(mult); - if ((speed > highest_speed) || (speed < lowest_speed)) - return; - /* Voltage transition before frequency transition? */ - if (can_scale_voltage && longhaul_index < table_index) - dir = 1; - - freqs.old = calc_speed(longhaul_get_cpu_mult()); - freqs.new = speed; - freqs.cpu = 0; /* longhaul.c is UP only driver */ - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - dprintk ("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", - fsb, mult/10, mult%10, print_speed(speed/1000)); -retry_loop: - preempt_disable(); - local_irq_save(flags); - - pic2_mask = inb(0xA1); - pic1_mask = inb(0x21); /* works on C3. save mask. */ - outb(0xFF,0xA1); /* Overkill */ - outb(0xFE,0x21); /* TMR0 only */ - - /* Wait while PCI bus is busy. */ - if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE - || ((pr != NULL) && pr->flags.bm_control))) { - bm_status = inw(acpi_regs_addr); - bm_status &= 1 << 4; - while (bm_status && bm_timeout) { - outw(1 << 4, acpi_regs_addr); - bm_timeout--; - bm_status = inw(acpi_regs_addr); - bm_status &= 1 << 4; - } - } - - if (longhaul_flags & USE_NORTHBRIDGE) { - /* Disable AGP and PCI arbiters */ - outb(3, 0x22); - } else if ((pr != NULL) && pr->flags.bm_control) { - /* Disable bus master arbitration */ - acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1); - } - switch (longhaul_version) { - - /* - * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B]) - * Software controlled multipliers only. - */ - case TYPE_LONGHAUL_V1: - do_longhaul1(clock_ratio_index); - break; - - /* - * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5B] and Ezra [C5C] - * - * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N]) - * Nehemiah can do FSB scaling too, but this has never been proven - * to work in practice. - */ - case TYPE_LONGHAUL_V2: - case TYPE_POWERSAVER: - if (longhaul_flags & USE_ACPI_C3) { - /* Don't allow wakeup */ - acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); - do_powersaver(cx->address, clock_ratio_index, dir); - } else { - do_powersaver(0, clock_ratio_index, dir); - } - break; - } - - if (longhaul_flags & USE_NORTHBRIDGE) { - /* Enable arbiters */ - outb(0, 0x22); - } else if ((pr != NULL) && pr->flags.bm_control) { - /* Enable bus master arbitration */ - acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0); - } - outb(pic2_mask,0xA1); /* restore mask */ - outb(pic1_mask,0x21); - - local_irq_restore(flags); - preempt_enable(); - - freqs.new = calc_speed(longhaul_get_cpu_mult()); - /* Check if requested frequency is set. */ - if (unlikely(freqs.new != speed)) { - printk(KERN_INFO PFX "Failed to set requested frequency!\n"); - /* Revision ID = 1 but processor is expecting revision key - * equal to 0. Jumpers at the bottom of processor will change - * multiplier and FSB, but will not change bits in Longhaul - * MSR nor enable voltage scaling. */ - if (!revid_errata) { - printk(KERN_INFO PFX "Enabling \"Ignore Revision ID\" " - "option.\n"); - revid_errata = 1; - msleep(200); - goto retry_loop; - } - /* Why ACPI C3 sometimes doesn't work is a mystery for me. - * But it does happen. Processor is entering ACPI C3 state, - * but it doesn't change frequency. I tried poking various - * bits in northbridge registers, but without success. */ - if (longhaul_flags & USE_ACPI_C3) { - printk(KERN_INFO PFX "Disabling ACPI C3 support.\n"); - longhaul_flags &= ~USE_ACPI_C3; - if (revid_errata) { - printk(KERN_INFO PFX "Disabling \"Ignore " - "Revision ID\" option.\n"); - revid_errata = 0; - } - msleep(200); - goto retry_loop; - } - /* This shouldn't happen. Longhaul ver. 2 was reported not - * working on processors without voltage scaling, but with - * RevID = 1. RevID errata will make things right. Just - * to be 100% sure. */ - if (longhaul_version == TYPE_LONGHAUL_V2) { - printk(KERN_INFO PFX "Switching to Longhaul ver. 1\n"); - longhaul_version = TYPE_LONGHAUL_V1; - msleep(200); - goto retry_loop; - } - } - /* Report true CPU frequency */ - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - - if (!bm_timeout) - printk(KERN_INFO PFX "Warning: Timeout while waiting for idle PCI bus.\n"); -} - -/* - * Centaur decided to make life a little more tricky. - * Only longhaul v1 is allowed to read EBLCR BSEL[0:1]. - * Samuel2 and above have to try and guess what the FSB is. - * We do this by assuming we booted at maximum multiplier, and interpolate - * between that value multiplied by possible FSBs and cpu_mhz which - * was calculated at boot time. Really ugly, but no other way to do this. - */ - -#define ROUNDING 0xf - -static int guess_fsb(int mult) -{ - int speed = cpu_khz / 1000; - int i; - int speeds[] = { 666, 1000, 1333, 2000 }; - int f_max, f_min; - - for (i = 0; i < 4; i++) { - f_max = ((speeds[i] * mult) + 50) / 100; - f_max += (ROUNDING / 2); - f_min = f_max - ROUNDING; - if ((speed <= f_max) && (speed >= f_min)) - return speeds[i] / 10; - } - return 0; -} - - -static int __init longhaul_get_ranges(void) -{ - unsigned int i, j, k = 0; - unsigned int ratio; - int mult; - - /* Get current frequency */ - mult = longhaul_get_cpu_mult(); - if (mult == -1) { - printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n"); - return -EINVAL; - } - fsb = guess_fsb(mult); - if (fsb == 0) { - printk(KERN_INFO PFX "Invalid (reserved) FSB!\n"); - return -EINVAL; - } - /* Get max multiplier - as we always did. - * Longhaul MSR is usefull only when voltage scaling is enabled. - * C3 is booting at max anyway. */ - maxmult = mult; - /* Get min multiplier */ - switch (cpu_model) { - case CPU_NEHEMIAH: - minmult = 50; - break; - case CPU_NEHEMIAH_C: - minmult = 40; - break; - default: - minmult = 30; - break; - } - - dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n", - minmult/10, minmult%10, maxmult/10, maxmult%10); - - highest_speed = calc_speed(maxmult); - lowest_speed = calc_speed(minmult); - dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, - print_speed(lowest_speed/1000), - print_speed(highest_speed/1000)); - - if (lowest_speed == highest_speed) { - printk (KERN_INFO PFX "highestspeed == lowest, aborting.\n"); - return -EINVAL; - } - if (lowest_speed > highest_speed) { - printk (KERN_INFO PFX "nonsense! lowest (%d > %d) !\n", - lowest_speed, highest_speed); - return -EINVAL; - } - - longhaul_table = kmalloc((numscales + 1) * sizeof(struct cpufreq_frequency_table), GFP_KERNEL); - if(!longhaul_table) - return -ENOMEM; - - for (j = 0; j < numscales; j++) { - ratio = clock_ratio[j]; - if (ratio == -1) - continue; - if (ratio > maxmult || ratio < minmult) - continue; - longhaul_table[k].frequency = calc_speed(ratio); - longhaul_table[k].index = j; - k++; - } - if (k <= 1) { - kfree(longhaul_table); - return -ENODEV; - } - /* Sort */ - for (j = 0; j < k - 1; j++) { - unsigned int min_f, min_i; - min_f = longhaul_table[j].frequency; - min_i = j; - for (i = j + 1; i < k; i++) { - if (longhaul_table[i].frequency < min_f) { - min_f = longhaul_table[i].frequency; - min_i = i; - } - } - if (min_i != j) { - unsigned int temp; - temp = longhaul_table[j].frequency; - longhaul_table[j].frequency = longhaul_table[min_i].frequency; - longhaul_table[min_i].frequency = temp; - temp = longhaul_table[j].index; - longhaul_table[j].index = longhaul_table[min_i].index; - longhaul_table[min_i].index = temp; - } - } - - longhaul_table[k].frequency = CPUFREQ_TABLE_END; - - /* Find index we are running on */ - for (j = 0; j < k; j++) { - if (clock_ratio[longhaul_table[j].index & 0x1f] == mult) { - longhaul_index = j; - break; - } - } - return 0; -} - - -static void __init longhaul_setup_voltagescaling(void) -{ - union msr_longhaul longhaul; - struct mV_pos minvid, maxvid, vid; - unsigned int j, speed, pos, kHz_step, numvscales; - int min_vid_speed; - - rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); - if (!(longhaul.bits.RevisionID & 1)) { - printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n"); - return; - } - - if (!longhaul.bits.VRMRev) { - printk(KERN_INFO PFX "VRM 8.5\n"); - vrm_mV_table = &vrm85_mV[0]; - mV_vrm_table = &mV_vrm85[0]; - } else { - printk(KERN_INFO PFX "Mobile VRM\n"); - if (cpu_model < CPU_NEHEMIAH) - return; - vrm_mV_table = &mobilevrm_mV[0]; - mV_vrm_table = &mV_mobilevrm[0]; - } - - minvid = vrm_mV_table[longhaul.bits.MinimumVID]; - maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; - - if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { - printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " - "Voltage scaling disabled.\n", - minvid.mV/1000, minvid.mV%1000, maxvid.mV/1000, maxvid.mV%1000); - return; - } - - if (minvid.mV == maxvid.mV) { - printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are " - "both %d.%03d. Voltage scaling disabled\n", - maxvid.mV/1000, maxvid.mV%1000); - return; - } - - /* How many voltage steps */ - numvscales = maxvid.pos - minvid.pos + 1; - printk(KERN_INFO PFX - "Max VID=%d.%03d " - "Min VID=%d.%03d, " - "%d possible voltage scales\n", - maxvid.mV/1000, maxvid.mV%1000, - minvid.mV/1000, minvid.mV%1000, - numvscales); - - /* Calculate max frequency at min voltage */ - j = longhaul.bits.MinMHzBR; - if (longhaul.bits.MinMHzBR4) - j += 16; - min_vid_speed = eblcr_table[j]; - if (min_vid_speed == -1) - return; - switch (longhaul.bits.MinMHzFSB) { - case 0: - min_vid_speed *= 13333; - break; - case 1: - min_vid_speed *= 10000; - break; - case 3: - min_vid_speed *= 6666; - break; - default: - return; - break; - } - if (min_vid_speed >= highest_speed) - return; - /* Calculate kHz for one voltage step */ - kHz_step = (highest_speed - min_vid_speed) / numvscales; - - j = 0; - while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) { - speed = longhaul_table[j].frequency; - if (speed > min_vid_speed) - pos = (speed - min_vid_speed) / kHz_step + minvid.pos; - else - pos = minvid.pos; - longhaul_table[j].index |= mV_vrm_table[pos] << 8; - vid = vrm_mV_table[mV_vrm_table[pos]]; - printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", speed, j, vid.mV); - j++; - } - - can_scale_voltage = 1; - printk(KERN_INFO PFX "Voltage scaling enabled.\n"); -} - - -static int longhaul_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, longhaul_table); -} - - -static int longhaul_target(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) -{ - unsigned int table_index = 0; - unsigned int i; - unsigned int dir = 0; - u8 vid, current_vid; - - if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index)) - return -EINVAL; - - /* Don't set same frequency again */ - if (longhaul_index == table_index) - return 0; - - if (!can_scale_voltage) - longhaul_setstate(table_index); - else { - /* On test system voltage transitions exceeding single - * step up or down were turning motherboard off. Both - * "ondemand" and "userspace" are unsafe. C7 is doing - * this in hardware, C3 is old and we need to do this - * in software. */ - i = longhaul_index; - current_vid = (longhaul_table[longhaul_index].index >> 8) & 0x1f; - if (table_index > longhaul_index) - dir = 1; - while (i != table_index) { - vid = (longhaul_table[i].index >> 8) & 0x1f; - if (vid != current_vid) { - longhaul_setstate(i); - current_vid = vid; - msleep(200); - } - if (dir) - i++; - else - i--; - } - longhaul_setstate(table_index); - } - longhaul_index = table_index; - return 0; -} - - -static unsigned int longhaul_get(unsigned int cpu) -{ - if (cpu) - return 0; - return calc_speed(longhaul_get_cpu_mult()); -} - -static acpi_status longhaul_walk_callback(acpi_handle obj_handle, - u32 nesting_level, - void *context, void **return_value) -{ - struct acpi_device *d; - - if ( acpi_bus_get_device(obj_handle, &d) ) { - return 0; - } - *return_value = (void *)acpi_driver_data(d); - return 1; -} - -/* VIA don't support PM2 reg, but have something similar */ -static int enable_arbiter_disable(void) -{ - struct pci_dev *dev; - int status = 1; - int reg; - u8 pci_cmd; - - /* Find PLE133 host bridge */ - reg = 0x78; - dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, - NULL); - /* Find CLE266 host bridge */ - if (dev == NULL) { - reg = 0x76; - dev = pci_get_device(PCI_VENDOR_ID_VIA, - PCI_DEVICE_ID_VIA_862X_0, NULL); - /* Find CN400 V-Link host bridge */ - if (dev == NULL) - dev = pci_get_device(PCI_VENDOR_ID_VIA, 0x7259, NULL); - } - if (dev != NULL) { - /* Enable access to port 0x22 */ - pci_read_config_byte(dev, reg, &pci_cmd); - if (!(pci_cmd & 1<<7)) { - pci_cmd |= 1<<7; - pci_write_config_byte(dev, reg, pci_cmd); - pci_read_config_byte(dev, reg, &pci_cmd); - if (!(pci_cmd & 1<<7)) { - printk(KERN_ERR PFX - "Can't enable access to port 0x22.\n"); - status = 0; - } - } - pci_dev_put(dev); - return status; - } - return 0; -} - -static int longhaul_setup_southbridge(void) -{ - struct pci_dev *dev; - u8 pci_cmd; - - /* Find VT8235 southbridge */ - dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL); - if (dev == NULL) - /* Find VT8237 southbridge */ - dev = pci_get_device(PCI_VENDOR_ID_VIA, - PCI_DEVICE_ID_VIA_8237, NULL); - if (dev != NULL) { - /* Set transition time to max */ - pci_read_config_byte(dev, 0xec, &pci_cmd); - pci_cmd &= ~(1 << 2); - pci_write_config_byte(dev, 0xec, pci_cmd); - pci_read_config_byte(dev, 0xe4, &pci_cmd); - pci_cmd &= ~(1 << 7); - pci_write_config_byte(dev, 0xe4, pci_cmd); - pci_read_config_byte(dev, 0xe5, &pci_cmd); - pci_cmd |= 1 << 7; - pci_write_config_byte(dev, 0xe5, pci_cmd); - /* Get address of ACPI registers block*/ - pci_read_config_byte(dev, 0x81, &pci_cmd); - if (pci_cmd & 1 << 7) { - pci_read_config_dword(dev, 0x88, &acpi_regs_addr); - acpi_regs_addr &= 0xff00; - printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", acpi_regs_addr); - } - - pci_dev_put(dev); - return 1; - } - return 0; -} - -static int __init longhaul_cpu_init(struct cpufreq_policy *policy) -{ - struct cpuinfo_x86 *c = cpu_data; - char *cpuname=NULL; - int ret; - u32 lo, hi; - - /* Check what we have on this motherboard */ - switch (c->x86_model) { - case 6: - cpu_model = CPU_SAMUEL; - cpuname = "C3 'Samuel' [C5A]"; - longhaul_version = TYPE_LONGHAUL_V1; - memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio)); - memcpy (eblcr_table, samuel1_eblcr, sizeof(samuel1_eblcr)); - break; - - case 7: - switch (c->x86_mask) { - case 0: - longhaul_version = TYPE_LONGHAUL_V1; - cpu_model = CPU_SAMUEL2; - cpuname = "C3 'Samuel 2' [C5B]"; - /* Note, this is not a typo, early Samuel2's had - * Samuel1 ratios. */ - memcpy(clock_ratio, samuel1_clock_ratio, - sizeof(samuel1_clock_ratio)); - memcpy(eblcr_table, samuel2_eblcr, - sizeof(samuel2_eblcr)); - break; - case 1 ... 15: - longhaul_version = TYPE_LONGHAUL_V1; - if (c->x86_mask < 8) { - cpu_model = CPU_SAMUEL2; - cpuname = "C3 'Samuel 2' [C5B]"; - } else { - cpu_model = CPU_EZRA; - cpuname = "C3 'Ezra' [C5C]"; - } - memcpy(clock_ratio, ezra_clock_ratio, - sizeof(ezra_clock_ratio)); - memcpy(eblcr_table, ezra_eblcr, - sizeof(ezra_eblcr)); - break; - } - break; - - case 8: - cpu_model = CPU_EZRA_T; - cpuname = "C3 'Ezra-T' [C5M]"; - longhaul_version = TYPE_POWERSAVER; - numscales=32; - memcpy (clock_ratio, ezrat_clock_ratio, sizeof(ezrat_clock_ratio)); - memcpy (eblcr_table, ezrat_eblcr, sizeof(ezrat_eblcr)); - break; - - case 9: - longhaul_version = TYPE_POWERSAVER; - numscales = 32; - memcpy(clock_ratio, - nehemiah_clock_ratio, - sizeof(nehemiah_clock_ratio)); - memcpy(eblcr_table, nehemiah_eblcr, sizeof(nehemiah_eblcr)); - switch (c->x86_mask) { - case 0 ... 1: - cpu_model = CPU_NEHEMIAH; - cpuname = "C3 'Nehemiah A' [C5XLOE]"; - break; - case 2 ... 4: - cpu_model = CPU_NEHEMIAH; - cpuname = "C3 'Nehemiah B' [C5XLOH]"; - break; - case 5 ... 15: - cpu_model = CPU_NEHEMIAH_C; - cpuname = "C3 'Nehemiah C' [C5P]"; - break; - } - break; - - default: - cpuname = "Unknown"; - break; - } - /* Check Longhaul ver. 2 */ - if (longhaul_version == TYPE_LONGHAUL_V2) { - rdmsr(MSR_VIA_LONGHAUL, lo, hi); - if (lo == 0 && hi == 0) - /* Looks like MSR isn't present */ - longhaul_version = TYPE_LONGHAUL_V1; - } - - printk (KERN_INFO PFX "VIA %s CPU detected. ", cpuname); - switch (longhaul_version) { - case TYPE_LONGHAUL_V1: - case TYPE_LONGHAUL_V2: - printk ("Longhaul v%d supported.\n", longhaul_version); - break; - case TYPE_POWERSAVER: - printk ("Powersaver supported.\n"); - break; - }; - - /* Doesn't hurt */ - longhaul_setup_southbridge(); - - /* Find ACPI data for processor */ - acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, &longhaul_walk_callback, - NULL, (void *)&pr); - - /* Check ACPI support for C3 state */ - if (pr != NULL && longhaul_version == TYPE_POWERSAVER) { - cx = &pr->power.states[ACPI_STATE_C3]; - if (cx->address > 0 && cx->latency <= 1000) - longhaul_flags |= USE_ACPI_C3; - } - /* Disable if it isn't working */ - if (disable_acpi_c3) - longhaul_flags &= ~USE_ACPI_C3; - /* Check if northbridge is friendly */ - if (enable_arbiter_disable()) - longhaul_flags |= USE_NORTHBRIDGE; - - /* Check ACPI support for bus master arbiter disable */ - if (!(longhaul_flags & USE_ACPI_C3 - || longhaul_flags & USE_NORTHBRIDGE) - && ((pr == NULL) || !(pr->flags.bm_control))) { - printk(KERN_ERR PFX - "No ACPI support. Unsupported northbridge.\n"); - return -ENODEV; - } - - if (longhaul_flags & USE_NORTHBRIDGE) - printk(KERN_INFO PFX "Using northbridge support.\n"); - if (longhaul_flags & USE_ACPI_C3) - printk(KERN_INFO PFX "Using ACPI support.\n"); - - ret = longhaul_get_ranges(); - if (ret != 0) - return ret; - - if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0)) - longhaul_setup_voltagescaling(); - - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = 200000; /* nsec */ - policy->cur = calc_speed(longhaul_get_cpu_mult()); - - ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table); - if (ret) - return ret; - - cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu); - - return 0; -} - -static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy) -{ - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - -static struct freq_attr* longhaul_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver longhaul_driver = { - .verify = longhaul_verify, - .target = longhaul_target, - .get = longhaul_get, - .init = longhaul_cpu_init, - .exit = __devexit_p(longhaul_cpu_exit), - .name = "longhaul", - .owner = THIS_MODULE, - .attr = longhaul_attr, -}; - - -static int __init longhaul_init(void) -{ - struct cpuinfo_x86 *c = cpu_data; - - if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6) - return -ENODEV; - -#ifdef CONFIG_SMP - if (num_online_cpus() > 1) { - printk(KERN_ERR PFX "More than 1 CPU detected, longhaul disabled.\n"); - return -ENODEV; - } -#endif -#ifdef CONFIG_X86_IO_APIC - if (cpu_has_apic) { - printk(KERN_ERR PFX "APIC detected. Longhaul is currently broken in this configuration.\n"); - return -ENODEV; - } -#endif - switch (c->x86_model) { - case 6 ... 9: - return cpufreq_register_driver(&longhaul_driver); - case 10: - printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n"); - default: - ;; - } - - return -ENODEV; -} - - -static void __exit longhaul_exit(void) -{ - int i; - - for (i=0; i < numscales; i++) { - if (clock_ratio[i] == maxmult) { - longhaul_setstate(i); - break; - } - } - - cpufreq_unregister_driver(&longhaul_driver); - kfree(longhaul_table); -} - -/* Even if BIOS is exporting ACPI C3 state, and it is used - * with success when CPU is idle, this state doesn't - * trigger frequency transition in some cases. */ -module_param (disable_acpi_c3, int, 0644); -MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support"); -/* Change CPU voltage with frequency. Very usefull to save - * power, but most VIA C3 processors aren't supporting it. */ -module_param (scale_voltage, int, 0644); -MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); -/* Force revision key to 0 for processors which doesn't - * support voltage scaling, but are introducing itself as - * such. */ -module_param(revid_errata, int, 0644); -MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); - -MODULE_AUTHOR ("Dave Jones "); -MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); -MODULE_LICENSE ("GPL"); - -late_initcall(longhaul_init); -module_exit(longhaul_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h deleted file mode 100644 index 4fcc320997d..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.h +++ /dev/null @@ -1,353 +0,0 @@ -/* - * longhaul.h - * (C) 2003 Dave Jones. - * - * Licensed under the terms of the GNU GPL License version 2. - * - * VIA-specific information - */ - -union msr_bcr2 { - struct { - unsigned Reseved:19, // 18:0 - ESOFTBF:1, // 19 - Reserved2:3, // 22:20 - CLOCKMUL:4, // 26:23 - Reserved3:5; // 31:27 - } bits; - unsigned long val; -}; - -union msr_longhaul { - struct { - unsigned RevisionID:4, // 3:0 - RevisionKey:4, // 7:4 - EnableSoftBusRatio:1, // 8 - EnableSoftVID:1, // 9 - EnableSoftBSEL:1, // 10 - Reserved:3, // 11:13 - SoftBusRatio4:1, // 14 - VRMRev:1, // 15 - SoftBusRatio:4, // 19:16 - SoftVID:5, // 24:20 - Reserved2:3, // 27:25 - SoftBSEL:2, // 29:28 - Reserved3:2, // 31:30 - MaxMHzBR:4, // 35:32 - MaximumVID:5, // 40:36 - MaxMHzFSB:2, // 42:41 - MaxMHzBR4:1, // 43 - Reserved4:4, // 47:44 - MinMHzBR:4, // 51:48 - MinimumVID:5, // 56:52 - MinMHzFSB:2, // 58:57 - MinMHzBR4:1, // 59 - Reserved5:4; // 63:60 - } bits; - unsigned long long val; -}; - -/* - * Clock ratio tables. Div/Mod by 10 to get ratio. - * The eblcr ones specify the ratio read from the CPU. - * The clock_ratio ones specify what to write to the CPU. - */ - -/* - * VIA C3 Samuel 1 & Samuel 2 (stepping 0) - */ -static const int __initdata samuel1_clock_ratio[16] = { - -1, /* 0000 -> RESERVED */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - -1, /* 0011 -> RESERVED */ - -1, /* 0100 -> RESERVED */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - 55, /* 0111 -> 5.5x */ - 60, /* 1000 -> 6.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 50, /* 1011 -> 5.0x */ - 65, /* 1100 -> 6.5x */ - 75, /* 1101 -> 7.5x */ - -1, /* 1110 -> RESERVED */ - -1, /* 1111 -> RESERVED */ -}; - -static const int __initdata samuel1_eblcr[16] = { - 50, /* 0000 -> RESERVED */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - -1, /* 0011 -> RESERVED */ - 55, /* 0100 -> 5.5x */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - -1, /* 0111 -> RESERVED */ - -1, /* 1000 -> RESERVED */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 60, /* 1011 -> 6.0x */ - -1, /* 1100 -> RESERVED */ - 75, /* 1101 -> 7.5x */ - -1, /* 1110 -> RESERVED */ - 65, /* 1111 -> 6.5x */ -}; - -/* - * VIA C3 Samuel2 Stepping 1->15 - */ -static const int __initdata samuel2_eblcr[16] = { - 50, /* 0000 -> 5.0x */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - 100, /* 0011 -> 10.0x */ - 55, /* 0100 -> 5.5x */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - 110, /* 0111 -> 11.0x */ - 90, /* 1000 -> 9.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 60, /* 1011 -> 6.0x */ - 120, /* 1100 -> 12.0x */ - 75, /* 1101 -> 7.5x */ - 130, /* 1110 -> 13.0x */ - 65, /* 1111 -> 6.5x */ -}; - -/* - * VIA C3 Ezra - */ -static const int __initdata ezra_clock_ratio[16] = { - 100, /* 0000 -> 10.0x */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - 90, /* 0011 -> 9.0x */ - 95, /* 0100 -> 9.5x */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - 55, /* 0111 -> 5.5x */ - 60, /* 1000 -> 6.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 50, /* 1011 -> 5.0x */ - 65, /* 1100 -> 6.5x */ - 75, /* 1101 -> 7.5x */ - 85, /* 1110 -> 8.5x */ - 120, /* 1111 -> 12.0x */ -}; - -static const int __initdata ezra_eblcr[16] = { - 50, /* 0000 -> 5.0x */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - 100, /* 0011 -> 10.0x */ - 55, /* 0100 -> 5.5x */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - 95, /* 0111 -> 9.5x */ - 90, /* 1000 -> 9.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 60, /* 1011 -> 6.0x */ - 120, /* 1100 -> 12.0x */ - 75, /* 1101 -> 7.5x */ - 85, /* 1110 -> 8.5x */ - 65, /* 1111 -> 6.5x */ -}; - -/* - * VIA C3 (Ezra-T) [C5M]. - */ -static const int __initdata ezrat_clock_ratio[32] = { - 100, /* 0000 -> 10.0x */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - 90, /* 0011 -> 9.0x */ - 95, /* 0100 -> 9.5x */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - 55, /* 0111 -> 5.5x */ - 60, /* 1000 -> 6.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 50, /* 1011 -> 5.0x */ - 65, /* 1100 -> 6.5x */ - 75, /* 1101 -> 7.5x */ - 85, /* 1110 -> 8.5x */ - 120, /* 1111 -> 12.0x */ - - -1, /* 0000 -> RESERVED (10.0x) */ - 110, /* 0001 -> 11.0x */ - -1, /* 0010 -> 12.0x */ - -1, /* 0011 -> RESERVED (9.0x)*/ - 105, /* 0100 -> 10.5x */ - 115, /* 0101 -> 11.5x */ - 125, /* 0110 -> 12.5x */ - 135, /* 0111 -> 13.5x */ - 140, /* 1000 -> 14.0x */ - 150, /* 1001 -> 15.0x */ - 160, /* 1010 -> 16.0x */ - 130, /* 1011 -> 13.0x */ - 145, /* 1100 -> 14.5x */ - 155, /* 1101 -> 15.5x */ - -1, /* 1110 -> RESERVED (13.0x) */ - -1, /* 1111 -> RESERVED (12.0x) */ -}; - -static const int __initdata ezrat_eblcr[32] = { - 50, /* 0000 -> 5.0x */ - 30, /* 0001 -> 3.0x */ - 40, /* 0010 -> 4.0x */ - 100, /* 0011 -> 10.0x */ - 55, /* 0100 -> 5.5x */ - 35, /* 0101 -> 3.5x */ - 45, /* 0110 -> 4.5x */ - 95, /* 0111 -> 9.5x */ - 90, /* 1000 -> 9.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 60, /* 1011 -> 6.0x */ - 120, /* 1100 -> 12.0x */ - 75, /* 1101 -> 7.5x */ - 85, /* 1110 -> 8.5x */ - 65, /* 1111 -> 6.5x */ - - -1, /* 0000 -> RESERVED (9.0x) */ - 110, /* 0001 -> 11.0x */ - 120, /* 0010 -> 12.0x */ - -1, /* 0011 -> RESERVED (10.0x)*/ - 135, /* 0100 -> 13.5x */ - 115, /* 0101 -> 11.5x */ - 125, /* 0110 -> 12.5x */ - 105, /* 0111 -> 10.5x */ - 130, /* 1000 -> 13.0x */ - 150, /* 1001 -> 15.0x */ - 160, /* 1010 -> 16.0x */ - 140, /* 1011 -> 14.0x */ - -1, /* 1100 -> RESERVED (12.0x) */ - 155, /* 1101 -> 15.5x */ - -1, /* 1110 -> RESERVED (13.0x) */ - 145, /* 1111 -> 14.5x */ -}; - -/* - * VIA C3 Nehemiah */ - -static const int __initdata nehemiah_clock_ratio[32] = { - 100, /* 0000 -> 10.0x */ - -1, /* 0001 -> 16.0x */ - 40, /* 0010 -> 4.0x */ - 90, /* 0011 -> 9.0x */ - 95, /* 0100 -> 9.5x */ - -1, /* 0101 -> RESERVED */ - 45, /* 0110 -> 4.5x */ - 55, /* 0111 -> 5.5x */ - 60, /* 1000 -> 6.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 50, /* 1011 -> 5.0x */ - 65, /* 1100 -> 6.5x */ - 75, /* 1101 -> 7.5x */ - 85, /* 1110 -> 8.5x */ - 120, /* 1111 -> 12.0x */ - -1, /* 0000 -> 10.0x */ - 110, /* 0001 -> 11.0x */ - -1, /* 0010 -> 12.0x */ - -1, /* 0011 -> 9.0x */ - 105, /* 0100 -> 10.5x */ - 115, /* 0101 -> 11.5x */ - 125, /* 0110 -> 12.5x */ - 135, /* 0111 -> 13.5x */ - 140, /* 1000 -> 14.0x */ - 150, /* 1001 -> 15.0x */ - 160, /* 1010 -> 16.0x */ - 130, /* 1011 -> 13.0x */ - 145, /* 1100 -> 14.5x */ - 155, /* 1101 -> 15.5x */ - -1, /* 1110 -> RESERVED (13.0x) */ - -1, /* 1111 -> 12.0x */ -}; - -static const int __initdata nehemiah_eblcr[32] = { - 50, /* 0000 -> 5.0x */ - 160, /* 0001 -> 16.0x */ - 40, /* 0010 -> 4.0x */ - 100, /* 0011 -> 10.0x */ - 55, /* 0100 -> 5.5x */ - -1, /* 0101 -> RESERVED */ - 45, /* 0110 -> 4.5x */ - 95, /* 0111 -> 9.5x */ - 90, /* 1000 -> 9.0x */ - 70, /* 1001 -> 7.0x */ - 80, /* 1010 -> 8.0x */ - 60, /* 1011 -> 6.0x */ - 120, /* 1100 -> 12.0x */ - 75, /* 1101 -> 7.5x */ - 85, /* 1110 -> 8.5x */ - 65, /* 1111 -> 6.5x */ - 90, /* 0000 -> 9.0x */ - 110, /* 0001 -> 11.0x */ - 120, /* 0010 -> 12.0x */ - 100, /* 0011 -> 10.0x */ - 135, /* 0100 -> 13.5x */ - 115, /* 0101 -> 11.5x */ - 125, /* 0110 -> 12.5x */ - 105, /* 0111 -> 10.5x */ - 130, /* 1000 -> 13.0x */ - 150, /* 1001 -> 15.0x */ - 160, /* 1010 -> 16.0x */ - 140, /* 1011 -> 14.0x */ - 120, /* 1100 -> 12.0x */ - 155, /* 1101 -> 15.5x */ - -1, /* 1110 -> RESERVED (13.0x) */ - 145 /* 1111 -> 14.5x */ -}; - -/* - * Voltage scales. Div/Mod by 1000 to get actual voltage. - * Which scale to use depends on the VRM type in use. - */ - -struct mV_pos { - unsigned short mV; - unsigned short pos; -}; - -static const struct mV_pos __initdata vrm85_mV[32] = { - {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, - {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, - {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, - {1450, 16}, {1400, 14}, {1350, 12}, {1300, 10}, - {1275, 9}, {1225, 7}, {1175, 5}, {1125, 3}, - {1075, 1}, {1825, 31}, {1775, 29}, {1725, 27}, - {1675, 25}, {1625, 23}, {1575, 21}, {1525, 19}, - {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} -}; - -static const unsigned char __initdata mV_vrm85[32] = { - 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, - 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, - 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, - 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 -}; - -static const struct mV_pos __initdata mobilevrm_mV[32] = { - {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, - {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, - {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, - {1150, 19}, {1100, 18}, {1050, 17}, {1000, 16}, - {975, 15}, {950, 14}, {925, 13}, {900, 12}, - {875, 11}, {850, 10}, {825, 9}, {800, 8}, - {775, 7}, {750, 6}, {725, 5}, {700, 4}, - {675, 3}, {650, 2}, {625, 1}, {600, 0} -}; - -static const unsigned char __initdata mV_mobilevrm[32] = { - 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, - 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, - 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, - 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 -}; - diff --git a/arch/i386/kernel/cpu/cpufreq/longrun.c b/arch/i386/kernel/cpu/cpufreq/longrun.c deleted file mode 100644 index b2689514295..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/longrun.c +++ /dev/null @@ -1,325 +0,0 @@ -/* - * (C) 2002 - 2003 Dominik Brodowski - * - * Licensed under the terms of the GNU GPL License version 2. - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longrun", msg) - -static struct cpufreq_driver longrun_driver; - -/** - * longrun_{low,high}_freq is needed for the conversion of cpufreq kHz - * values into per cent values. In TMTA microcode, the following is valid: - * performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) - */ -static unsigned int longrun_low_freq, longrun_high_freq; - - -/** - * longrun_get_policy - get the current LongRun policy - * @policy: struct cpufreq_policy where current policy is written into - * - * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS - * and MSR_TMTA_LONGRUN_CTRL - */ -static void __init longrun_get_policy(struct cpufreq_policy *policy) -{ - u32 msr_lo, msr_hi; - - rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); - dprintk("longrun flags are %x - %x\n", msr_lo, msr_hi); - if (msr_lo & 0x01) - policy->policy = CPUFREQ_POLICY_PERFORMANCE; - else - policy->policy = CPUFREQ_POLICY_POWERSAVE; - - rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); - dprintk("longrun ctrl is %x - %x\n", msr_lo, msr_hi); - msr_lo &= 0x0000007F; - msr_hi &= 0x0000007F; - - if ( longrun_high_freq <= longrun_low_freq ) { - /* Assume degenerate Longrun table */ - policy->min = policy->max = longrun_high_freq; - } else { - policy->min = longrun_low_freq + msr_lo * - ((longrun_high_freq - longrun_low_freq) / 100); - policy->max = longrun_low_freq + msr_hi * - ((longrun_high_freq - longrun_low_freq) / 100); - } - policy->cpu = 0; -} - - -/** - * longrun_set_policy - sets a new CPUFreq policy - * @policy: new policy - * - * Sets a new CPUFreq policy on LongRun-capable processors. This function - * has to be called with cpufreq_driver locked. - */ -static int longrun_set_policy(struct cpufreq_policy *policy) -{ - u32 msr_lo, msr_hi; - u32 pctg_lo, pctg_hi; - - if (!policy) - return -EINVAL; - - if ( longrun_high_freq <= longrun_low_freq ) { - /* Assume degenerate Longrun table */ - pctg_lo = pctg_hi = 100; - } else { - pctg_lo = (policy->min - longrun_low_freq) / - ((longrun_high_freq - longrun_low_freq) / 100); - pctg_hi = (policy->max - longrun_low_freq) / - ((longrun_high_freq - longrun_low_freq) / 100); - } - - if (pctg_hi > 100) - pctg_hi = 100; - if (pctg_lo > pctg_hi) - pctg_lo = pctg_hi; - - /* performance or economy mode */ - rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); - msr_lo &= 0xFFFFFFFE; - switch (policy->policy) { - case CPUFREQ_POLICY_PERFORMANCE: - msr_lo |= 0x00000001; - break; - case CPUFREQ_POLICY_POWERSAVE: - break; - } - wrmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); - - /* lower and upper boundary */ - rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); - msr_lo &= 0xFFFFFF80; - msr_hi &= 0xFFFFFF80; - msr_lo |= pctg_lo; - msr_hi |= pctg_hi; - wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); - - return 0; -} - - -/** - * longrun_verify_poliy - verifies a new CPUFreq policy - * @policy: the policy to verify - * - * Validates a new CPUFreq policy. This function has to be called with - * cpufreq_driver locked. - */ -static int longrun_verify_policy(struct cpufreq_policy *policy) -{ - if (!policy) - return -EINVAL; - - policy->cpu = 0; - cpufreq_verify_within_limits(policy, - policy->cpuinfo.min_freq, - policy->cpuinfo.max_freq); - - if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) && - (policy->policy != CPUFREQ_POLICY_PERFORMANCE)) - return -EINVAL; - - return 0; -} - -static unsigned int longrun_get(unsigned int cpu) -{ - u32 eax, ebx, ecx, edx; - - if (cpu) - return 0; - - cpuid(0x80860007, &eax, &ebx, &ecx, &edx); - dprintk("cpuid eax is %u\n", eax); - - return (eax * 1000); -} - -/** - * longrun_determine_freqs - determines the lowest and highest possible core frequency - * @low_freq: an int to put the lowest frequency into - * @high_freq: an int to put the highest frequency into - * - * Determines the lowest and highest possible core frequencies on this CPU. - * This is necessary to calculate the performance percentage according to - * TMTA rules: - * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) - */ -static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, - unsigned int *high_freq) -{ - u32 msr_lo, msr_hi; - u32 save_lo, save_hi; - u32 eax, ebx, ecx, edx; - u32 try_hi; - struct cpuinfo_x86 *c = cpu_data; - - if (!low_freq || !high_freq) - return -EINVAL; - - if (cpu_has(c, X86_FEATURE_LRTI)) { - /* if the LongRun Table Interface is present, the - * detection is a bit easier: - * For minimum frequency, read out the maximum - * level (msr_hi), write that into "currently - * selected level", and read out the frequency. - * For maximum frequency, read out level zero. - */ - /* minimum */ - rdmsr(MSR_TMTA_LRTI_READOUT, msr_lo, msr_hi); - wrmsr(MSR_TMTA_LRTI_READOUT, msr_hi, msr_hi); - rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); - *low_freq = msr_lo * 1000; /* to kHz */ - - /* maximum */ - wrmsr(MSR_TMTA_LRTI_READOUT, 0, msr_hi); - rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); - *high_freq = msr_lo * 1000; /* to kHz */ - - dprintk("longrun table interface told %u - %u kHz\n", *low_freq, *high_freq); - - if (*low_freq > *high_freq) - *low_freq = *high_freq; - return 0; - } - - /* set the upper border to the value determined during TSC init */ - *high_freq = (cpu_khz / 1000); - *high_freq = *high_freq * 1000; - dprintk("high frequency is %u kHz\n", *high_freq); - - /* get current borders */ - rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); - save_lo = msr_lo & 0x0000007F; - save_hi = msr_hi & 0x0000007F; - - /* if current perf_pctg is larger than 90%, we need to decrease the - * upper limit to make the calculation more accurate. - */ - cpuid(0x80860007, &eax, &ebx, &ecx, &edx); - /* try decreasing in 10% steps, some processors react only - * on some barrier values */ - for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -=10) { - /* set to 0 to try_hi perf_pctg */ - msr_lo &= 0xFFFFFF80; - msr_hi &= 0xFFFFFF80; - msr_hi |= try_hi; - wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); - - /* read out current core MHz and current perf_pctg */ - cpuid(0x80860007, &eax, &ebx, &ecx, &edx); - - /* restore values */ - wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi); - } - dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax); - - /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) - * eqals - * low_freq * ( 1 - perf_pctg) = (cur_freq - high_freq * perf_pctg) - * - * high_freq * perf_pctg is stored tempoarily into "ebx". - */ - ebx = (((cpu_khz / 1000) * ecx) / 100); /* to MHz */ - - if ((ecx > 95) || (ecx == 0) || (eax < ebx)) - return -EIO; - - edx = (eax - ebx) / (100 - ecx); - *low_freq = edx * 1000; /* back to kHz */ - - dprintk("low frequency is %u kHz\n", *low_freq); - - if (*low_freq > *high_freq) - *low_freq = *high_freq; - - return 0; -} - - -static int __init longrun_cpu_init(struct cpufreq_policy *policy) -{ - int result = 0; - - /* capability check */ - if (policy->cpu != 0) - return -ENODEV; - - /* detect low and high frequency */ - result = longrun_determine_freqs(&longrun_low_freq, &longrun_high_freq); - if (result) - return result; - - /* cpuinfo and default policy values */ - policy->cpuinfo.min_freq = longrun_low_freq; - policy->cpuinfo.max_freq = longrun_high_freq; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - longrun_get_policy(policy); - - return 0; -} - - -static struct cpufreq_driver longrun_driver = { - .flags = CPUFREQ_CONST_LOOPS, - .verify = longrun_verify_policy, - .setpolicy = longrun_set_policy, - .get = longrun_get, - .init = longrun_cpu_init, - .name = "longrun", - .owner = THIS_MODULE, -}; - - -/** - * longrun_init - initializes the Transmeta Crusoe LongRun CPUFreq driver - * - * Initializes the LongRun support. - */ -static int __init longrun_init(void) -{ - struct cpuinfo_x86 *c = cpu_data; - - if (c->x86_vendor != X86_VENDOR_TRANSMETA || - !cpu_has(c, X86_FEATURE_LONGRUN)) - return -ENODEV; - - return cpufreq_register_driver(&longrun_driver); -} - - -/** - * longrun_exit - unregisters LongRun support - */ -static void __exit longrun_exit(void) -{ - cpufreq_unregister_driver(&longrun_driver); -} - - -MODULE_AUTHOR ("Dominik Brodowski "); -MODULE_DESCRIPTION ("LongRun driver for Transmeta Crusoe and Efficeon processors."); -MODULE_LICENSE ("GPL"); - -module_init(longrun_init); -module_exit(longrun_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c deleted file mode 100644 index 4c76b511e19..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c +++ /dev/null @@ -1,316 +0,0 @@ -/* - * Pentium 4/Xeon CPU on demand clock modulation/speed scaling - * (C) 2002 - 2003 Dominik Brodowski - * (C) 2002 Zwane Mwaikambo - * (C) 2002 Arjan van de Ven - * (C) 2002 Tora T. Engstad - * All Rights Reserved - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * The author(s) of this software shall not be held liable for damages - * of any nature resulting due to the use of this software. This - * software is provided AS-IS with no warranties. - * - * Date Errata Description - * 20020525 N44, O17 12.5% or 25% DC causes lockup - * - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "speedstep-lib.h" - -#define PFX "p4-clockmod: " -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "p4-clockmod", msg) - -/* - * Duty Cycle (3bits), note DC_DISABLE is not specified in - * intel docs i just use it to mean disable - */ -enum { - DC_RESV, DC_DFLT, DC_25PT, DC_38PT, DC_50PT, - DC_64PT, DC_75PT, DC_88PT, DC_DISABLE -}; - -#define DC_ENTRIES 8 - - -static int has_N44_O17_errata[NR_CPUS]; -static unsigned int stock_freq; -static struct cpufreq_driver p4clockmod_driver; -static unsigned int cpufreq_p4_get(unsigned int cpu); - -static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) -{ - u32 l, h; - - if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV)) - return -EINVAL; - - rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h); - - if (l & 0x01) - dprintk("CPU#%d currently thermal throttled\n", cpu); - - if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT)) - newstate = DC_38PT; - - rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); - if (newstate == DC_DISABLE) { - dprintk("CPU#%d disabling modulation\n", cpu); - wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h); - } else { - dprintk("CPU#%d setting duty cycle to %d%%\n", - cpu, ((125 * newstate) / 10)); - /* bits 63 - 5 : reserved - * bit 4 : enable/disable - * bits 3-1 : duty cycle - * bit 0 : reserved - */ - l = (l & ~14); - l = l | (1<<4) | ((newstate & 0x7)<<1); - wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l, h); - } - - return 0; -} - - -static struct cpufreq_frequency_table p4clockmod_table[] = { - {DC_RESV, CPUFREQ_ENTRY_INVALID}, - {DC_DFLT, 0}, - {DC_25PT, 0}, - {DC_38PT, 0}, - {DC_50PT, 0}, - {DC_64PT, 0}, - {DC_75PT, 0}, - {DC_88PT, 0}, - {DC_DISABLE, 0}, - {DC_RESV, CPUFREQ_TABLE_END}, -}; - - -static int cpufreq_p4_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = DC_RESV; - struct cpufreq_freqs freqs; - int i; - - if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate)) - return -EINVAL; - - freqs.old = cpufreq_p4_get(policy->cpu); - freqs.new = stock_freq * p4clockmod_table[newstate].index / 8; - - if (freqs.new == freqs.old) - return 0; - - /* notifiers */ - for_each_cpu_mask(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } - - /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software - * Developer's Manual, Volume 3 - */ - for_each_cpu_mask(i, policy->cpus) - cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); - - /* notifiers */ - for_each_cpu_mask(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - - return 0; -} - - -static int cpufreq_p4_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, &p4clockmod_table[0]); -} - - -static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) -{ - if (c->x86 == 0x06) { - if (cpu_has(c, X86_FEATURE_EST)) - printk(KERN_WARNING PFX "Warning: EST-capable CPU detected. " - "The acpi-cpufreq module offers voltage scaling" - " in addition of frequency scaling. You should use " - "that instead of p4-clockmod, if possible.\n"); - switch (c->x86_model) { - case 0x0E: /* Core */ - case 0x0F: /* Core Duo */ - p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE); - case 0x0D: /* Pentium M (Dothan) */ - p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; - /* fall through */ - case 0x09: /* Pentium M (Banias) */ - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM); - } - } - - if (c->x86 != 0xF) { - printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to \n"); - return 0; - } - - /* on P-4s, the TSC runs with constant frequency independent whether - * throttling is active or not. */ - p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; - - if (speedstep_detect_processor() == SPEEDSTEP_PROCESSOR_P4M) { - printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. " - "The speedstep-ich or acpi cpufreq modules offer " - "voltage scaling in addition of frequency scaling. " - "You should use either one instead of p4-clockmod, " - "if possible.\n"); - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4M); - } - - return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D); -} - - - -static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) -{ - struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; - int cpuid = 0; - unsigned int i; - -#ifdef CONFIG_SMP - policy->cpus = cpu_sibling_map[policy->cpu]; -#endif - - /* Errata workaround */ - cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask; - switch (cpuid) { - case 0x0f07: - case 0x0f0a: - case 0x0f11: - case 0x0f12: - has_N44_O17_errata[policy->cpu] = 1; - dprintk("has errata -- disabling low frequencies\n"); - } - - /* get max frequency */ - stock_freq = cpufreq_p4_get_frequency(c); - if (!stock_freq) - return -EINVAL; - - /* table init */ - for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { - if ((i<2) && (has_N44_O17_errata[policy->cpu])) - p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; - else - p4clockmod_table[i].frequency = (stock_freq * i)/8; - } - cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu); - - /* cpuinfo and default policy values */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = 1000000; /* assumed */ - policy->cur = stock_freq; - - return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]); -} - - -static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy) -{ - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - -static unsigned int cpufreq_p4_get(unsigned int cpu) -{ - u32 l, h; - - rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); - - if (l & 0x10) { - l = l >> 1; - l &= 0x7; - } else - l = DC_DISABLE; - - if (l != DC_DISABLE) - return (stock_freq * l / 8); - - return stock_freq; -} - -static struct freq_attr* p4clockmod_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver p4clockmod_driver = { - .verify = cpufreq_p4_verify, - .target = cpufreq_p4_target, - .init = cpufreq_p4_cpu_init, - .exit = cpufreq_p4_cpu_exit, - .get = cpufreq_p4_get, - .name = "p4-clockmod", - .owner = THIS_MODULE, - .attr = p4clockmod_attr, -}; - - -static int __init cpufreq_p4_init(void) -{ - struct cpuinfo_x86 *c = cpu_data; - int ret; - - /* - * THERM_CONTROL is architectural for IA32 now, so - * we can rely on the capability checks - */ - if (c->x86_vendor != X86_VENDOR_INTEL) - return -ENODEV; - - if (!test_bit(X86_FEATURE_ACPI, c->x86_capability) || - !test_bit(X86_FEATURE_ACC, c->x86_capability)) - return -ENODEV; - - ret = cpufreq_register_driver(&p4clockmod_driver); - if (!ret) - printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n"); - - return (ret); -} - - -static void __exit cpufreq_p4_exit(void) -{ - cpufreq_unregister_driver(&p4clockmod_driver); -} - - -MODULE_AUTHOR ("Zwane Mwaikambo "); -MODULE_DESCRIPTION ("cpufreq driver for Pentium(TM) 4/Xeon(TM)"); -MODULE_LICENSE ("GPL"); - -late_initcall(cpufreq_p4_init); -module_exit(cpufreq_p4_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k6.c b/arch/i386/kernel/cpu/cpufreq/powernow-k6.c deleted file mode 100644 index f89524051e4..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k6.c +++ /dev/null @@ -1,256 +0,0 @@ -/* - * This file was based upon code in Powertweak Linux (http://powertweak.sf.net) - * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski. - * - * Licensed under the terms of the GNU GPL License version 2. - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - - -#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long - as it is unused */ - -static unsigned int busfreq; /* FSB, in 10 kHz */ -static unsigned int max_multiplier; - - -/* Clock ratio multiplied by 10 - see table 27 in AMD#23446 */ -static struct cpufreq_frequency_table clock_ratio[] = { - {45, /* 000 -> 4.5x */ 0}, - {50, /* 001 -> 5.0x */ 0}, - {40, /* 010 -> 4.0x */ 0}, - {55, /* 011 -> 5.5x */ 0}, - {20, /* 100 -> 2.0x */ 0}, - {30, /* 101 -> 3.0x */ 0}, - {60, /* 110 -> 6.0x */ 0}, - {35, /* 111 -> 3.5x */ 0}, - {0, CPUFREQ_TABLE_END} -}; - - -/** - * powernow_k6_get_cpu_multiplier - returns the current FSB multiplier - * - * Returns the current setting of the frequency multiplier. Core clock - * speed is frequency of the Front-Side Bus multiplied with this value. - */ -static int powernow_k6_get_cpu_multiplier(void) -{ - u64 invalue = 0; - u32 msrval; - - msrval = POWERNOW_IOPORT + 0x1; - wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ - invalue=inl(POWERNOW_IOPORT + 0x8); - msrval = POWERNOW_IOPORT + 0x0; - wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ - - return clock_ratio[(invalue >> 5)&7].index; -} - - -/** - * powernow_k6_set_state - set the PowerNow! multiplier - * @best_i: clock_ratio[best_i] is the target multiplier - * - * Tries to change the PowerNow! multiplier - */ -static void powernow_k6_set_state (unsigned int best_i) -{ - unsigned long outvalue=0, invalue=0; - unsigned long msrval; - struct cpufreq_freqs freqs; - - if (clock_ratio[best_i].index > max_multiplier) { - printk(KERN_ERR "cpufreq: invalid target frequency\n"); - return; - } - - freqs.old = busfreq * powernow_k6_get_cpu_multiplier(); - freqs.new = busfreq * clock_ratio[best_i].index; - freqs.cpu = 0; /* powernow-k6.c is UP only driver */ - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - /* we now need to transform best_i to the BVC format, see AMD#23446 */ - - outvalue = (1<<12) | (1<<10) | (1<<9) | (best_i<<5); - - msrval = POWERNOW_IOPORT + 0x1; - wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ - invalue=inl(POWERNOW_IOPORT + 0x8); - invalue = invalue & 0xf; - outvalue = outvalue | invalue; - outl(outvalue ,(POWERNOW_IOPORT + 0x8)); - msrval = POWERNOW_IOPORT + 0x0; - wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - - return; -} - - -/** - * powernow_k6_verify - verifies a new CPUfreq policy - * @policy: new policy - * - * Policy must be within lowest and highest possible CPU Frequency, - * and at least one possible state must be within min and max. - */ -static int powernow_k6_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, &clock_ratio[0]); -} - - -/** - * powernow_k6_setpolicy - sets a new CPUFreq policy - * @policy: new policy - * @target_freq: the target frequency - * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) - * - * sets a new CPUFreq policy - */ -static int powernow_k6_target (struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = 0; - - if (cpufreq_frequency_table_target(policy, &clock_ratio[0], target_freq, relation, &newstate)) - return -EINVAL; - - powernow_k6_set_state(newstate); - - return 0; -} - - -static int powernow_k6_cpu_init(struct cpufreq_policy *policy) -{ - unsigned int i; - int result; - - if (policy->cpu != 0) - return -ENODEV; - - /* get frequencies */ - max_multiplier = powernow_k6_get_cpu_multiplier(); - busfreq = cpu_khz / max_multiplier; - - /* table init */ - for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { - if (clock_ratio[i].index > max_multiplier) - clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; - else - clock_ratio[i].frequency = busfreq * clock_ratio[i].index; - } - - /* cpuinfo and default policy values */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - policy->cur = busfreq * max_multiplier; - - result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); - if (result) - return (result); - - cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu); - - return 0; -} - - -static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) -{ - unsigned int i; - for (i=0; i<8; i++) { - if (i==max_multiplier) - powernow_k6_set_state(i); - } - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - -static unsigned int powernow_k6_get(unsigned int cpu) -{ - return busfreq * powernow_k6_get_cpu_multiplier(); -} - -static struct freq_attr* powernow_k6_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver powernow_k6_driver = { - .verify = powernow_k6_verify, - .target = powernow_k6_target, - .init = powernow_k6_cpu_init, - .exit = powernow_k6_cpu_exit, - .get = powernow_k6_get, - .name = "powernow-k6", - .owner = THIS_MODULE, - .attr = powernow_k6_attr, -}; - - -/** - * powernow_k6_init - initializes the k6 PowerNow! CPUFreq driver - * - * Initializes the K6 PowerNow! support. Returns -ENODEV on unsupported - * devices, -EINVAL or -ENOMEM on problems during initiatization, and zero - * on success. - */ -static int __init powernow_k6_init(void) -{ - struct cpuinfo_x86 *c = cpu_data; - - if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) || - ((c->x86_model != 12) && (c->x86_model != 13))) - return -ENODEV; - - if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) { - printk("cpufreq: PowerNow IOPORT region already used.\n"); - return -EIO; - } - - if (cpufreq_register_driver(&powernow_k6_driver)) { - release_region (POWERNOW_IOPORT, 16); - return -EINVAL; - } - - return 0; -} - - -/** - * powernow_k6_exit - unregisters AMD K6-2+/3+ PowerNow! support - * - * Unregisters AMD K6-2+ / K6-3+ PowerNow! support. - */ -static void __exit powernow_k6_exit(void) -{ - cpufreq_unregister_driver(&powernow_k6_driver); - release_region (POWERNOW_IOPORT, 16); -} - - -MODULE_AUTHOR ("Arjan van de Ven , Dave Jones , Dominik Brodowski "); -MODULE_DESCRIPTION ("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); -MODULE_LICENSE ("GPL"); - -module_init(powernow_k6_init); -module_exit(powernow_k6_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c deleted file mode 100644 index ca3e1d34188..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c +++ /dev/null @@ -1,703 +0,0 @@ -/* - * AMD K7 Powernow driver. - * (C) 2003 Dave Jones on behalf of SuSE Labs. - * (C) 2003-2004 Dave Jones - * - * Licensed under the terms of the GNU GPL License version 2. - * Based upon datasheets & sample CPUs kindly provided by AMD. - * - * Errata 5: Processor may fail to execute a FID/VID change in presence of interrupt. - * - We cli/sti on stepping A0 CPUs around the FID/VID transition. - * Errata 15: Processors with half frequency multipliers may hang upon wakeup from disconnect. - * - We disable half multipliers if ACPI is used on A0 stepping CPUs. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#ifdef CONFIG_X86_POWERNOW_K7_ACPI -#include -#include -#endif - -#include "powernow-k7.h" - -#define PFX "powernow: " - - -struct psb_s { - u8 signature[10]; - u8 tableversion; - u8 flags; - u16 settlingtime; - u8 reserved1; - u8 numpst; -}; - -struct pst_s { - u32 cpuid; - u8 fsbspeed; - u8 maxfid; - u8 startvid; - u8 numpstates; -}; - -#ifdef CONFIG_X86_POWERNOW_K7_ACPI -union powernow_acpi_control_t { - struct { - unsigned long fid:5, - vid:5, - sgtc:20, - res1:2; - } bits; - unsigned long val; -}; -#endif - -#ifdef CONFIG_CPU_FREQ_DEBUG -/* divide by 1000 to get VCore voltage in V. */ -static const int mobile_vid_table[32] = { - 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650, - 1600, 1550, 1500, 1450, 1400, 1350, 1300, 0, - 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, - 1075, 1050, 1025, 1000, 975, 950, 925, 0, -}; -#endif - -/* divide by 10 to get FID. */ -static const int fid_codes[32] = { - 110, 115, 120, 125, 50, 55, 60, 65, - 70, 75, 80, 85, 90, 95, 100, 105, - 30, 190, 40, 200, 130, 135, 140, 210, - 150, 225, 160, 165, 170, 180, -1, -1, -}; - -/* This parameter is used in order to force ACPI instead of legacy method for - * configuration purpose. - */ - -static int acpi_force; - -static struct cpufreq_frequency_table *powernow_table; - -static unsigned int can_scale_bus; -static unsigned int can_scale_vid; -static unsigned int minimum_speed=-1; -static unsigned int maximum_speed; -static unsigned int number_scales; -static unsigned int fsb; -static unsigned int latency; -static char have_a0; - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k7", msg) - -static int check_fsb(unsigned int fsbspeed) -{ - int delta; - unsigned int f = fsb / 1000; - - delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed; - return (delta < 5); -} - -static int check_powernow(void) -{ - struct cpuinfo_x86 *c = cpu_data; - unsigned int maxei, eax, ebx, ecx, edx; - - if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) { -#ifdef MODULE - printk (KERN_INFO PFX "This module only works with AMD K7 CPUs\n"); -#endif - return 0; - } - - /* Get maximum capabilities */ - maxei = cpuid_eax (0x80000000); - if (maxei < 0x80000007) { /* Any powernow info ? */ -#ifdef MODULE - printk (KERN_INFO PFX "No powernow capabilities detected\n"); -#endif - return 0; - } - - if ((c->x86_model == 6) && (c->x86_mask == 0)) { - printk (KERN_INFO PFX "K7 660[A0] core detected, enabling errata workarounds\n"); - have_a0 = 1; - } - - cpuid(0x80000007, &eax, &ebx, &ecx, &edx); - - /* Check we can actually do something before we say anything.*/ - if (!(edx & (1 << 1 | 1 << 2))) - return 0; - - printk (KERN_INFO PFX "PowerNOW! Technology present. Can scale: "); - - if (edx & 1 << 1) { - printk ("frequency"); - can_scale_bus=1; - } - - if ((edx & (1 << 1 | 1 << 2)) == 0x6) - printk (" and "); - - if (edx & 1 << 2) { - printk ("voltage"); - can_scale_vid=1; - } - - printk (".\n"); - return 1; -} - - -static int get_ranges (unsigned char *pst) -{ - unsigned int j; - unsigned int speed; - u8 fid, vid; - - powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL); - if (!powernow_table) - return -ENOMEM; - - for (j=0 ; j < number_scales; j++) { - fid = *pst++; - - powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10; - powernow_table[j].index = fid; /* lower 8 bits */ - - speed = powernow_table[j].frequency; - - if ((fid_codes[fid] % 10)==5) { -#ifdef CONFIG_X86_POWERNOW_K7_ACPI - if (have_a0 == 1) - powernow_table[j].frequency = CPUFREQ_ENTRY_INVALID; -#endif - } - - if (speed < minimum_speed) - minimum_speed = speed; - if (speed > maximum_speed) - maximum_speed = speed; - - vid = *pst++; - powernow_table[j].index |= (vid << 8); /* upper 8 bits */ - - dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " - "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, - fid_codes[fid] % 10, speed/1000, vid, - mobile_vid_table[vid]/1000, - mobile_vid_table[vid]%1000); - } - powernow_table[number_scales].frequency = CPUFREQ_TABLE_END; - powernow_table[number_scales].index = 0; - - return 0; -} - - -static void change_FID(int fid) -{ - union msr_fidvidctl fidvidctl; - - rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); - if (fidvidctl.bits.FID != fid) { - fidvidctl.bits.SGTC = latency; - fidvidctl.bits.FID = fid; - fidvidctl.bits.VIDC = 0; - fidvidctl.bits.FIDC = 1; - wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); - } -} - - -static void change_VID(int vid) -{ - union msr_fidvidctl fidvidctl; - - rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); - if (fidvidctl.bits.VID != vid) { - fidvidctl.bits.SGTC = latency; - fidvidctl.bits.VID = vid; - fidvidctl.bits.FIDC = 0; - fidvidctl.bits.VIDC = 1; - wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); - } -} - - -static void change_speed (unsigned int index) -{ - u8 fid, vid; - struct cpufreq_freqs freqs; - union msr_fidvidstatus fidvidstatus; - int cfid; - - /* fid are the lower 8 bits of the index we stored into - * the cpufreq frequency table in powernow_decode_bios, - * vid are the upper 8 bits. - */ - - fid = powernow_table[index].index & 0xFF; - vid = (powernow_table[index].index & 0xFF00) >> 8; - - freqs.cpu = 0; - - rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); - cfid = fidvidstatus.bits.CFID; - freqs.old = fsb * fid_codes[cfid] / 10; - - freqs.new = powernow_table[index].frequency; - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - /* Now do the magic poking into the MSRs. */ - - if (have_a0 == 1) /* A0 errata 5 */ - local_irq_disable(); - - if (freqs.old > freqs.new) { - /* Going down, so change FID first */ - change_FID(fid); - change_VID(vid); - } else { - /* Going up, so change VID first */ - change_VID(vid); - change_FID(fid); - } - - - if (have_a0 == 1) - local_irq_enable(); - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); -} - - -#ifdef CONFIG_X86_POWERNOW_K7_ACPI - -static struct acpi_processor_performance *acpi_processor_perf; - -static int powernow_acpi_init(void) -{ - int i; - int retval = 0; - union powernow_acpi_control_t pc; - - if (acpi_processor_perf != NULL && powernow_table != NULL) { - retval = -EINVAL; - goto err0; - } - - acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance), - GFP_KERNEL); - if (!acpi_processor_perf) { - retval = -ENOMEM; - goto err0; - } - - if (acpi_processor_register_performance(acpi_processor_perf, 0)) { - retval = -EIO; - goto err1; - } - - if (acpi_processor_perf->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { - retval = -ENODEV; - goto err2; - } - - if (acpi_processor_perf->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { - retval = -ENODEV; - goto err2; - } - - number_scales = acpi_processor_perf->state_count; - - if (number_scales < 2) { - retval = -ENODEV; - goto err2; - } - - powernow_table = kzalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL); - if (!powernow_table) { - retval = -ENOMEM; - goto err2; - } - - pc.val = (unsigned long) acpi_processor_perf->states[0].control; - for (i = 0; i < number_scales; i++) { - u8 fid, vid; - struct acpi_processor_px *state = - &acpi_processor_perf->states[i]; - unsigned int speed, speed_mhz; - - pc.val = (unsigned long) state->control; - dprintk ("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", - i, - (u32) state->core_frequency, - (u32) state->power, - (u32) state->transition_latency, - (u32) state->control, - pc.bits.sgtc); - - vid = pc.bits.vid; - fid = pc.bits.fid; - - powernow_table[i].frequency = fsb * fid_codes[fid] / 10; - powernow_table[i].index = fid; /* lower 8 bits */ - powernow_table[i].index |= (vid << 8); /* upper 8 bits */ - - speed = powernow_table[i].frequency; - speed_mhz = speed / 1000; - - /* processor_perflib will multiply the MHz value by 1000 to - * get a KHz value (e.g. 1266000). However, powernow-k7 works - * with true KHz values (e.g. 1266768). To ensure that all - * powernow frequencies are available, we must ensure that - * ACPI doesn't restrict them, so we round up the MHz value - * to ensure that perflib's computed KHz value is greater than - * or equal to powernow's KHz value. - */ - if (speed % 1000 > 0) - speed_mhz++; - - if ((fid_codes[fid] % 10)==5) { - if (have_a0 == 1) - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - } - - dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " - "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, - fid_codes[fid] % 10, speed_mhz, vid, - mobile_vid_table[vid]/1000, - mobile_vid_table[vid]%1000); - - if (state->core_frequency != speed_mhz) { - state->core_frequency = speed_mhz; - dprintk(" Corrected ACPI frequency to %d\n", - speed_mhz); - } - - if (latency < pc.bits.sgtc) - latency = pc.bits.sgtc; - - if (speed < minimum_speed) - minimum_speed = speed; - if (speed > maximum_speed) - maximum_speed = speed; - } - - powernow_table[i].frequency = CPUFREQ_TABLE_END; - powernow_table[i].index = 0; - - /* notify BIOS that we exist */ - acpi_processor_notify_smm(THIS_MODULE); - - return 0; - -err2: - acpi_processor_unregister_performance(acpi_processor_perf, 0); -err1: - kfree(acpi_processor_perf); -err0: - printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n"); - acpi_processor_perf = NULL; - return retval; -} -#else -static int powernow_acpi_init(void) -{ - printk(KERN_INFO PFX "no support for ACPI processor found." - " Please recompile your kernel with ACPI processor\n"); - return -EINVAL; -} -#endif - -static int powernow_decode_bios (int maxfid, int startvid) -{ - struct psb_s *psb; - struct pst_s *pst; - unsigned int i, j; - unsigned char *p; - unsigned int etuple; - unsigned int ret; - - etuple = cpuid_eax(0x80000001); - - for (i=0xC0000; i < 0xffff0 ; i+=16) { - - p = phys_to_virt(i); - - if (memcmp(p, "AMDK7PNOW!", 10) == 0){ - dprintk ("Found PSB header at %p\n", p); - psb = (struct psb_s *) p; - dprintk ("Table version: 0x%x\n", psb->tableversion); - if (psb->tableversion != 0x12) { - printk (KERN_INFO PFX "Sorry, only v1.2 tables supported right now\n"); - return -ENODEV; - } - - dprintk ("Flags: 0x%x\n", psb->flags); - if ((psb->flags & 1)==0) { - dprintk ("Mobile voltage regulator\n"); - } else { - dprintk ("Desktop voltage regulator\n"); - } - - latency = psb->settlingtime; - if (latency < 100) { - printk (KERN_INFO PFX "BIOS set settling time to %d microseconds." - "Should be at least 100. Correcting.\n", latency); - latency = 100; - } - dprintk ("Settling Time: %d microseconds.\n", psb->settlingtime); - dprintk ("Has %d PST tables. (Only dumping ones relevant to this CPU).\n", psb->numpst); - - p += sizeof (struct psb_s); - - pst = (struct pst_s *) p; - - for (j=0; jnumpst; j++) { - pst = (struct pst_s *) p; - number_scales = pst->numpstates; - - if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) && - (maxfid==pst->maxfid) && (startvid==pst->startvid)) - { - dprintk ("PST:%d (@%p)\n", j, pst); - dprintk (" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", - pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); - - ret = get_ranges ((char *) pst + sizeof (struct pst_s)); - return ret; - } else { - unsigned int k; - p = (char *) pst + sizeof (struct pst_s); - for (k=0; k= 5) - m += 5; - - m /= 10; - - sgtc = 100 * m * latency; - sgtc = sgtc / 3; - if (sgtc > 0xfffff) { - printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc); - sgtc = 0xfffff; - } - return sgtc; -} - -static unsigned int powernow_get(unsigned int cpu) -{ - union msr_fidvidstatus fidvidstatus; - unsigned int cfid; - - if (cpu) - return 0; - rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); - cfid = fidvidstatus.bits.CFID; - - return (fsb * fid_codes[cfid] / 10); -} - - -static int __init acer_cpufreq_pst(struct dmi_system_id *d) -{ - printk(KERN_WARNING "%s laptop with broken PST tables in BIOS detected.\n", d->ident); - printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n"); - printk(KERN_WARNING "cpufreq scaling has been disabled as a result of this.\n"); - return 0; -} - -/* - * Some Athlon laptops have really fucked PST tables. - * A BIOS update is all that can save them. - * Mention this, and disable cpufreq. - */ -static struct dmi_system_id __initdata powernow_dmi_table[] = { - { - .callback = acer_cpufreq_pst, - .ident = "Acer Aspire", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"), - DMI_MATCH(DMI_BIOS_VERSION, "3A71"), - }, - }, - { } -}; - -static int __init powernow_cpu_init (struct cpufreq_policy *policy) -{ - union msr_fidvidstatus fidvidstatus; - int result; - - if (policy->cpu != 0) - return -ENODEV; - - rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); - - recalibrate_cpu_khz(); - - fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID]; - if (!fsb) { - printk(KERN_WARNING PFX "can not determine bus frequency\n"); - return -EINVAL; - } - dprintk("FSB: %3dMHz\n", fsb/1000); - - if (dmi_check_system(powernow_dmi_table) || acpi_force) { - printk (KERN_INFO PFX "PSB/PST known to be broken. Trying ACPI instead\n"); - result = powernow_acpi_init(); - } else { - result = powernow_decode_bios(fidvidstatus.bits.MFID, fidvidstatus.bits.SVID); - if (result) { - printk (KERN_INFO PFX "Trying ACPI perflib\n"); - maximum_speed = 0; - minimum_speed = -1; - latency = 0; - result = powernow_acpi_init(); - if (result) { - printk (KERN_INFO PFX "ACPI and legacy methods failed\n"); - printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.html\n"); - } - } else { - /* SGTC use the bus clock as timer */ - latency = fixup_sgtc(); - printk(KERN_INFO PFX "SGTC: %d\n", latency); - } - } - - if (result) - return result; - - printk (KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n", - minimum_speed/1000, maximum_speed/1000); - - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - - policy->cpuinfo.transition_latency = cpufreq_scale(2000000UL, fsb, latency); - - policy->cur = powernow_get(0); - - cpufreq_frequency_table_get_attr(powernow_table, policy->cpu); - - return cpufreq_frequency_table_cpuinfo(policy, powernow_table); -} - -static int powernow_cpu_exit (struct cpufreq_policy *policy) { - cpufreq_frequency_table_put_attr(policy->cpu); - -#ifdef CONFIG_X86_POWERNOW_K7_ACPI - if (acpi_processor_perf) { - acpi_processor_unregister_performance(acpi_processor_perf, 0); - kfree(acpi_processor_perf); - } -#endif - - kfree(powernow_table); - return 0; -} - -static struct freq_attr* powernow_table_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver powernow_driver = { - .verify = powernow_verify, - .target = powernow_target, - .get = powernow_get, - .init = powernow_cpu_init, - .exit = powernow_cpu_exit, - .name = "powernow-k7", - .owner = THIS_MODULE, - .attr = powernow_table_attr, -}; - -static int __init powernow_init (void) -{ - if (check_powernow()==0) - return -ENODEV; - return cpufreq_register_driver(&powernow_driver); -} - - -static void __exit powernow_exit (void) -{ - cpufreq_unregister_driver(&powernow_driver); -} - -module_param(acpi_force, int, 0444); -MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); - -MODULE_AUTHOR ("Dave Jones "); -MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors."); -MODULE_LICENSE ("GPL"); - -late_initcall(powernow_init); -module_exit(powernow_exit); - diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.h b/arch/i386/kernel/cpu/cpufreq/powernow-k7.h deleted file mode 100644 index f8a63b3664e..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * $Id: powernow-k7.h,v 1.2 2003/02/10 18:26:01 davej Exp $ - * (C) 2003 Dave Jones. - * - * Licensed under the terms of the GNU GPL License version 2. - * - * AMD-specific information - * - */ - -union msr_fidvidctl { - struct { - unsigned FID:5, // 4:0 - reserved1:3, // 7:5 - VID:5, // 12:8 - reserved2:3, // 15:13 - FIDC:1, // 16 - VIDC:1, // 17 - reserved3:2, // 19:18 - FIDCHGRATIO:1, // 20 - reserved4:11, // 31-21 - SGTC:20, // 32:51 - reserved5:12; // 63:52 - } bits; - unsigned long long val; -}; - -union msr_fidvidstatus { - struct { - unsigned CFID:5, // 4:0 - reserved1:3, // 7:5 - SFID:5, // 12:8 - reserved2:3, // 15:13 - MFID:5, // 20:16 - reserved3:11, // 31:21 - CVID:5, // 36:32 - reserved4:3, // 39:37 - SVID:5, // 44:40 - reserved5:3, // 47:45 - MVID:5, // 52:48 - reserved6:11; // 63:53 - } bits; - unsigned long long val; -}; diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c deleted file mode 100644 index 34ed53a0673..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ /dev/null @@ -1,1363 +0,0 @@ -/* - * (c) 2003-2006 Advanced Micro Devices, Inc. - * Your use of this code is subject to the terms and conditions of the - * GNU general public license version 2. See "COPYING" or - * http://www.gnu.org/licenses/gpl.html - * - * Support : mark.langsdorf@amd.com - * - * Based on the powernow-k7.c module written by Dave Jones. - * (C) 2003 Dave Jones on behalf of SuSE Labs - * (C) 2004 Dominik Brodowski - * (C) 2004 Pavel Machek - * Licensed under the terms of the GNU GPL License version 2. - * Based upon datasheets & sample CPUs kindly provided by AMD. - * - * Valuable input gratefully received from Dave Jones, Pavel Machek, - * Dominik Brodowski, Jacob Shin, and others. - * Originally developed by Paul Devriendt. - * Processor information obtained from Chapter 9 (Power and Thermal Management) - * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD - * Opteron Processors" available for download from www.amd.com - * - * Tables for specific CPUs can be inferred from - * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for current / set_cpus_allowed() */ - -#include -#include -#include - -#ifdef CONFIG_X86_POWERNOW_K8_ACPI -#include -#include -#include -#endif - -#define PFX "powernow-k8: " -#define BFX PFX "BIOS error: " -#define VERSION "version 2.00.00" -#include "powernow-k8.h" - -/* serialize freq changes */ -static DEFINE_MUTEX(fidvid_mutex); - -static struct powernow_k8_data *powernow_data[NR_CPUS]; - -static int cpu_family = CPU_OPTERON; - -#ifndef CONFIG_SMP -static cpumask_t cpu_core_map[1]; -#endif - -/* Return a frequency in MHz, given an input fid */ -static u32 find_freq_from_fid(u32 fid) -{ - return 800 + (fid * 100); -} - - -/* Return a frequency in KHz, given an input fid */ -static u32 find_khz_freq_from_fid(u32 fid) -{ - return 1000 * find_freq_from_fid(fid); -} - -/* Return a frequency in MHz, given an input fid and did */ -static u32 find_freq_from_fiddid(u32 fid, u32 did) -{ - return 100 * (fid + 0x10) >> did; -} - -static u32 find_khz_freq_from_fiddid(u32 fid, u32 did) -{ - return 1000 * find_freq_from_fiddid(fid, did); -} - -static u32 find_fid_from_pstate(u32 pstate) -{ - u32 hi, lo; - rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); - return lo & HW_PSTATE_FID_MASK; -} - -static u32 find_did_from_pstate(u32 pstate) -{ - u32 hi, lo; - rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); - return (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; -} - -/* Return the vco fid for an input fid - * - * Each "low" fid has corresponding "high" fid, and you can get to "low" fids - * only from corresponding high fids. This returns "high" fid corresponding to - * "low" one. - */ -static u32 convert_fid_to_vco_fid(u32 fid) -{ - if (fid < HI_FID_TABLE_BOTTOM) - return 8 + (2 * fid); - else - return fid; -} - -/* - * Return 1 if the pending bit is set. Unless we just instructed the processor - * to transition to a new state, seeing this bit set is really bad news. - */ -static int pending_bit_stuck(void) -{ - u32 lo, hi; - - if (cpu_family == CPU_HW_PSTATE) - return 0; - - rdmsr(MSR_FIDVID_STATUS, lo, hi); - return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0; -} - -/* - * Update the global current fid / vid values from the status msr. - * Returns 1 on error. - */ -static int query_current_values_with_pending_wait(struct powernow_k8_data *data) -{ - u32 lo, hi; - u32 i = 0; - - if (cpu_family == CPU_HW_PSTATE) { - rdmsr(MSR_PSTATE_STATUS, lo, hi); - i = lo & HW_PSTATE_MASK; - rdmsr(MSR_PSTATE_DEF_BASE + i, lo, hi); - data->currfid = lo & HW_PSTATE_FID_MASK; - data->currdid = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; - return 0; - } - do { - if (i++ > 10000) { - dprintk("detected change pending stuck\n"); - return 1; - } - rdmsr(MSR_FIDVID_STATUS, lo, hi); - } while (lo & MSR_S_LO_CHANGE_PENDING); - - data->currvid = hi & MSR_S_HI_CURRENT_VID; - data->currfid = lo & MSR_S_LO_CURRENT_FID; - - return 0; -} - -/* the isochronous relief time */ -static void count_off_irt(struct powernow_k8_data *data) -{ - udelay((1 << data->irt) * 10); - return; -} - -/* the voltage stabalization time */ -static void count_off_vst(struct powernow_k8_data *data) -{ - udelay(data->vstable * VST_UNITS_20US); - return; -} - -/* need to init the control msr to a safe value (for each cpu) */ -static void fidvid_msr_init(void) -{ - u32 lo, hi; - u8 fid, vid; - - rdmsr(MSR_FIDVID_STATUS, lo, hi); - vid = hi & MSR_S_HI_CURRENT_VID; - fid = lo & MSR_S_LO_CURRENT_FID; - lo = fid | (vid << MSR_C_LO_VID_SHIFT); - hi = MSR_C_HI_STP_GNT_BENIGN; - dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi); - wrmsr(MSR_FIDVID_CTL, lo, hi); -} - - -/* write the new fid value along with the other control fields to the msr */ -static int write_new_fid(struct powernow_k8_data *data, u32 fid) -{ - u32 lo; - u32 savevid = data->currvid; - u32 i = 0; - - if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) { - printk(KERN_ERR PFX "internal error - overflow on fid write\n"); - return 1; - } - - lo = fid | (data->currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; - - dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", - fid, lo, data->plllock * PLL_LOCK_CONVERSION); - - do { - wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); - if (i++ > 100) { - printk(KERN_ERR PFX "Hardware error - pending bit very stuck - no further pstate changes possible\n"); - return 1; - } - } while (query_current_values_with_pending_wait(data)); - - count_off_irt(data); - - if (savevid != data->currvid) { - printk(KERN_ERR PFX "vid change on fid trans, old 0x%x, new 0x%x\n", - savevid, data->currvid); - return 1; - } - - if (fid != data->currfid) { - printk(KERN_ERR PFX "fid trans failed, fid 0x%x, curr 0x%x\n", fid, - data->currfid); - return 1; - } - - return 0; -} - -/* Write a new vid to the hardware */ -static int write_new_vid(struct powernow_k8_data *data, u32 vid) -{ - u32 lo; - u32 savefid = data->currfid; - int i = 0; - - if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) { - printk(KERN_ERR PFX "internal error - overflow on vid write\n"); - return 1; - } - - lo = data->currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; - - dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", - vid, lo, STOP_GRANT_5NS); - - do { - wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); - if (i++ > 100) { - printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); - return 1; - } - } while (query_current_values_with_pending_wait(data)); - - if (savefid != data->currfid) { - printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n", - savefid, data->currfid); - return 1; - } - - if (vid != data->currvid) { - printk(KERN_ERR PFX "vid trans failed, vid 0x%x, curr 0x%x\n", vid, - data->currvid); - return 1; - } - - return 0; -} - -/* - * Reduce the vid by the max of step or reqvid. - * Decreasing vid codes represent increasing voltages: - * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off. - */ -static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step) -{ - if ((data->currvid - reqvid) > step) - reqvid = data->currvid - step; - - if (write_new_vid(data, reqvid)) - return 1; - - count_off_vst(data); - - return 0; -} - -/* Change hardware pstate by single MSR write */ -static int transition_pstate(struct powernow_k8_data *data, u32 pstate) -{ - wrmsr(MSR_PSTATE_CTRL, pstate, 0); - data->currfid = find_fid_from_pstate(pstate); - return 0; -} - -/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */ -static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid) -{ - if (core_voltage_pre_transition(data, reqvid)) - return 1; - - if (core_frequency_transition(data, reqfid)) - return 1; - - if (core_voltage_post_transition(data, reqvid)) - return 1; - - if (query_current_values_with_pending_wait(data)) - return 1; - - if ((reqfid != data->currfid) || (reqvid != data->currvid)) { - printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, curr 0x%x 0x%x\n", - smp_processor_id(), - reqfid, reqvid, data->currfid, data->currvid); - return 1; - } - - dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n", - smp_processor_id(), data->currfid, data->currvid); - - return 0; -} - -/* Phase 1 - core voltage transition ... setup voltage */ -static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid) -{ - u32 rvosteps = data->rvo; - u32 savefid = data->currfid; - u32 maxvid, lo; - - dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n", - smp_processor_id(), - data->currfid, data->currvid, reqvid, data->rvo); - - rdmsr(MSR_FIDVID_STATUS, lo, maxvid); - maxvid = 0x1f & (maxvid >> 16); - dprintk("ph1 maxvid=0x%x\n", maxvid); - if (reqvid < maxvid) /* lower numbers are higher voltages */ - reqvid = maxvid; - - while (data->currvid > reqvid) { - dprintk("ph1: curr 0x%x, req vid 0x%x\n", - data->currvid, reqvid); - if (decrease_vid_code_by_step(data, reqvid, data->vidmvs)) - return 1; - } - - while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { - if (data->currvid == maxvid) { - rvosteps = 0; - } else { - dprintk("ph1: changing vid for rvo, req 0x%x\n", - data->currvid - 1); - if (decrease_vid_code_by_step(data, data->currvid - 1, 1)) - return 1; - rvosteps--; - } - } - - if (query_current_values_with_pending_wait(data)) - return 1; - - if (savefid != data->currfid) { - printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", data->currfid); - return 1; - } - - dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n", - data->currfid, data->currvid); - - return 0; -} - -/* Phase 2 - core frequency transition */ -static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) -{ - u32 vcoreqfid, vcocurrfid, vcofiddiff, fid_interval, savevid = data->currvid; - - if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { - printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n", - reqfid, data->currfid); - return 1; - } - - if (data->currfid == reqfid) { - printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid); - return 0; - } - - dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n", - smp_processor_id(), - data->currfid, data->currvid, reqfid); - - vcoreqfid = convert_fid_to_vco_fid(reqfid); - vcocurrfid = convert_fid_to_vco_fid(data->currfid); - vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid - : vcoreqfid - vcocurrfid; - - while (vcofiddiff > 2) { - (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2); - - if (reqfid > data->currfid) { - if (data->currfid > LO_FID_TABLE_TOP) { - if (write_new_fid(data, data->currfid + fid_interval)) { - return 1; - } - } else { - if (write_new_fid - (data, 2 + convert_fid_to_vco_fid(data->currfid))) { - return 1; - } - } - } else { - if (write_new_fid(data, data->currfid - fid_interval)) - return 1; - } - - vcocurrfid = convert_fid_to_vco_fid(data->currfid); - vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid - : vcoreqfid - vcocurrfid; - } - - if (write_new_fid(data, reqfid)) - return 1; - - if (query_current_values_with_pending_wait(data)) - return 1; - - if (data->currfid != reqfid) { - printk(KERN_ERR PFX - "ph2: mismatch, failed fid transition, curr 0x%x, req 0x%x\n", - data->currfid, reqfid); - return 1; - } - - if (savevid != data->currvid) { - printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n", - savevid, data->currvid); - return 1; - } - - dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n", - data->currfid, data->currvid); - - return 0; -} - -/* Phase 3 - core voltage transition flow ... jump to the final vid. */ -static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid) -{ - u32 savefid = data->currfid; - u32 savereqvid = reqvid; - - dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n", - smp_processor_id(), - data->currfid, data->currvid); - - if (reqvid != data->currvid) { - if (write_new_vid(data, reqvid)) - return 1; - - if (savefid != data->currfid) { - printk(KERN_ERR PFX - "ph3: bad fid change, save 0x%x, curr 0x%x\n", - savefid, data->currfid); - return 1; - } - - if (data->currvid != reqvid) { - printk(KERN_ERR PFX - "ph3: failed vid transition\n, req 0x%x, curr 0x%x", - reqvid, data->currvid); - return 1; - } - } - - if (query_current_values_with_pending_wait(data)) - return 1; - - if (savereqvid != data->currvid) { - dprintk("ph3 failed, currvid 0x%x\n", data->currvid); - return 1; - } - - if (savefid != data->currfid) { - dprintk("ph3 failed, currfid changed 0x%x\n", - data->currfid); - return 1; - } - - dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n", - data->currfid, data->currvid); - - return 0; -} - -static int check_supported_cpu(unsigned int cpu) -{ - cpumask_t oldmask = CPU_MASK_ALL; - u32 eax, ebx, ecx, edx; - unsigned int rc = 0; - - oldmask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - - if (smp_processor_id() != cpu) { - printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); - goto out; - } - - if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) - goto out; - - eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); - if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) && - ((eax & CPUID_XFAM) < CPUID_XFAM_10H)) - goto out; - - if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { - if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || - ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { - printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); - goto out; - } - - eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); - if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { - printk(KERN_INFO PFX - "No frequency change capabilities detected\n"); - goto out; - } - - cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); - if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) { - printk(KERN_INFO PFX "Power state transitions not supported\n"); - goto out; - } - } else { /* must be a HW Pstate capable processor */ - cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); - if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE) - cpu_family = CPU_HW_PSTATE; - else - goto out; - } - - rc = 1; - -out: - set_cpus_allowed(current, oldmask); - return rc; -} - -static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) -{ - unsigned int j; - u8 lastfid = 0xff; - - for (j = 0; j < data->numps; j++) { - if (pst[j].vid > LEAST_VID) { - printk(KERN_ERR PFX "vid %d invalid : 0x%x\n", j, pst[j].vid); - return -EINVAL; - } - if (pst[j].vid < data->rvo) { /* vid + rvo >= 0 */ - printk(KERN_ERR BFX "0 vid exceeded with pstate %d\n", j); - return -ENODEV; - } - if (pst[j].vid < maxvid + data->rvo) { /* vid + rvo >= maxvid */ - printk(KERN_ERR BFX "maxvid exceeded with pstate %d\n", j); - return -ENODEV; - } - if (pst[j].fid > MAX_FID) { - printk(KERN_ERR BFX "maxfid exceeded with pstate %d\n", j); - return -ENODEV; - } - if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) { - /* Only first fid is allowed to be in "low" range */ - printk(KERN_ERR BFX "two low fids - %d : 0x%x\n", j, pst[j].fid); - return -EINVAL; - } - if (pst[j].fid < lastfid) - lastfid = pst[j].fid; - } - if (lastfid & 1) { - printk(KERN_ERR BFX "lastfid invalid\n"); - return -EINVAL; - } - if (lastfid > LO_FID_TABLE_TOP) - printk(KERN_INFO BFX "first fid not from lo freq table\n"); - - return 0; -} - -static void print_basics(struct powernow_k8_data *data) -{ - int j; - for (j = 0; j < data->numps; j++) { - if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) { - if (cpu_family == CPU_HW_PSTATE) { - printk(KERN_INFO PFX " %d : fid 0x%x did 0x%x (%d MHz)\n", - j, - (data->powernow_table[j].index & 0xff00) >> 8, - (data->powernow_table[j].index & 0xff0000) >> 16, - data->powernow_table[j].frequency/1000); - } else { - printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n", - j, - data->powernow_table[j].index & 0xff, - data->powernow_table[j].frequency/1000, - data->powernow_table[j].index >> 8); - } - } - } - if (data->batps) - printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps); -} - -static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) -{ - struct cpufreq_frequency_table *powernow_table; - unsigned int j; - - if (data->batps) { /* use ACPI support to get full speed on mains power */ - printk(KERN_WARNING PFX "Only %d pstates usable (use ACPI driver for full range\n", data->batps); - data->numps = data->batps; - } - - for ( j=1; jnumps; j++ ) { - if (pst[j-1].fid >= pst[j].fid) { - printk(KERN_ERR PFX "PST out of sequence\n"); - return -EINVAL; - } - } - - if (data->numps < 2) { - printk(KERN_ERR PFX "no p states to transition\n"); - return -ENODEV; - } - - if (check_pst_table(data, pst, maxvid)) - return -EINVAL; - - powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) - * (data->numps + 1)), GFP_KERNEL); - if (!powernow_table) { - printk(KERN_ERR PFX "powernow_table memory alloc failure\n"); - return -ENOMEM; - } - - for (j = 0; j < data->numps; j++) { - powernow_table[j].index = pst[j].fid; /* lower 8 bits */ - powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */ - powernow_table[j].frequency = find_khz_freq_from_fid(pst[j].fid); - } - powernow_table[data->numps].frequency = CPUFREQ_TABLE_END; - powernow_table[data->numps].index = 0; - - if (query_current_values_with_pending_wait(data)) { - kfree(powernow_table); - return -EIO; - } - - dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); - data->powernow_table = powernow_table; - if (first_cpu(cpu_core_map[data->cpu]) == data->cpu) - print_basics(data); - - for (j = 0; j < data->numps; j++) - if ((pst[j].fid==data->currfid) && (pst[j].vid==data->currvid)) - return 0; - - dprintk("currfid/vid do not match PST, ignoring\n"); - return 0; -} - -/* Find and validate the PSB/PST table in BIOS. */ -static int find_psb_table(struct powernow_k8_data *data) -{ - struct psb_s *psb; - unsigned int i; - u32 mvs; - u8 maxvid; - u32 cpst = 0; - u32 thiscpuid; - - for (i = 0xc0000; i < 0xffff0; i += 0x10) { - /* Scan BIOS looking for the signature. */ - /* It can not be at ffff0 - it is too big. */ - - psb = phys_to_virt(i); - if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0) - continue; - - dprintk("found PSB header at 0x%p\n", psb); - - dprintk("table vers: 0x%x\n", psb->tableversion); - if (psb->tableversion != PSB_VERSION_1_4) { - printk(KERN_ERR BFX "PSB table is not v1.4\n"); - return -ENODEV; - } - - dprintk("flags: 0x%x\n", psb->flags1); - if (psb->flags1) { - printk(KERN_ERR BFX "unknown flags\n"); - return -ENODEV; - } - - data->vstable = psb->vstable; - dprintk("voltage stabilization time: %d(*20us)\n", data->vstable); - - dprintk("flags2: 0x%x\n", psb->flags2); - data->rvo = psb->flags2 & 3; - data->irt = ((psb->flags2) >> 2) & 3; - mvs = ((psb->flags2) >> 4) & 3; - data->vidmvs = 1 << mvs; - data->batps = ((psb->flags2) >> 6) & 3; - - dprintk("ramp voltage offset: %d\n", data->rvo); - dprintk("isochronous relief time: %d\n", data->irt); - dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs); - - dprintk("numpst: 0x%x\n", psb->num_tables); - cpst = psb->num_tables; - if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0) ){ - thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); - if ((thiscpuid == 0x00000fc0) || (thiscpuid == 0x00000fe0) ) { - cpst = 1; - } - } - if (cpst != 1) { - printk(KERN_ERR BFX "numpst must be 1\n"); - return -ENODEV; - } - - data->plllock = psb->plllocktime; - dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime); - dprintk("maxfid: 0x%x\n", psb->maxfid); - dprintk("maxvid: 0x%x\n", psb->maxvid); - maxvid = psb->maxvid; - - data->numps = psb->numps; - dprintk("numpstates: 0x%x\n", data->numps); - return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid); - } - /* - * If you see this message, complain to BIOS manufacturer. If - * he tells you "we do not support Linux" or some similar - * nonsense, remember that Windows 2000 uses the same legacy - * mechanism that the old Linux PSB driver uses. Tell them it - * is broken with Windows 2000. - * - * The reference to the AMD documentation is chapter 9 in the - * BIOS and Kernel Developer's Guide, which is available on - * www.amd.com - */ - printk(KERN_ERR PFX "BIOS error - no PSB or ACPI _PSS objects\n"); - return -ENODEV; -} - -#ifdef CONFIG_X86_POWERNOW_K8_ACPI -static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) -{ - if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) - return; - - data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; - data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK; - data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; - data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; - data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK); - data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK; -} - -static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) -{ - struct cpufreq_frequency_table *powernow_table; - int ret_val; - - if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { - dprintk("register performance failed: bad ACPI data\n"); - return -EIO; - } - - /* verify the data contained in the ACPI structures */ - if (data->acpi_data.state_count <= 1) { - dprintk("No ACPI P-States\n"); - goto err_out; - } - - if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || - (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { - dprintk("Invalid control/status registers (%x - %x)\n", - data->acpi_data.control_register.space_id, - data->acpi_data.status_register.space_id); - goto err_out; - } - - /* fill in data->powernow_table */ - powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) - * (data->acpi_data.state_count + 1)), GFP_KERNEL); - if (!powernow_table) { - dprintk("powernow_table memory alloc failure\n"); - goto err_out; - } - - if (cpu_family == CPU_HW_PSTATE) - ret_val = fill_powernow_table_pstate(data, powernow_table); - else - ret_val = fill_powernow_table_fidvid(data, powernow_table); - if (ret_val) - goto err_out_mem; - - powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END; - powernow_table[data->acpi_data.state_count].index = 0; - data->powernow_table = powernow_table; - - /* fill in data */ - data->numps = data->acpi_data.state_count; - if (first_cpu(cpu_core_map[data->cpu]) == data->cpu) - print_basics(data); - powernow_k8_acpi_pst_values(data, 0); - - /* notify BIOS that we exist */ - acpi_processor_notify_smm(THIS_MODULE); - - return 0; - -err_out_mem: - kfree(powernow_table); - -err_out: - acpi_processor_unregister_performance(&data->acpi_data, data->cpu); - - /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ - data->acpi_data.state_count = 0; - - return -ENODEV; -} - -static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) -{ - int i; - - for (i = 0; i < data->acpi_data.state_count; i++) { - u32 index; - u32 hi = 0, lo = 0; - u32 fid; - u32 did; - - index = data->acpi_data.states[i].control & HW_PSTATE_MASK; - if (index > MAX_HW_PSTATE) { - printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); - printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); - } - rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); - if (!(hi & HW_PSTATE_VALID_MASK)) { - dprintk("invalid pstate %d, ignoring\n", index); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } - - fid = lo & HW_PSTATE_FID_MASK; - did = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; - - dprintk(" %d : fid 0x%x, did 0x%x\n", index, fid, did); - - powernow_table[i].index = index | (fid << HW_FID_INDEX_SHIFT) | (did << HW_DID_INDEX_SHIFT); - - powernow_table[i].frequency = find_khz_freq_from_fiddid(fid, did); - - if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { - printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", - powernow_table[i].frequency, - (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } - } - return 0; -} - -static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) -{ - int i; - int cntlofreq = 0; - for (i = 0; i < data->acpi_data.state_count; i++) { - u32 fid; - u32 vid; - - if (data->exttype) { - fid = data->acpi_data.states[i].status & EXT_FID_MASK; - vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK; - } else { - fid = data->acpi_data.states[i].control & FID_MASK; - vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; - } - - dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); - - powernow_table[i].index = fid; /* lower 8 bits */ - powernow_table[i].index |= (vid << 8); /* upper 8 bits */ - powernow_table[i].frequency = find_khz_freq_from_fid(fid); - - /* verify frequency is OK */ - if ((powernow_table[i].frequency > (MAX_FREQ * 1000)) || - (powernow_table[i].frequency < (MIN_FREQ * 1000))) { - dprintk("invalid freq %u kHz, ignoring\n", powernow_table[i].frequency); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } - - /* verify voltage is OK - BIOSs are using "off" to indicate invalid */ - if (vid == VID_OFF) { - dprintk("invalid vid %u, ignoring\n", vid); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } - - /* verify only 1 entry from the lo frequency table */ - if (fid < HI_FID_TABLE_BOTTOM) { - if (cntlofreq) { - /* if both entries are the same, ignore this one ... */ - if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) || - (powernow_table[i].index != powernow_table[cntlofreq].index)) { - printk(KERN_ERR PFX "Too many lo freq table entries\n"); - return 1; - } - - dprintk("double low frequency table entry, ignoring it.\n"); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } else - cntlofreq = i; - } - - if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { - printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", - powernow_table[i].frequency, - (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); - powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; - continue; - } - } - return 0; -} - -static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) -{ - if (data->acpi_data.state_count) - acpi_processor_unregister_performance(&data->acpi_data, data->cpu); -} - -#else -static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } -static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } -static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } -#endif /* CONFIG_X86_POWERNOW_K8_ACPI */ - -/* Take a frequency, and issue the fid/vid transition command */ -static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index) -{ - u32 fid = 0; - u32 vid = 0; - int res, i; - struct cpufreq_freqs freqs; - - dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); - - /* fid/vid correctness check for k8 */ - /* fid are the lower 8 bits of the index we stored into - * the cpufreq frequency table in find_psb_table, vid - * are the upper 8 bits. - */ - fid = data->powernow_table[index].index & 0xFF; - vid = (data->powernow_table[index].index & 0xFF00) >> 8; - - dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid); - - if (query_current_values_with_pending_wait(data)) - return 1; - - if ((data->currvid == vid) && (data->currfid == fid)) { - dprintk("target matches current values (fid 0x%x, vid 0x%x)\n", - fid, vid); - return 0; - } - - if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { - printk(KERN_ERR PFX - "ignoring illegal change in lo freq table-%x to 0x%x\n", - data->currfid, fid); - return 1; - } - - dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", - smp_processor_id(), fid, vid); - freqs.old = find_khz_freq_from_fid(data->currfid); - freqs.new = find_khz_freq_from_fid(fid); - - for_each_cpu_mask(i, *(data->available_cores)) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } - - res = transition_fid_vid(data, fid, vid); - freqs.new = find_khz_freq_from_fid(data->currfid); - - for_each_cpu_mask(i, *(data->available_cores)) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - return res; -} - -/* Take a frequency, and issue the hardware pstate transition command */ -static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index) -{ - u32 fid = 0; - u32 did = 0; - u32 pstate = 0; - int res, i; - struct cpufreq_freqs freqs; - - dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); - - /* get fid did for hardware pstate transition */ - pstate = index & HW_PSTATE_MASK; - if (pstate > MAX_HW_PSTATE) - return 0; - fid = (index & HW_FID_INDEX_MASK) >> HW_FID_INDEX_SHIFT; - did = (index & HW_DID_INDEX_MASK) >> HW_DID_INDEX_SHIFT; - freqs.old = find_khz_freq_from_fiddid(data->currfid, data->currdid); - freqs.new = find_khz_freq_from_fiddid(fid, did); - - for_each_cpu_mask(i, *(data->available_cores)) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } - - res = transition_pstate(data, pstate); - data->currfid = find_fid_from_pstate(pstate); - data->currdid = find_did_from_pstate(pstate); - freqs.new = find_khz_freq_from_fiddid(data->currfid, data->currdid); - - for_each_cpu_mask(i, *(data->available_cores)) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - return res; -} - -/* Driver entry point to switch to the target frequency */ -static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) -{ - cpumask_t oldmask = CPU_MASK_ALL; - struct powernow_k8_data *data = powernow_data[pol->cpu]; - u32 checkfid; - u32 checkvid; - unsigned int newstate; - int ret = -EIO; - - if (!data) - return -EINVAL; - - checkfid = data->currfid; - checkvid = data->currvid; - - /* only run on specific CPU from here on */ - oldmask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); - - if (smp_processor_id() != pol->cpu) { - printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); - goto err_out; - } - - if (pending_bit_stuck()) { - printk(KERN_ERR PFX "failing targ, change pending bit set\n"); - goto err_out; - } - - dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", - pol->cpu, targfreq, pol->min, pol->max, relation); - - if (query_current_values_with_pending_wait(data)) - goto err_out; - - if (cpu_family == CPU_HW_PSTATE) - dprintk("targ: curr fid 0x%x, did 0x%x\n", - data->currfid, data->currdid); - else { - dprintk("targ: curr fid 0x%x, vid 0x%x\n", - data->currfid, data->currvid); - - if ((checkvid != data->currvid) || (checkfid != data->currfid)) { - printk(KERN_INFO PFX - "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n", - checkfid, data->currfid, checkvid, data->currvid); - } - } - - if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate)) - goto err_out; - - mutex_lock(&fidvid_mutex); - - powernow_k8_acpi_pst_values(data, newstate); - - if (cpu_family == CPU_HW_PSTATE) - ret = transition_frequency_pstate(data, newstate); - else - ret = transition_frequency_fidvid(data, newstate); - if (ret) { - printk(KERN_ERR PFX "transition frequency failed\n"); - ret = 1; - mutex_unlock(&fidvid_mutex); - goto err_out; - } - mutex_unlock(&fidvid_mutex); - - if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); - else - pol->cur = find_khz_freq_from_fid(data->currfid); - ret = 0; - -err_out: - set_cpus_allowed(current, oldmask); - return ret; -} - -/* Driver entry point to verify the policy and range of frequencies */ -static int powernowk8_verify(struct cpufreq_policy *pol) -{ - struct powernow_k8_data *data = powernow_data[pol->cpu]; - - if (!data) - return -EINVAL; - - return cpufreq_frequency_table_verify(pol, data->powernow_table); -} - -/* per CPU init entry point to the driver */ -static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) -{ - struct powernow_k8_data *data; - cpumask_t oldmask = CPU_MASK_ALL; - int rc; - - if (!cpu_online(pol->cpu)) - return -ENODEV; - - if (!check_supported_cpu(pol->cpu)) - return -ENODEV; - - data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); - if (!data) { - printk(KERN_ERR PFX "unable to alloc powernow_k8_data"); - return -ENOMEM; - } - - data->cpu = pol->cpu; - - if (powernow_k8_cpu_init_acpi(data)) { - /* - * Use the PSB BIOS structure. This is only availabe on - * an UP version, and is deprecated by AMD. - */ - if (num_online_cpus() != 1) { - printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n"); - kfree(data); - return -ENODEV; - } - if (pol->cpu != 0) { - printk(KERN_ERR PFX "No _PSS objects for CPU other than CPU0\n"); - kfree(data); - return -ENODEV; - } - rc = find_psb_table(data); - if (rc) { - kfree(data); - return -ENODEV; - } - } - - /* only run on specific CPU from here on */ - oldmask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); - - if (smp_processor_id() != pol->cpu) { - printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); - goto err_out; - } - - if (pending_bit_stuck()) { - printk(KERN_ERR PFX "failing init, change pending bit set\n"); - goto err_out; - } - - if (query_current_values_with_pending_wait(data)) - goto err_out; - - if (cpu_family == CPU_OPTERON) - fidvid_msr_init(); - - /* run on any CPU again */ - set_cpus_allowed(current, oldmask); - - pol->governor = CPUFREQ_DEFAULT_GOVERNOR; - if (cpu_family == CPU_HW_PSTATE) - pol->cpus = cpumask_of_cpu(pol->cpu); - else - pol->cpus = cpu_core_map[pol->cpu]; - data->available_cores = &(pol->cpus); - - /* Take a crude guess here. - * That guess was in microseconds, so multiply with 1000 */ - pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US) - + (3 * (1 << data->irt) * 10)) * 1000; - - if (cpu_family == CPU_HW_PSTATE) - pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); - else - pol->cur = find_khz_freq_from_fid(data->currfid); - dprintk("policy current frequency %d kHz\n", pol->cur); - - /* min/max the cpu is capable of */ - if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) { - printk(KERN_ERR PFX "invalid powernow_table\n"); - powernow_k8_cpu_exit_acpi(data); - kfree(data->powernow_table); - kfree(data); - return -EINVAL; - } - - cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); - - if (cpu_family == CPU_HW_PSTATE) - dprintk("cpu_init done, current fid 0x%x, did 0x%x\n", - data->currfid, data->currdid); - else - dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", - data->currfid, data->currvid); - - powernow_data[pol->cpu] = data; - - return 0; - -err_out: - set_cpus_allowed(current, oldmask); - powernow_k8_cpu_exit_acpi(data); - - kfree(data); - return -ENODEV; -} - -static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol) -{ - struct powernow_k8_data *data = powernow_data[pol->cpu]; - - if (!data) - return -EINVAL; - - powernow_k8_cpu_exit_acpi(data); - - cpufreq_frequency_table_put_attr(pol->cpu); - - kfree(data->powernow_table); - kfree(data); - - return 0; -} - -static unsigned int powernowk8_get (unsigned int cpu) -{ - struct powernow_k8_data *data; - cpumask_t oldmask = current->cpus_allowed; - unsigned int khz = 0; - - data = powernow_data[first_cpu(cpu_core_map[cpu])]; - - if (!data) - return -EINVAL; - - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - if (smp_processor_id() != cpu) { - printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu); - set_cpus_allowed(current, oldmask); - return 0; - } - - if (query_current_values_with_pending_wait(data)) - goto out; - - if (cpu_family == CPU_HW_PSTATE) - khz = find_khz_freq_from_fiddid(data->currfid, data->currdid); - else - khz = find_khz_freq_from_fid(data->currfid); - - -out: - set_cpus_allowed(current, oldmask); - return khz; -} - -static struct freq_attr* powernow_k8_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver cpufreq_amd64_driver = { - .verify = powernowk8_verify, - .target = powernowk8_target, - .init = powernowk8_cpu_init, - .exit = __devexit_p(powernowk8_cpu_exit), - .get = powernowk8_get, - .name = "powernow-k8", - .owner = THIS_MODULE, - .attr = powernow_k8_attr, -}; - -/* driver entry point for init */ -static int __cpuinit powernowk8_init(void) -{ - unsigned int i, supported_cpus = 0; - unsigned int booted_cores = 1; - - for_each_online_cpu(i) { - if (check_supported_cpu(i)) - supported_cpus++; - } - -#ifdef CONFIG_SMP - booted_cores = cpu_data[0].booted_cores; -#endif - - if (supported_cpus == num_online_cpus()) { - printk(KERN_INFO PFX "Found %d %s " - "processors (%d cpu cores) (" VERSION ")\n", - supported_cpus/booted_cores, - boot_cpu_data.x86_model_id, supported_cpus); - return cpufreq_register_driver(&cpufreq_amd64_driver); - } - - return -ENODEV; -} - -/* driver entry point for term */ -static void __exit powernowk8_exit(void) -{ - dprintk("exit\n"); - - cpufreq_unregister_driver(&cpufreq_amd64_driver); -} - -MODULE_AUTHOR("Paul Devriendt and Mark Langsdorf "); -MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); -MODULE_LICENSE("GPL"); - -late_initcall(powernowk8_init); -module_exit(powernowk8_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h deleted file mode 100644 index b06c812208c..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - * (c) 2003-2006 Advanced Micro Devices, Inc. - * Your use of this code is subject to the terms and conditions of the - * GNU general public license version 2. See "COPYING" or - * http://www.gnu.org/licenses/gpl.html - */ - -struct powernow_k8_data { - unsigned int cpu; - - u32 numps; /* number of p-states */ - u32 batps; /* number of p-states supported on battery */ - - /* these values are constant when the PSB is used to determine - * vid/fid pairings, but are modified during the ->target() call - * when ACPI is used */ - u32 rvo; /* ramp voltage offset */ - u32 irt; /* isochronous relief time */ - u32 vidmvs; /* usable value calculated from mvs */ - u32 vstable; /* voltage stabilization time, units 20 us */ - u32 plllock; /* pll lock time, units 1 us */ - u32 exttype; /* extended interface = 1 */ - - /* keep track of the current fid / vid or did */ - u32 currvid, currfid, currdid; - - /* the powernow_table includes all frequency and vid/fid pairings: - * fid are the lower 8 bits of the index, vid are the upper 8 bits. - * frequency is in kHz */ - struct cpufreq_frequency_table *powernow_table; - -#ifdef CONFIG_X86_POWERNOW_K8_ACPI - /* the acpi table needs to be kept. it's only available if ACPI was - * used to determine valid frequency/vid/fid states */ - struct acpi_processor_performance acpi_data; -#endif - /* we need to keep track of associated cores, but let cpufreq - * handle hotplug events - so just point at cpufreq pol->cpus - * structure */ - cpumask_t *available_cores; -}; - - -/* processor's cpuid instruction support */ -#define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ -#define CPUID_XFAM 0x0ff00000 /* extended family */ -#define CPUID_XFAM_K8 0 -#define CPUID_XMOD 0x000f0000 /* extended model */ -#define CPUID_XMOD_REV_MASK 0x00080000 -#define CPUID_XFAM_10H 0x00100000 /* family 0x10 */ -#define CPUID_USE_XFAM_XMOD 0x00000f00 -#define CPUID_GET_MAX_CAPABILITIES 0x80000000 -#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 -#define P_STATE_TRANSITION_CAPABLE 6 - -/* Model Specific Registers for p-state transitions. MSRs are 64-bit. For */ -/* writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and */ -/* the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), */ -/* the register number is placed in ecx, and the data is returned in edx:eax. */ - -#define MSR_FIDVID_CTL 0xc0010041 -#define MSR_FIDVID_STATUS 0xc0010042 - -/* Field definitions within the FID VID Low Control MSR : */ -#define MSR_C_LO_INIT_FID_VID 0x00010000 -#define MSR_C_LO_NEW_VID 0x00003f00 -#define MSR_C_LO_NEW_FID 0x0000003f -#define MSR_C_LO_VID_SHIFT 8 - -/* Field definitions within the FID VID High Control MSR : */ -#define MSR_C_HI_STP_GNT_TO 0x000fffff - -/* Field definitions within the FID VID Low Status MSR : */ -#define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */ -#define MSR_S_LO_MAX_RAMP_VID 0x3f000000 -#define MSR_S_LO_MAX_FID 0x003f0000 -#define MSR_S_LO_START_FID 0x00003f00 -#define MSR_S_LO_CURRENT_FID 0x0000003f - -/* Field definitions within the FID VID High Status MSR : */ -#define MSR_S_HI_MIN_WORKING_VID 0x3f000000 -#define MSR_S_HI_MAX_WORKING_VID 0x003f0000 -#define MSR_S_HI_START_VID 0x00003f00 -#define MSR_S_HI_CURRENT_VID 0x0000003f -#define MSR_C_HI_STP_GNT_BENIGN 0x00000001 - - -/* Hardware Pstate _PSS and MSR definitions */ -#define USE_HW_PSTATE 0x00000080 -#define HW_PSTATE_FID_MASK 0x0000003f -#define HW_PSTATE_DID_MASK 0x000001c0 -#define HW_PSTATE_DID_SHIFT 6 -#define HW_PSTATE_MASK 0x00000007 -#define HW_PSTATE_VALID_MASK 0x80000000 -#define HW_FID_INDEX_SHIFT 8 -#define HW_FID_INDEX_MASK 0x0000ff00 -#define HW_DID_INDEX_SHIFT 16 -#define HW_DID_INDEX_MASK 0x00ff0000 -#define HW_WATTS_MASK 0xff -#define HW_PWR_DVR_MASK 0x300 -#define HW_PWR_DVR_SHIFT 8 -#define HW_PWR_MAX_MULT 3 -#define MAX_HW_PSTATE 8 /* hw pstate supports up to 8 */ -#define MSR_PSTATE_DEF_BASE 0xc0010064 /* base of Pstate MSRs */ -#define MSR_PSTATE_STATUS 0xc0010063 /* Pstate Status MSR */ -#define MSR_PSTATE_CTRL 0xc0010062 /* Pstate control MSR */ - -/* define the two driver architectures */ -#define CPU_OPTERON 0 -#define CPU_HW_PSTATE 1 - - -/* - * There are restrictions frequencies have to follow: - * - only 1 entry in the low fid table ( <=1.4GHz ) - * - lowest entry in the high fid table must be >= 2 * the entry in the - * low fid table - * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry - * in the low fid table - * - the parts can only step at <= 200 MHz intervals, odd fid values are - * supported in revision G and later revisions. - * - lowest frequency must be >= interprocessor hypertransport link speed - * (only applies to MP systems obviously) - */ - -/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */ -#define LO_FID_TABLE_TOP 7 /* fid values marking the boundary */ -#define HI_FID_TABLE_BOTTOM 8 /* between the low and high tables */ - -#define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */ -#define HI_VCOFREQ_TABLE_BOTTOM 1600 - -#define MIN_FREQ_RESOLUTION 200 /* fids jump by 2 matching freq jumps by 200 */ - -#define MAX_FID 0x2a /* Spec only gives FID values as far as 5 GHz */ -#define LEAST_VID 0x3e /* Lowest (numerically highest) useful vid value */ - -#define MIN_FREQ 800 /* Min and max freqs, per spec */ -#define MAX_FREQ 5000 - -#define INVALID_FID_MASK 0xffffffc0 /* not a valid fid if these bits are set */ -#define INVALID_VID_MASK 0xffffffc0 /* not a valid vid if these bits are set */ - -#define VID_OFF 0x3f - -#define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */ - -#define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */ - -#define MAXIMUM_VID_STEPS 1 /* Current cpus only allow a single step of 25mV */ -#define VST_UNITS_20US 20 /* Voltage Stabalization Time is in units of 20us */ - -/* - * Most values of interest are enocoded in a single field of the _PSS - * entries: the "control" value. - */ - -#define IRT_SHIFT 30 -#define RVO_SHIFT 28 -#define EXT_TYPE_SHIFT 27 -#define PLL_L_SHIFT 20 -#define MVS_SHIFT 18 -#define VST_SHIFT 11 -#define VID_SHIFT 6 -#define IRT_MASK 3 -#define RVO_MASK 3 -#define EXT_TYPE_MASK 1 -#define PLL_L_MASK 0x7f -#define MVS_MASK 3 -#define VST_MASK 0x7f -#define VID_MASK 0x1f -#define FID_MASK 0x1f -#define EXT_VID_MASK 0x3f -#define EXT_FID_MASK 0x3f - - -/* - * Version 1.4 of the PSB table. This table is constructed by BIOS and is - * to tell the OS's power management driver which VIDs and FIDs are - * supported by this particular processor. - * If the data in the PSB / PST is wrong, then this driver will program the - * wrong values into hardware, which is very likely to lead to a crash. - */ - -#define PSB_ID_STRING "AMDK7PNOW!" -#define PSB_ID_STRING_LEN 10 - -#define PSB_VERSION_1_4 0x14 - -struct psb_s { - u8 signature[10]; - u8 tableversion; - u8 flags1; - u16 vstable; - u8 flags2; - u8 num_tables; - u32 cpuid; - u8 plllocktime; - u8 maxfid; - u8 maxvid; - u8 numps; -}; - -/* Pairs of fid/vid values are appended to the version 1.4 PSB table. */ -struct pst_s { - u8 fid; - u8 vid; -}; - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg) - -static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid); -static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid); -static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); - -static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index); - -#ifdef CONFIG_X86_POWERNOW_K8_ACPI -static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); -static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); -#endif - -#ifdef CONFIG_SMP -static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) -{ -} -#else -static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) -{ - cpu_set(0, cpu_sharedcore_mask[0]); -} -#endif diff --git a/arch/i386/kernel/cpu/cpufreq/sc520_freq.c b/arch/i386/kernel/cpu/cpufreq/sc520_freq.c deleted file mode 100644 index b8fb4b521c6..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/sc520_freq.c +++ /dev/null @@ -1,191 +0,0 @@ -/* - * sc520_freq.c: cpufreq driver for the AMD Elan sc520 - * - * Copyright (C) 2005 Sean Young - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Based on elanfreq.c - * - * 2005-03-30: - initial revision - */ - -#include -#include -#include - -#include -#include - -#include -#include -#include - -#define MMCR_BASE 0xfffef000 /* The default base address */ -#define OFFS_CPUCTL 0x2 /* CPU Control Register */ - -static __u8 __iomem *cpuctl; - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "sc520_freq", msg) - -static struct cpufreq_frequency_table sc520_freq_table[] = { - {0x01, 100000}, - {0x02, 133000}, - {0, CPUFREQ_TABLE_END}, -}; - -static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu) -{ - u8 clockspeed_reg = *cpuctl; - - switch (clockspeed_reg & 0x03) { - default: - printk(KERN_ERR "sc520_freq: error: cpuctl register has unexpected value %02x\n", clockspeed_reg); - case 0x01: - return 100000; - case 0x02: - return 133000; - } -} - -static void sc520_freq_set_cpu_state (unsigned int state) -{ - - struct cpufreq_freqs freqs; - u8 clockspeed_reg; - - freqs.old = sc520_freq_get_cpu_frequency(0); - freqs.new = sc520_freq_table[state].frequency; - freqs.cpu = 0; /* AMD Elan is UP */ - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - - dprintk("attempting to set frequency to %i kHz\n", - sc520_freq_table[state].frequency); - - local_irq_disable(); - - clockspeed_reg = *cpuctl & ~0x03; - *cpuctl = clockspeed_reg | sc520_freq_table[state].index; - - local_irq_enable(); - - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); -}; - -static int sc520_freq_verify (struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]); -} - -static int sc520_freq_target (struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = 0; - - if (cpufreq_frequency_table_target(policy, sc520_freq_table, target_freq, relation, &newstate)) - return -EINVAL; - - sc520_freq_set_cpu_state(newstate); - - return 0; -} - - -/* - * Module init and exit code - */ - -static int sc520_freq_cpu_init(struct cpufreq_policy *policy) -{ - struct cpuinfo_x86 *c = cpu_data; - int result; - - /* capability check */ - if (c->x86_vendor != X86_VENDOR_AMD || - c->x86 != 4 || c->x86_model != 9) - return -ENODEV; - - /* cpuinfo and default policy values */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = 1000000; /* 1ms */ - policy->cur = sc520_freq_get_cpu_frequency(0); - - result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table); - if (result) - return (result); - - cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu); - - return 0; -} - - -static int sc520_freq_cpu_exit(struct cpufreq_policy *policy) -{ - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - - -static struct freq_attr* sc520_freq_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - - -static struct cpufreq_driver sc520_freq_driver = { - .get = sc520_freq_get_cpu_frequency, - .verify = sc520_freq_verify, - .target = sc520_freq_target, - .init = sc520_freq_cpu_init, - .exit = sc520_freq_cpu_exit, - .name = "sc520_freq", - .owner = THIS_MODULE, - .attr = sc520_freq_attr, -}; - - -static int __init sc520_freq_init(void) -{ - struct cpuinfo_x86 *c = cpu_data; - int err; - - /* Test if we have the right hardware */ - if(c->x86_vendor != X86_VENDOR_AMD || - c->x86 != 4 || c->x86_model != 9) { - dprintk("no Elan SC520 processor found!\n"); - return -ENODEV; - } - cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1); - if(!cpuctl) { - printk(KERN_ERR "sc520_freq: error: failed to remap memory\n"); - return -ENOMEM; - } - - err = cpufreq_register_driver(&sc520_freq_driver); - if (err) - iounmap(cpuctl); - - return err; -} - - -static void __exit sc520_freq_exit(void) -{ - cpufreq_unregister_driver(&sc520_freq_driver); - iounmap(cpuctl); -} - - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Sean Young "); -MODULE_DESCRIPTION("cpufreq driver for AMD's Elan sc520 CPU"); - -module_init(sc520_freq_init); -module_exit(sc520_freq_exit); - diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c deleted file mode 100644 index 6c5dc2c85ae..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ /dev/null @@ -1,634 +0,0 @@ -/* - * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium - * M (part of the Centrino chipset). - * - * Since the original Pentium M, most new Intel CPUs support Enhanced - * SpeedStep. - * - * Despite the "SpeedStep" in the name, this is almost entirely unlike - * traditional SpeedStep. - * - * Modelled on speedstep.c - * - * Copyright (C) 2003 Jeremy Fitzhardinge - */ - -#include -#include -#include -#include -#include /* current */ -#include -#include - -#include -#include -#include - -#define PFX "speedstep-centrino: " -#define MAINTAINER "cpufreq@lists.linux.org.uk" - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) - -#define INTEL_MSR_RANGE (0xffff) - -struct cpu_id -{ - __u8 x86; /* CPU family */ - __u8 x86_model; /* model */ - __u8 x86_mask; /* stepping */ -}; - -enum { - CPU_BANIAS, - CPU_DOTHAN_A1, - CPU_DOTHAN_A2, - CPU_DOTHAN_B0, - CPU_MP4HT_D0, - CPU_MP4HT_E0, -}; - -static const struct cpu_id cpu_ids[] = { - [CPU_BANIAS] = { 6, 9, 5 }, - [CPU_DOTHAN_A1] = { 6, 13, 1 }, - [CPU_DOTHAN_A2] = { 6, 13, 2 }, - [CPU_DOTHAN_B0] = { 6, 13, 6 }, - [CPU_MP4HT_D0] = {15, 3, 4 }, - [CPU_MP4HT_E0] = {15, 4, 1 }, -}; -#define N_IDS ARRAY_SIZE(cpu_ids) - -struct cpu_model -{ - const struct cpu_id *cpu_id; - const char *model_name; - unsigned max_freq; /* max clock in kHz */ - - struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */ -}; -static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x); - -/* Operating points for current CPU */ -static struct cpu_model *centrino_model[NR_CPUS]; -static const struct cpu_id *centrino_cpu[NR_CPUS]; - -static struct cpufreq_driver centrino_driver; - -#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE - -/* Computes the correct form for IA32_PERF_CTL MSR for a particular - frequency/voltage operating point; frequency in MHz, volts in mV. - This is stored as "index" in the structure. */ -#define OP(mhz, mv) \ - { \ - .frequency = (mhz) * 1000, \ - .index = (((mhz)/100) << 8) | ((mv - 700) / 16) \ - } - -/* - * These voltage tables were derived from the Intel Pentium M - * datasheet, document 25261202.pdf, Table 5. I have verified they - * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium - * M. - */ - -/* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */ -static struct cpufreq_frequency_table banias_900[] = -{ - OP(600, 844), - OP(800, 988), - OP(900, 1004), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */ -static struct cpufreq_frequency_table banias_1000[] = -{ - OP(600, 844), - OP(800, 972), - OP(900, 988), - OP(1000, 1004), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */ -static struct cpufreq_frequency_table banias_1100[] = -{ - OP( 600, 956), - OP( 800, 1020), - OP( 900, 1100), - OP(1000, 1164), - OP(1100, 1180), - { .frequency = CPUFREQ_TABLE_END } -}; - - -/* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */ -static struct cpufreq_frequency_table banias_1200[] = -{ - OP( 600, 956), - OP( 800, 1004), - OP( 900, 1020), - OP(1000, 1100), - OP(1100, 1164), - OP(1200, 1180), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Intel Pentium M processor 1.30GHz (Banias) */ -static struct cpufreq_frequency_table banias_1300[] = -{ - OP( 600, 956), - OP( 800, 1260), - OP(1000, 1292), - OP(1200, 1356), - OP(1300, 1388), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Intel Pentium M processor 1.40GHz (Banias) */ -static struct cpufreq_frequency_table banias_1400[] = -{ - OP( 600, 956), - OP( 800, 1180), - OP(1000, 1308), - OP(1200, 1436), - OP(1400, 1484), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Intel Pentium M processor 1.50GHz (Banias) */ -static struct cpufreq_frequency_table banias_1500[] = -{ - OP( 600, 956), - OP( 800, 1116), - OP(1000, 1228), - OP(1200, 1356), - OP(1400, 1452), - OP(1500, 1484), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Intel Pentium M processor 1.60GHz (Banias) */ -static struct cpufreq_frequency_table banias_1600[] = -{ - OP( 600, 956), - OP( 800, 1036), - OP(1000, 1164), - OP(1200, 1276), - OP(1400, 1420), - OP(1600, 1484), - { .frequency = CPUFREQ_TABLE_END } -}; - -/* Intel Pentium M processor 1.70GHz (Banias) */ -static struct cpufreq_frequency_table banias_1700[] = -{ - OP( 600, 956), - OP( 800, 1004), - OP(1000, 1116), - OP(1200, 1228), - OP(1400, 1308), - OP(1700, 1484), - { .frequency = CPUFREQ_TABLE_END } -}; -#undef OP - -#define _BANIAS(cpuid, max, name) \ -{ .cpu_id = cpuid, \ - .model_name = "Intel(R) Pentium(R) M processor " name "MHz", \ - .max_freq = (max)*1000, \ - .op_points = banias_##max, \ -} -#define BANIAS(max) _BANIAS(&cpu_ids[CPU_BANIAS], max, #max) - -/* CPU models, their operating frequency range, and freq/voltage - operating points */ -static struct cpu_model models[] = -{ - _BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"), - BANIAS(1000), - BANIAS(1100), - BANIAS(1200), - BANIAS(1300), - BANIAS(1400), - BANIAS(1500), - BANIAS(1600), - BANIAS(1700), - - /* NULL model_name is a wildcard */ - { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL }, - { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL }, - { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL }, - { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL }, - { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL }, - - { NULL, } -}; -#undef _BANIAS -#undef BANIAS - -static int centrino_cpu_init_table(struct cpufreq_policy *policy) -{ - struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; - struct cpu_model *model; - - for(model = models; model->cpu_id != NULL; model++) - if (centrino_verify_cpu_id(cpu, model->cpu_id) && - (model->model_name == NULL || - strcmp(cpu->x86_model_id, model->model_name) == 0)) - break; - - if (model->cpu_id == NULL) { - /* No match at all */ - dprintk("no support for CPU model \"%s\": " - "send /proc/cpuinfo to " MAINTAINER "\n", - cpu->x86_model_id); - return -ENOENT; - } - - if (model->op_points == NULL) { - /* Matched a non-match */ - dprintk("no table support for CPU model \"%s\"\n", - cpu->x86_model_id); - dprintk("try using the acpi-cpufreq driver\n"); - return -ENOENT; - } - - centrino_model[policy->cpu] = model; - - dprintk("found \"%s\": max frequency: %dkHz\n", - model->model_name, model->max_freq); - - return 0; -} - -#else -static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) { return -ENODEV; } -#endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ - -static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x) -{ - if ((c->x86 == x->x86) && - (c->x86_model == x->x86_model) && - (c->x86_mask == x->x86_mask)) - return 1; - return 0; -} - -/* To be called only after centrino_model is initialized */ -static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe) -{ - int i; - - /* - * Extract clock in kHz from PERF_CTL value - * for centrino, as some DSDTs are buggy. - * Ideally, this can be done using the acpi_data structure. - */ - if ((centrino_cpu[cpu] == &cpu_ids[CPU_BANIAS]) || - (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_A1]) || - (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_B0])) { - msr = (msr >> 8) & 0xff; - return msr * 100000; - } - - if ((!centrino_model[cpu]) || (!centrino_model[cpu]->op_points)) - return 0; - - msr &= 0xffff; - for (i=0;centrino_model[cpu]->op_points[i].frequency != CPUFREQ_TABLE_END; i++) { - if (msr == centrino_model[cpu]->op_points[i].index) - return centrino_model[cpu]->op_points[i].frequency; - } - if (failsafe) - return centrino_model[cpu]->op_points[i-1].frequency; - else - return 0; -} - -/* Return the current CPU frequency in kHz */ -static unsigned int get_cur_freq(unsigned int cpu) -{ - unsigned l, h; - unsigned clock_freq; - cpumask_t saved_mask; - - saved_mask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - if (smp_processor_id() != cpu) - return 0; - - rdmsr(MSR_IA32_PERF_STATUS, l, h); - clock_freq = extract_clock(l, cpu, 0); - - if (unlikely(clock_freq == 0)) { - /* - * On some CPUs, we can see transient MSR values (which are - * not present in _PSS), while CPU is doing some automatic - * P-state transition (like TM2). Get the last freq set - * in PERF_CTL. - */ - rdmsr(MSR_IA32_PERF_CTL, l, h); - clock_freq = extract_clock(l, cpu, 1); - } - - set_cpus_allowed(current, saved_mask); - return clock_freq; -} - - -static int centrino_cpu_init(struct cpufreq_policy *policy) -{ - struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; - unsigned freq; - unsigned l, h; - int ret; - int i; - - /* Only Intel makes Enhanced Speedstep-capable CPUs */ - if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) - return -ENODEV; - - if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) - centrino_driver.flags |= CPUFREQ_CONST_LOOPS; - - if (policy->cpu != 0) - return -ENODEV; - - for (i = 0; i < N_IDS; i++) - if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) - break; - - if (i != N_IDS) - centrino_cpu[policy->cpu] = &cpu_ids[i]; - - if (!centrino_cpu[policy->cpu]) { - dprintk("found unsupported CPU with " - "Enhanced SpeedStep: send /proc/cpuinfo to " - MAINTAINER "\n"); - return -ENODEV; - } - - if (centrino_cpu_init_table(policy)) { - return -ENODEV; - } - - /* Check to see if Enhanced SpeedStep is enabled, and try to - enable it if not. */ - rdmsr(MSR_IA32_MISC_ENABLE, l, h); - - if (!(l & (1<<16))) { - l |= (1<<16); - dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); - wrmsr(MSR_IA32_MISC_ENABLE, l, h); - - /* check to see if it stuck */ - rdmsr(MSR_IA32_MISC_ENABLE, l, h); - if (!(l & (1<<16))) { - printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n"); - return -ENODEV; - } - } - - freq = get_cur_freq(policy->cpu); - - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */ - policy->cur = freq; - - dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); - - ret = cpufreq_frequency_table_cpuinfo(policy, centrino_model[policy->cpu]->op_points); - if (ret) - return (ret); - - cpufreq_frequency_table_get_attr(centrino_model[policy->cpu]->op_points, policy->cpu); - - return 0; -} - -static int centrino_cpu_exit(struct cpufreq_policy *policy) -{ - unsigned int cpu = policy->cpu; - - if (!centrino_model[cpu]) - return -ENODEV; - - cpufreq_frequency_table_put_attr(cpu); - - centrino_model[cpu] = NULL; - - return 0; -} - -/** - * centrino_verify - verifies a new CPUFreq policy - * @policy: new policy - * - * Limit must be within this model's frequency range at least one - * border included. - */ -static int centrino_verify (struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, centrino_model[policy->cpu]->op_points); -} - -/** - * centrino_setpolicy - set a new CPUFreq policy - * @policy: new policy - * @target_freq: the target frequency - * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) - * - * Sets a new CPUFreq policy. - */ -static int centrino_target (struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = 0; - unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; - struct cpufreq_freqs freqs; - cpumask_t online_policy_cpus; - cpumask_t saved_mask; - cpumask_t set_mask; - cpumask_t covered_cpus; - int retval = 0; - unsigned int j, k, first_cpu, tmp; - - if (unlikely(centrino_model[cpu] == NULL)) - return -ENODEV; - - if (unlikely(cpufreq_frequency_table_target(policy, - centrino_model[cpu]->op_points, - target_freq, - relation, - &newstate))) { - return -EINVAL; - } - -#ifdef CONFIG_HOTPLUG_CPU - /* cpufreq holds the hotplug lock, so we are safe from here on */ - cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); -#else - online_policy_cpus = policy->cpus; -#endif - - saved_mask = current->cpus_allowed; - first_cpu = 1; - cpus_clear(covered_cpus); - for_each_cpu_mask(j, online_policy_cpus) { - /* - * Support for SMP systems. - * Make sure we are running on CPU that wants to change freq - */ - cpus_clear(set_mask); - if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) - cpus_or(set_mask, set_mask, online_policy_cpus); - else - cpu_set(j, set_mask); - - set_cpus_allowed(current, set_mask); - preempt_disable(); - if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) { - dprintk("couldn't limit to CPUs in this domain\n"); - retval = -EAGAIN; - if (first_cpu) { - /* We haven't started the transition yet. */ - goto migrate_end; - } - preempt_enable(); - break; - } - - msr = centrino_model[cpu]->op_points[newstate].index; - - if (first_cpu) { - rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); - if (msr == (oldmsr & 0xffff)) { - dprintk("no change needed - msr was and needs " - "to be %x\n", oldmsr); - retval = 0; - goto migrate_end; - } - - freqs.old = extract_clock(oldmsr, cpu, 0); - freqs.new = extract_clock(msr, cpu, 0); - - dprintk("target=%dkHz old=%d new=%d msr=%04x\n", - target_freq, freqs.old, freqs.new, msr); - - for_each_cpu_mask(k, online_policy_cpus) { - freqs.cpu = k; - cpufreq_notify_transition(&freqs, - CPUFREQ_PRECHANGE); - } - - first_cpu = 0; - /* all but 16 LSB are reserved, treat them with care */ - oldmsr &= ~0xffff; - msr &= 0xffff; - oldmsr |= msr; - } - - wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); - if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { - preempt_enable(); - break; - } - - cpu_set(j, covered_cpus); - preempt_enable(); - } - - for_each_cpu_mask(k, online_policy_cpus) { - freqs.cpu = k; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - - if (unlikely(retval)) { - /* - * We have failed halfway through the frequency change. - * We have sent callbacks to policy->cpus and - * MSRs have already been written on coverd_cpus. - * Best effort undo.. - */ - - if (!cpus_empty(covered_cpus)) { - for_each_cpu_mask(j, covered_cpus) { - set_cpus_allowed(current, cpumask_of_cpu(j)); - wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); - } - } - - tmp = freqs.new; - freqs.new = freqs.old; - freqs.old = tmp; - for_each_cpu_mask(j, online_policy_cpus) { - freqs.cpu = j; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - } - set_cpus_allowed(current, saved_mask); - return 0; - -migrate_end: - preempt_enable(); - set_cpus_allowed(current, saved_mask); - return 0; -} - -static struct freq_attr* centrino_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver centrino_driver = { - .name = "centrino", /* should be speedstep-centrino, - but there's a 16 char limit */ - .init = centrino_cpu_init, - .exit = centrino_cpu_exit, - .verify = centrino_verify, - .target = centrino_target, - .get = get_cur_freq, - .attr = centrino_attr, - .owner = THIS_MODULE, -}; - - -/** - * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver - * - * Initializes the Enhanced SpeedStep support. Returns -ENODEV on - * unsupported devices, -ENOENT if there's no voltage table for this - * particular CPU model, -EINVAL on problems during initiatization, - * and zero on success. - * - * This is quite picky. Not only does the CPU have to advertise the - * "est" flag in the cpuid capability flags, we look for a specific - * CPU model and stepping, and we need to have the exact model name in - * our voltage tables. That is, be paranoid about not releasing - * someone's valuable magic smoke. - */ -static int __init centrino_init(void) -{ - struct cpuinfo_x86 *cpu = cpu_data; - - if (!cpu_has(cpu, X86_FEATURE_EST)) - return -ENODEV; - - return cpufreq_register_driver(¢rino_driver); -} - -static void __exit centrino_exit(void) -{ - cpufreq_unregister_driver(¢rino_driver); -} - -MODULE_AUTHOR ("Jeremy Fitzhardinge "); -MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors."); -MODULE_LICENSE ("GPL"); - -late_initcall(centrino_init); -module_exit(centrino_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c b/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c deleted file mode 100644 index a5b2346faf1..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-ich.c +++ /dev/null @@ -1,440 +0,0 @@ -/* - * (C) 2001 Dave Jones, Arjan van de ven. - * (C) 2002 - 2003 Dominik Brodowski - * - * Licensed under the terms of the GNU GPL License version 2. - * Based upon reverse engineered information, and on Intel documentation - * for chipsets ICH2-M and ICH3-M. - * - * Many thanks to Ducrot Bruno for finding and fixing the last - * "missing link" for ICH2-M/ICH3-M support, and to Thomas Winkler - * for extensive testing. - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - - -/********************************************************************* - * SPEEDSTEP - DEFINITIONS * - *********************************************************************/ - -#include -#include -#include -#include -#include -#include -#include - -#include "speedstep-lib.h" - - -/* speedstep_chipset: - * It is necessary to know which chipset is used. As accesses to - * this device occur at various places in this module, we need a - * static struct pci_dev * pointing to that device. - */ -static struct pci_dev *speedstep_chipset_dev; - - -/* speedstep_processor - */ -static unsigned int speedstep_processor = 0; - -static u32 pmbase; - -/* - * There are only two frequency states for each processor. Values - * are in kHz for the time being. - */ -static struct cpufreq_frequency_table speedstep_freqs[] = { - {SPEEDSTEP_HIGH, 0}, - {SPEEDSTEP_LOW, 0}, - {0, CPUFREQ_TABLE_END}, -}; - - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-ich", msg) - - -/** - * speedstep_find_register - read the PMBASE address - * - * Returns: -ENODEV if no register could be found - */ -static int speedstep_find_register (void) -{ - if (!speedstep_chipset_dev) - return -ENODEV; - - /* get PMBASE */ - pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase); - if (!(pmbase & 0x01)) { - printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); - return -ENODEV; - } - - pmbase &= 0xFFFFFFFE; - if (!pmbase) { - printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); - return -ENODEV; - } - - dprintk("pmbase is 0x%x\n", pmbase); - return 0; -} - -/** - * speedstep_set_state - set the SpeedStep state - * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) - * - * Tries to change the SpeedStep state. - */ -static void speedstep_set_state (unsigned int state) -{ - u8 pm2_blk; - u8 value; - unsigned long flags; - - if (state > 0x1) - return; - - /* Disable IRQs */ - local_irq_save(flags); - - /* read state */ - value = inb(pmbase + 0x50); - - dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); - - /* write new state */ - value &= 0xFE; - value |= state; - - dprintk("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase); - - /* Disable bus master arbitration */ - pm2_blk = inb(pmbase + 0x20); - pm2_blk |= 0x01; - outb(pm2_blk, (pmbase + 0x20)); - - /* Actual transition */ - outb(value, (pmbase + 0x50)); - - /* Restore bus master arbitration */ - pm2_blk &= 0xfe; - outb(pm2_blk, (pmbase + 0x20)); - - /* check if transition was successful */ - value = inb(pmbase + 0x50); - - /* Enable IRQs */ - local_irq_restore(flags); - - dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); - - if (state == (value & 0x1)) { - dprintk("change to %u MHz succeeded\n", (speedstep_get_processor_frequency(speedstep_processor) / 1000)); - } else { - printk (KERN_ERR "cpufreq: change failed - I/O error\n"); - } - - return; -} - - -/** - * speedstep_activate - activate SpeedStep control in the chipset - * - * Tries to activate the SpeedStep status and control registers. - * Returns -EINVAL on an unsupported chipset, and zero on success. - */ -static int speedstep_activate (void) -{ - u16 value = 0; - - if (!speedstep_chipset_dev) - return -EINVAL; - - pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value); - if (!(value & 0x08)) { - value |= 0x08; - dprintk("activating SpeedStep (TM) registers\n"); - pci_write_config_word(speedstep_chipset_dev, 0x00A0, value); - } - - return 0; -} - - -/** - * speedstep_detect_chipset - detect the Southbridge which contains SpeedStep logic - * - * Detects ICH2-M, ICH3-M and ICH4-M so far. The pci_dev points to - * the LPC bridge / PM module which contains all power-management - * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected - * chipset, or zero on failure. - */ -static unsigned int speedstep_detect_chipset (void) -{ - speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82801DB_12, - PCI_ANY_ID, - PCI_ANY_ID, - NULL); - if (speedstep_chipset_dev) - return 4; /* 4-M */ - - speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82801CA_12, - PCI_ANY_ID, - PCI_ANY_ID, - NULL); - if (speedstep_chipset_dev) - return 3; /* 3-M */ - - - speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82801BA_10, - PCI_ANY_ID, - PCI_ANY_ID, - NULL); - if (speedstep_chipset_dev) { - /* speedstep.c causes lockups on Dell Inspirons 8000 and - * 8100 which use a pretty old revision of the 82815 - * host brige. Abort on these systems. - */ - static struct pci_dev *hostbridge; - - hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82815_MC, - PCI_ANY_ID, - PCI_ANY_ID, - NULL); - - if (!hostbridge) - return 2; /* 2-M */ - - if (hostbridge->revision < 5) { - dprintk("hostbridge does not support speedstep\n"); - speedstep_chipset_dev = NULL; - pci_dev_put(hostbridge); - return 0; - } - - pci_dev_put(hostbridge); - return 2; /* 2-M */ - } - - return 0; -} - -static unsigned int _speedstep_get(cpumask_t cpus) -{ - unsigned int speed; - cpumask_t cpus_allowed; - - cpus_allowed = current->cpus_allowed; - set_cpus_allowed(current, cpus); - speed = speedstep_get_processor_frequency(speedstep_processor); - set_cpus_allowed(current, cpus_allowed); - dprintk("detected %u kHz as current frequency\n", speed); - return speed; -} - -static unsigned int speedstep_get(unsigned int cpu) -{ - return _speedstep_get(cpumask_of_cpu(cpu)); -} - -/** - * speedstep_target - set a new CPUFreq policy - * @policy: new policy - * @target_freq: the target frequency - * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) - * - * Sets a new CPUFreq policy. - */ -static int speedstep_target (struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = 0; - struct cpufreq_freqs freqs; - cpumask_t cpus_allowed; - int i; - - if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) - return -EINVAL; - - freqs.old = _speedstep_get(policy->cpus); - freqs.new = speedstep_freqs[newstate].frequency; - freqs.cpu = policy->cpu; - - dprintk("transiting from %u to %u kHz\n", freqs.old, freqs.new); - - /* no transition necessary */ - if (freqs.old == freqs.new) - return 0; - - cpus_allowed = current->cpus_allowed; - - for_each_cpu_mask(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - } - - /* switch to physical CPU where state is to be changed */ - set_cpus_allowed(current, policy->cpus); - - speedstep_set_state(newstate); - - /* allow to be run on all CPUs */ - set_cpus_allowed(current, cpus_allowed); - - for_each_cpu_mask(i, policy->cpus) { - freqs.cpu = i; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - } - - return 0; -} - - -/** - * speedstep_verify - verifies a new CPUFreq policy - * @policy: new policy - * - * Limit must be within speedstep_low_freq and speedstep_high_freq, with - * at least one border included. - */ -static int speedstep_verify (struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); -} - - -static int speedstep_cpu_init(struct cpufreq_policy *policy) -{ - int result = 0; - unsigned int speed; - cpumask_t cpus_allowed; - - /* only run on CPU to be set, or on its sibling */ -#ifdef CONFIG_SMP - policy->cpus = cpu_sibling_map[policy->cpu]; -#endif - - cpus_allowed = current->cpus_allowed; - set_cpus_allowed(current, policy->cpus); - - /* detect low and high frequency and transition latency */ - result = speedstep_get_freqs(speedstep_processor, - &speedstep_freqs[SPEEDSTEP_LOW].frequency, - &speedstep_freqs[SPEEDSTEP_HIGH].frequency, - &policy->cpuinfo.transition_latency, - &speedstep_set_state); - set_cpus_allowed(current, cpus_allowed); - if (result) - return result; - - /* get current speed setting */ - speed = _speedstep_get(policy->cpus); - if (!speed) - return -EIO; - - dprintk("currently at %s speed setting - %i MHz\n", - (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", - (speed / 1000)); - - /* cpuinfo and default policy values */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cur = speed; - - result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); - if (result) - return (result); - - cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); - - return 0; -} - - -static int speedstep_cpu_exit(struct cpufreq_policy *policy) -{ - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - -static struct freq_attr* speedstep_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - - -static struct cpufreq_driver speedstep_driver = { - .name = "speedstep-ich", - .verify = speedstep_verify, - .target = speedstep_target, - .init = speedstep_cpu_init, - .exit = speedstep_cpu_exit, - .get = speedstep_get, - .owner = THIS_MODULE, - .attr = speedstep_attr, -}; - - -/** - * speedstep_init - initializes the SpeedStep CPUFreq driver - * - * Initializes the SpeedStep support. Returns -ENODEV on unsupported - * devices, -EINVAL on problems during initiatization, and zero on - * success. - */ -static int __init speedstep_init(void) -{ - /* detect processor */ - speedstep_processor = speedstep_detect_processor(); - if (!speedstep_processor) { - dprintk("Intel(R) SpeedStep(TM) capable processor not found\n"); - return -ENODEV; - } - - /* detect chipset */ - if (!speedstep_detect_chipset()) { - dprintk("Intel(R) SpeedStep(TM) for this chipset not (yet) available.\n"); - return -ENODEV; - } - - /* activate speedstep support */ - if (speedstep_activate()) { - pci_dev_put(speedstep_chipset_dev); - return -EINVAL; - } - - if (speedstep_find_register()) - return -ENODEV; - - return cpufreq_register_driver(&speedstep_driver); -} - - -/** - * speedstep_exit - unregisters SpeedStep support - * - * Unregisters SpeedStep support. - */ -static void __exit speedstep_exit(void) -{ - pci_dev_put(speedstep_chipset_dev); - cpufreq_unregister_driver(&speedstep_driver); -} - - -MODULE_AUTHOR ("Dave Jones , Dominik Brodowski "); -MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges."); -MODULE_LICENSE ("GPL"); - -module_init(speedstep_init); -module_exit(speedstep_exit); diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c deleted file mode 100644 index b1acc8ce316..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c +++ /dev/null @@ -1,444 +0,0 @@ -/* - * (C) 2002 - 2003 Dominik Brodowski - * - * Licensed under the terms of the GNU GPL License version 2. - * - * Library for common functions for Intel SpeedStep v.1 and v.2 support - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - -#include -#include -#include -#include -#include -#include - -#include -#include "speedstep-lib.h" - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-lib", msg) - -#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK -static int relaxed_check = 0; -#else -#define relaxed_check 0 -#endif - -/********************************************************************* - * GET PROCESSOR CORE SPEED IN KHZ * - *********************************************************************/ - -static unsigned int pentium3_get_frequency (unsigned int processor) -{ - /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ - struct { - unsigned int ratio; /* Frequency Multiplier (x10) */ - u8 bitmap; /* power on configuration bits - [27, 25:22] (in MSR 0x2a) */ - } msr_decode_mult [] = { - { 30, 0x01 }, - { 35, 0x05 }, - { 40, 0x02 }, - { 45, 0x06 }, - { 50, 0x00 }, - { 55, 0x04 }, - { 60, 0x0b }, - { 65, 0x0f }, - { 70, 0x09 }, - { 75, 0x0d }, - { 80, 0x0a }, - { 85, 0x26 }, - { 90, 0x20 }, - { 100, 0x2b }, - { 0, 0xff } /* error or unknown value */ - }; - - /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */ - struct { - unsigned int value; /* Front Side Bus speed in MHz */ - u8 bitmap; /* power on configuration bits [18: 19] - (in MSR 0x2a) */ - } msr_decode_fsb [] = { - { 66, 0x0 }, - { 100, 0x2 }, - { 133, 0x1 }, - { 0, 0xff} - }; - - u32 msr_lo, msr_tmp; - int i = 0, j = 0; - - /* read MSR 0x2a - we only need the low 32 bits */ - rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); - dprintk("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); - msr_tmp = msr_lo; - - /* decode the FSB */ - msr_tmp &= 0x00c0000; - msr_tmp >>= 18; - while (msr_tmp != msr_decode_fsb[i].bitmap) { - if (msr_decode_fsb[i].bitmap == 0xff) - return 0; - i++; - } - - /* decode the multiplier */ - if (processor == SPEEDSTEP_PROCESSOR_PIII_C_EARLY) { - dprintk("workaround for early PIIIs\n"); - msr_lo &= 0x03c00000; - } else - msr_lo &= 0x0bc00000; - msr_lo >>= 22; - while (msr_lo != msr_decode_mult[j].bitmap) { - if (msr_decode_mult[j].bitmap == 0xff) - return 0; - j++; - } - - dprintk("speed is %u\n", (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100)); - - return (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100); -} - - -static unsigned int pentiumM_get_frequency(void) -{ - u32 msr_lo, msr_tmp; - - rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); - dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); - - /* see table B-2 of 24547212.pdf */ - if (msr_lo & 0x00040000) { - printk(KERN_DEBUG "speedstep-lib: PM - invalid FSB: 0x%x 0x%x\n", msr_lo, msr_tmp); - return 0; - } - - msr_tmp = (msr_lo >> 22) & 0x1f; - dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * 100 * 1000)); - - return (msr_tmp * 100 * 1000); -} - -static unsigned int pentium_core_get_frequency(void) -{ - u32 fsb = 0; - u32 msr_lo, msr_tmp; - - rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp); - /* see table B-2 of 25366920.pdf */ - switch (msr_lo & 0x07) { - case 5: - fsb = 100000; - break; - case 1: - fsb = 133333; - break; - case 3: - fsb = 166667; - break; - default: - printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value"); - } - - rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); - dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); - - msr_tmp = (msr_lo >> 22) & 0x1f; - dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * fsb)); - - return (msr_tmp * fsb); -} - - -static unsigned int pentium4_get_frequency(void) -{ - struct cpuinfo_x86 *c = &boot_cpu_data; - u32 msr_lo, msr_hi, mult; - unsigned int fsb = 0; - - rdmsr(0x2c, msr_lo, msr_hi); - - dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi); - - /* decode the FSB: see IA-32 Intel (C) Architecture Software - * Developer's Manual, Volume 3: System Prgramming Guide, - * revision #12 in Table B-1: MSRs in the Pentium 4 and - * Intel Xeon Processors, on page B-4 and B-5. - */ - if (c->x86_model < 2) - fsb = 100 * 1000; - else { - u8 fsb_code = (msr_lo >> 16) & 0x7; - switch (fsb_code) { - case 0: - fsb = 100 * 1000; - break; - case 1: - fsb = 13333 * 10; - break; - case 2: - fsb = 200 * 1000; - break; - } - } - - if (!fsb) - printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to \n"); - - /* Multiplier. */ - if (c->x86_model < 2) - mult = msr_lo >> 27; - else - mult = msr_lo >> 24; - - dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult)); - - return (fsb * mult); -} - - -unsigned int speedstep_get_processor_frequency(unsigned int processor) -{ - switch (processor) { - case SPEEDSTEP_PROCESSOR_PCORE: - return pentium_core_get_frequency(); - case SPEEDSTEP_PROCESSOR_PM: - return pentiumM_get_frequency(); - case SPEEDSTEP_PROCESSOR_P4D: - case SPEEDSTEP_PROCESSOR_P4M: - return pentium4_get_frequency(); - case SPEEDSTEP_PROCESSOR_PIII_T: - case SPEEDSTEP_PROCESSOR_PIII_C: - case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: - return pentium3_get_frequency(processor); - default: - return 0; - }; - return 0; -} -EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency); - - -/********************************************************************* - * DETECT SPEEDSTEP-CAPABLE PROCESSOR * - *********************************************************************/ - -unsigned int speedstep_detect_processor (void) -{ - struct cpuinfo_x86 *c = cpu_data; - u32 ebx, msr_lo, msr_hi; - - dprintk("x86: %x, model: %x\n", c->x86, c->x86_model); - - if ((c->x86_vendor != X86_VENDOR_INTEL) || - ((c->x86 != 6) && (c->x86 != 0xF))) - return 0; - - if (c->x86 == 0xF) { - /* Intel Mobile Pentium 4-M - * or Intel Mobile Pentium 4 with 533 MHz FSB */ - if (c->x86_model != 2) - return 0; - - ebx = cpuid_ebx(0x00000001); - ebx &= 0x000000FF; - - dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); - - switch (c->x86_mask) { - case 4: - /* - * B-stepping [M-P4-M] - * sample has ebx = 0x0f, production has 0x0e. - */ - if ((ebx == 0x0e) || (ebx == 0x0f)) - return SPEEDSTEP_PROCESSOR_P4M; - break; - case 7: - /* - * C-stepping [M-P4-M] - * needs to have ebx=0x0e, else it's a celeron: - * cf. 25130917.pdf / page 7, footnote 5 even - * though 25072120.pdf / page 7 doesn't say - * samples are only of B-stepping... - */ - if (ebx == 0x0e) - return SPEEDSTEP_PROCESSOR_P4M; - break; - case 9: - /* - * D-stepping [M-P4-M or M-P4/533] - * - * this is totally strange: CPUID 0x0F29 is - * used by M-P4-M, M-P4/533 and(!) Celeron CPUs. - * The latter need to be sorted out as they don't - * support speedstep. - * Celerons with CPUID 0x0F29 may have either - * ebx=0x8 or 0xf -- 25130917.pdf doesn't say anything - * specific. - * M-P4-Ms may have either ebx=0xe or 0xf [see above] - * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf] - * also, M-P4M HTs have ebx=0x8, too - * For now, they are distinguished by the model_id string - */ - if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL)) - return SPEEDSTEP_PROCESSOR_P4M; - break; - default: - break; - } - return 0; - } - - switch (c->x86_model) { - case 0x0B: /* Intel PIII [Tualatin] */ - /* cpuid_ebx(1) is 0x04 for desktop PIII, 0x06 for mobile PIII-M */ - ebx = cpuid_ebx(0x00000001); - dprintk("ebx is %x\n", ebx); - - ebx &= 0x000000FF; - - if (ebx != 0x06) - return 0; - - /* So far all PIII-M processors support SpeedStep. See - * Intel's 24540640.pdf of June 2003 - */ - return SPEEDSTEP_PROCESSOR_PIII_T; - - case 0x08: /* Intel PIII [Coppermine] */ - - /* all mobile PIII Coppermines have FSB 100 MHz - * ==> sort out a few desktop PIIIs. */ - rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi); - dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", msr_lo, msr_hi); - msr_lo &= 0x00c0000; - if (msr_lo != 0x0080000) - return 0; - - /* - * If the processor is a mobile version, - * platform ID has bit 50 set - * it has SpeedStep technology if either - * bit 56 or 57 is set - */ - rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi); - dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", msr_lo, msr_hi); - if ((msr_hi & (1<<18)) && (relaxed_check ? 1 : (msr_hi & (3<<24)))) { - if (c->x86_mask == 0x01) { - dprintk("early PIII version\n"); - return SPEEDSTEP_PROCESSOR_PIII_C_EARLY; - } else - return SPEEDSTEP_PROCESSOR_PIII_C; - } - - default: - return 0; - } -} -EXPORT_SYMBOL_GPL(speedstep_detect_processor); - - -/********************************************************************* - * DETECT SPEEDSTEP SPEEDS * - *********************************************************************/ - -unsigned int speedstep_get_freqs(unsigned int processor, - unsigned int *low_speed, - unsigned int *high_speed, - unsigned int *transition_latency, - void (*set_state) (unsigned int state)) -{ - unsigned int prev_speed; - unsigned int ret = 0; - unsigned long flags; - struct timeval tv1, tv2; - - if ((!processor) || (!low_speed) || (!high_speed) || (!set_state)) - return -EINVAL; - - dprintk("trying to determine both speeds\n"); - - /* get current speed */ - prev_speed = speedstep_get_processor_frequency(processor); - if (!prev_speed) - return -EIO; - - dprintk("previous speed is %u\n", prev_speed); - - local_irq_save(flags); - - /* switch to low state */ - set_state(SPEEDSTEP_LOW); - *low_speed = speedstep_get_processor_frequency(processor); - if (!*low_speed) { - ret = -EIO; - goto out; - } - - dprintk("low speed is %u\n", *low_speed); - - /* start latency measurement */ - if (transition_latency) - do_gettimeofday(&tv1); - - /* switch to high state */ - set_state(SPEEDSTEP_HIGH); - - /* end latency measurement */ - if (transition_latency) - do_gettimeofday(&tv2); - - *high_speed = speedstep_get_processor_frequency(processor); - if (!*high_speed) { - ret = -EIO; - goto out; - } - - dprintk("high speed is %u\n", *high_speed); - - if (*low_speed == *high_speed) { - ret = -ENODEV; - goto out; - } - - /* switch to previous state, if necessary */ - if (*high_speed != prev_speed) - set_state(SPEEDSTEP_LOW); - - if (transition_latency) { - *transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC + - tv2.tv_usec - tv1.tv_usec; - dprintk("transition latency is %u uSec\n", *transition_latency); - - /* convert uSec to nSec and add 20% for safety reasons */ - *transition_latency *= 1200; - - /* check if the latency measurement is too high or too low - * and set it to a safe value (500uSec) in that case - */ - if (*transition_latency > 10000000 || *transition_latency < 50000) { - printk (KERN_WARNING "speedstep: frequency transition measured seems out of " - "range (%u nSec), falling back to a safe one of %u nSec.\n", - *transition_latency, 500000); - *transition_latency = 500000; - } - } - -out: - local_irq_restore(flags); - return (ret); -} -EXPORT_SYMBOL_GPL(speedstep_get_freqs); - -#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK -module_param(relaxed_check, int, 0444); -MODULE_PARM_DESC(relaxed_check, "Don't do all checks for speedstep capability."); -#endif - -MODULE_AUTHOR ("Dominik Brodowski "); -MODULE_DESCRIPTION ("Library for Intel SpeedStep 1 or 2 cpufreq drivers."); -MODULE_LICENSE ("GPL"); diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h deleted file mode 100644 index b11bcc608ca..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * (C) 2002 - 2003 Dominik Brodowski - * - * Licensed under the terms of the GNU GPL License version 2. - * - * Library for common functions for Intel SpeedStep v.1 and v.2 support - * - * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* - */ - - - -/* processors */ - -#define SPEEDSTEP_PROCESSOR_PIII_C_EARLY 0x00000001 /* Coppermine core */ -#define SPEEDSTEP_PROCESSOR_PIII_C 0x00000002 /* Coppermine core */ -#define SPEEDSTEP_PROCESSOR_PIII_T 0x00000003 /* Tualatin core */ -#define SPEEDSTEP_PROCESSOR_P4M 0x00000004 /* P4-M */ - -/* the following processors are not speedstep-capable and are not auto-detected - * in speedstep_detect_processor(). However, their speed can be detected using - * the speedstep_get_processor_frequency() call. */ -#define SPEEDSTEP_PROCESSOR_PM 0xFFFFFF03 /* Pentium M */ -#define SPEEDSTEP_PROCESSOR_P4D 0xFFFFFF04 /* desktop P4 */ -#define SPEEDSTEP_PROCESSOR_PCORE 0xFFFFFF05 /* Core */ - -/* speedstep states -- only two of them */ - -#define SPEEDSTEP_HIGH 0x00000000 -#define SPEEDSTEP_LOW 0x00000001 - - -/* detect a speedstep-capable processor */ -extern unsigned int speedstep_detect_processor (void); - -/* detect the current speed (in khz) of the processor */ -extern unsigned int speedstep_get_processor_frequency(unsigned int processor); - - -/* detect the low and high speeds of the processor. The callback - * set_state"'s first argument is either SPEEDSTEP_HIGH or - * SPEEDSTEP_LOW; the second argument is zero so that no - * cpufreq_notify_transition calls are initiated. - */ -extern unsigned int speedstep_get_freqs(unsigned int processor, - unsigned int *low_speed, - unsigned int *high_speed, - unsigned int *transition_latency, - void (*set_state) (unsigned int state)); diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c deleted file mode 100644 index e1c509aa305..00000000000 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c +++ /dev/null @@ -1,424 +0,0 @@ -/* - * Intel SpeedStep SMI driver. - * - * (C) 2003 Hiroshi Miura - * - * Licensed under the terms of the GNU GPL License version 2. - * - */ - - -/********************************************************************* - * SPEEDSTEP - DEFINITIONS * - *********************************************************************/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "speedstep-lib.h" - -/* speedstep system management interface port/command. - * - * These parameters are got from IST-SMI BIOS call. - * If user gives it, these are used. - * - */ -static int smi_port = 0; -static int smi_cmd = 0; -static unsigned int smi_sig = 0; - -/* info about the processor */ -static unsigned int speedstep_processor = 0; - -/* - * There are only two frequency states for each processor. Values - * are in kHz for the time being. - */ -static struct cpufreq_frequency_table speedstep_freqs[] = { - {SPEEDSTEP_HIGH, 0}, - {SPEEDSTEP_LOW, 0}, - {0, CPUFREQ_TABLE_END}, -}; - -#define GET_SPEEDSTEP_OWNER 0 -#define GET_SPEEDSTEP_STATE 1 -#define SET_SPEEDSTEP_STATE 2 -#define GET_SPEEDSTEP_FREQS 4 - -/* how often shall the SMI call be tried if it failed, e.g. because - * of DMA activity going on? */ -#define SMI_TRIES 5 - -#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-smi", msg) - -/** - * speedstep_smi_ownership - */ -static int speedstep_smi_ownership (void) -{ - u32 command, result, magic; - u32 function = GET_SPEEDSTEP_OWNER; - unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation"; - - command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - magic = virt_to_phys(magic_data); - - dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port); - - __asm__ __volatile__( - "out %%al, (%%dx)\n" - : "=D" (result) - : "a" (command), "b" (function), "c" (0), "d" (smi_port), - "D" (0), "S" (magic) - : "memory" - ); - - dprintk("result is %x\n", result); - - return result; -} - -/** - * speedstep_smi_get_freqs - get SpeedStep preferred & current freq. - * @low: the low frequency value is placed here - * @high: the high frequency value is placed here - * - * Only available on later SpeedStep-enabled systems, returns false results or - * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing - * shows that the latter occurs if !(ist_info.event & 0xFFFF). - */ -static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) -{ - u32 command, result = 0, edi, high_mhz, low_mhz; - u32 state=0; - u32 function = GET_SPEEDSTEP_FREQS; - - if (!(ist_info.event & 0xFFFF)) { - dprintk("bug #1422 -- can't read freqs from BIOS\n"); - return -ENODEV; - } - - command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - - dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port); - - __asm__ __volatile__("movl $0, %%edi\n" - "out %%al, (%%dx)\n" - : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi) - : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0) - ); - - dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz); - - /* abort if results are obviously incorrect... */ - if ((high_mhz + low_mhz) < 600) - return -EINVAL; - - *high = high_mhz * 1000; - *low = low_mhz * 1000; - - return result; -} - -/** - * speedstep_get_state - set the SpeedStep state - * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) - * - */ -static int speedstep_get_state (void) -{ - u32 function=GET_SPEEDSTEP_STATE; - u32 result, state, edi, command; - - command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - - dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port); - - __asm__ __volatile__("movl $0, %%edi\n" - "out %%al, (%%dx)\n" - : "=a" (result), "=b" (state), "=D" (edi) - : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0) - ); - - dprintk("state is %x, result is %x\n", state, result); - - return (state & 1); -} - - -/** - * speedstep_set_state - set the SpeedStep state - * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) - * - */ -static void speedstep_set_state (unsigned int state) -{ - unsigned int result = 0, command, new_state; - unsigned long flags; - unsigned int function=SET_SPEEDSTEP_STATE; - unsigned int retry = 0; - - if (state > 0x1) - return; - - /* Disable IRQs */ - local_irq_save(flags); - - command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); - - dprintk("trying to set frequency to state %u with command %x at port %x\n", state, command, smi_port); - - do { - if (retry) { - dprintk("retry %u, previous result %u, waiting...\n", retry, result); - mdelay(retry * 50); - } - retry++; - __asm__ __volatile__( - "movl $0, %%edi\n" - "out %%al, (%%dx)\n" - : "=b" (new_state), "=D" (result) - : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0) - ); - } while ((new_state != state) && (retry <= SMI_TRIES)); - - /* enable IRQs */ - local_irq_restore(flags); - - if (new_state == state) { - dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result); - } else { - printk(KERN_ERR "cpufreq: change failed with new_state %u and result %u\n", new_state, result); - } - - return; -} - - -/** - * speedstep_target - set a new CPUFreq policy - * @policy: new policy - * @target_freq: new freq - * @relation: - * - * Sets a new CPUFreq policy/freq. - */ -static int speedstep_target (struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) -{ - unsigned int newstate = 0; - struct cpufreq_freqs freqs; - - if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) - return -EINVAL; - - freqs.old = speedstep_freqs[speedstep_get_state()].frequency; - freqs.new = speedstep_freqs[newstate].frequency; - freqs.cpu = 0; /* speedstep.c is UP only driver */ - - if (freqs.old == freqs.new) - return 0; - - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - speedstep_set_state(newstate); - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - - return 0; -} - - -/** - * speedstep_verify - verifies a new CPUFreq policy - * @policy: new policy - * - * Limit must be within speedstep_low_freq and speedstep_high_freq, with - * at least one border included. - */ -static int speedstep_verify (struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); -} - - -static int speedstep_cpu_init(struct cpufreq_policy *policy) -{ - int result; - unsigned int speed,state; - - /* capability check */ - if (policy->cpu != 0) - return -ENODEV; - - result = speedstep_smi_ownership(); - if (result) { - dprintk("fails in aquiring ownership of a SMI interface.\n"); - return -EINVAL; - } - - /* detect low and high frequency */ - result = speedstep_smi_get_freqs(&speedstep_freqs[SPEEDSTEP_LOW].frequency, - &speedstep_freqs[SPEEDSTEP_HIGH].frequency); - if (result) { - /* fall back to speedstep_lib.c dection mechanism: try both states out */ - dprintk("could not detect low and high frequencies by SMI call.\n"); - result = speedstep_get_freqs(speedstep_processor, - &speedstep_freqs[SPEEDSTEP_LOW].frequency, - &speedstep_freqs[SPEEDSTEP_HIGH].frequency, - NULL, - &speedstep_set_state); - - if (result) { - dprintk("could not detect two different speeds -- aborting.\n"); - return result; - } else - dprintk("workaround worked.\n"); - } - - /* get current speed setting */ - state = speedstep_get_state(); - speed = speedstep_freqs[state].frequency; - - dprintk("currently at %s speed setting - %i MHz\n", - (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", - (speed / 1000)); - - /* cpuinfo and default policy values */ - policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - policy->cur = speed; - - result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); - if (result) - return (result); - - cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); - - return 0; -} - -static int speedstep_cpu_exit(struct cpufreq_policy *policy) -{ - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - -static unsigned int speedstep_get(unsigned int cpu) -{ - if (cpu) - return -ENODEV; - return speedstep_get_processor_frequency(speedstep_processor); -} - - -static int speedstep_resume(struct cpufreq_policy *policy) -{ - int result = speedstep_smi_ownership(); - - if (result) - dprintk("fails in re-aquiring ownership of a SMI interface.\n"); - - return result; -} - -static struct freq_attr* speedstep_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static struct cpufreq_driver speedstep_driver = { - .name = "speedstep-smi", - .verify = speedstep_verify, - .target = speedstep_target, - .init = speedstep_cpu_init, - .exit = speedstep_cpu_exit, - .get = speedstep_get, - .resume = speedstep_resume, - .owner = THIS_MODULE, - .attr = speedstep_attr, -}; - -/** - * speedstep_init - initializes the SpeedStep CPUFreq driver - * - * Initializes the SpeedStep support. Returns -ENODEV on unsupported - * BIOS, -EINVAL on problems during initiatization, and zero on - * success. - */ -static int __init speedstep_init(void) -{ - speedstep_processor = speedstep_detect_processor(); - - switch (speedstep_processor) { - case SPEEDSTEP_PROCESSOR_PIII_T: - case SPEEDSTEP_PROCESSOR_PIII_C: - case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: - break; - default: - speedstep_processor = 0; - } - - if (!speedstep_processor) { - dprintk ("No supported Intel CPU detected.\n"); - return -ENODEV; - } - - dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n", - ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level); - - /* Error if no IST-SMI BIOS or no PARM - sig= 'ISGE' aka 'Intel Speedstep Gate E' */ - if ((ist_info.signature != 0x47534943) && ( - (smi_port == 0) || (smi_cmd == 0))) - return -ENODEV; - - if (smi_sig == 1) - smi_sig = 0x47534943; - else - smi_sig = ist_info.signature; - - /* setup smi_port from MODLULE_PARM or BIOS */ - if ((smi_port > 0xff) || (smi_port < 0)) - return -EINVAL; - else if (smi_port == 0) - smi_port = ist_info.command & 0xff; - - if ((smi_cmd > 0xff) || (smi_cmd < 0)) - return -EINVAL; - else if (smi_cmd == 0) - smi_cmd = (ist_info.command >> 16) & 0xff; - - return cpufreq_register_driver(&speedstep_driver); -} - - -/** - * speedstep_exit - unregisters SpeedStep support - * - * Unregisters SpeedStep support. - */ -static void __exit speedstep_exit(void) -{ - cpufreq_unregister_driver(&speedstep_driver); -} - -module_param(smi_port, int, 0444); -module_param(smi_cmd, int, 0444); -module_param(smi_sig, uint, 0444); - -MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value -- Intel's default setting is 0xb2"); -MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value -- Intel's default setting is 0x82"); -MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the SMI interface."); - -MODULE_AUTHOR ("Hiroshi Miura"); -MODULE_DESCRIPTION ("Speedstep driver for IST applet SMI interface."); -MODULE_LICENSE ("GPL"); - -module_init(speedstep_init); -module_exit(speedstep_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig new file mode 100644 index 00000000000..d8c6f132dc7 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig @@ -0,0 +1,250 @@ +# +# CPU Frequency scaling +# + +menu "CPU Frequency scaling" + +source "drivers/cpufreq/Kconfig" + +if CPU_FREQ + +comment "CPUFreq processor drivers" + +config X86_ACPI_CPUFREQ + tristate "ACPI Processor P-States driver" + select CPU_FREQ_TABLE + depends on ACPI_PROCESSOR + help + This driver adds a CPUFreq driver which utilizes the ACPI + Processor Performance States. + This driver also supports Intel Enhanced Speedstep. + + For details, take a look at . + + If in doubt, say N. + +config ELAN_CPUFREQ + tristate "AMD Elan SC400 and SC410" + select CPU_FREQ_TABLE + depends on X86_ELAN + ---help--- + This adds the CPUFreq driver for AMD Elan SC400 and SC410 + processors. + + You need to specify the processor maximum speed as boot + parameter: elanfreq=maxspeed (in kHz) or as module + parameter "max_freq". + + For details, take a look at . + + If in doubt, say N. + +config SC520_CPUFREQ + tristate "AMD Elan SC520" + select CPU_FREQ_TABLE + depends on X86_ELAN + ---help--- + This adds the CPUFreq driver for AMD Elan SC520 processor. + + For details, take a look at . + + If in doubt, say N. + + +config X86_POWERNOW_K6 + tristate "AMD Mobile K6-2/K6-3 PowerNow!" + select CPU_FREQ_TABLE + help + This adds the CPUFreq driver for mobile AMD K6-2+ and mobile + AMD K6-3+ processors. + + For details, take a look at . + + If in doubt, say N. + +config X86_POWERNOW_K7 + tristate "AMD Mobile Athlon/Duron PowerNow!" + select CPU_FREQ_TABLE + help + This adds the CPUFreq driver for mobile AMD K7 mobile processors. + + For details, take a look at . + + If in doubt, say N. + +config X86_POWERNOW_K7_ACPI + bool + depends on X86_POWERNOW_K7 && ACPI_PROCESSOR + depends on !(X86_POWERNOW_K7 = y && ACPI_PROCESSOR = m) + default y + +config X86_POWERNOW_K8 + tristate "AMD Opteron/Athlon64 PowerNow!" + select CPU_FREQ_TABLE + depends on EXPERIMENTAL + help + This adds the CPUFreq driver for mobile AMD Opteron/Athlon64 processors. + + For details, take a look at . + + If in doubt, say N. + +config X86_POWERNOW_K8_ACPI + bool "ACPI Support" + select ACPI_PROCESSOR + depends on ACPI && X86_POWERNOW_K8 + default y + help + This provides access to the K8s Processor Performance States via ACPI. + This driver is probably required for CPUFreq to work with multi-socket and + SMP systems. It is not required on at least some single-socket yet + multi-core systems, even if SMP is enabled. + + It is safe to say Y here. + +config X86_GX_SUSPMOD + tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" + depends on PCI + help + This add the CPUFreq driver for NatSemi Geode processors which + support suspend modulation. + + For details, take a look at . + + If in doubt, say N. + +config X86_SPEEDSTEP_CENTRINO + tristate "Intel Enhanced SpeedStep" + select CPU_FREQ_TABLE + select X86_SPEEDSTEP_CENTRINO_TABLE + help + This adds the CPUFreq driver for Enhanced SpeedStep enabled + mobile CPUs. This means Intel Pentium M (Centrino) CPUs. However, + you also need to say Y to "Use ACPI tables to decode..." below + [which might imply enabling ACPI] if you want to use this driver + on non-Banias CPUs. + + For details, take a look at . + + If in doubt, say N. + +config X86_SPEEDSTEP_CENTRINO_TABLE + bool "Built-in tables for Banias CPUs" + depends on X86_SPEEDSTEP_CENTRINO + default y + help + Use built-in tables for Banias CPUs if ACPI encoding + is not available. + + If in doubt, say N. + +config X86_SPEEDSTEP_ICH + tristate "Intel Speedstep on ICH-M chipsets (ioport interface)" + select CPU_FREQ_TABLE + help + This adds the CPUFreq driver for certain mobile Intel Pentium III + (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all + mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2, + ICH3 or ICH4 southbridge. + + For details, take a look at . + + If in doubt, say N. + +config X86_SPEEDSTEP_SMI + tristate "Intel SpeedStep on 440BX/ZX/MX chipsets (SMI interface)" + select CPU_FREQ_TABLE + depends on EXPERIMENTAL + help + This adds the CPUFreq driver for certain mobile Intel Pentium III + (Coppermine), all mobile Intel Pentium III-M (Tualatin) + on systems which have an Intel 440BX/ZX/MX southbridge. + + For details, take a look at . + + If in doubt, say N. + +config X86_P4_CLOCKMOD + tristate "Intel Pentium 4 clock modulation" + select CPU_FREQ_TABLE + help + This adds the CPUFreq driver for Intel Pentium 4 / XEON + processors. + + For details, take a look at . + + If in doubt, say N. + +config X86_CPUFREQ_NFORCE2 + tristate "nVidia nForce2 FSB changing" + depends on EXPERIMENTAL + help + This adds the CPUFreq driver for FSB changing on nVidia nForce2 + platforms. + + For details, take a look at . + + If in doubt, say N. + +config X86_LONGRUN + tristate "Transmeta LongRun" + help + This adds the CPUFreq driver for Transmeta Crusoe and Efficeon processors + which support LongRun. + + For details, take a look at . + + If in doubt, say N. + +config X86_LONGHAUL + tristate "VIA Cyrix III Longhaul" + select CPU_FREQ_TABLE + depends on ACPI_PROCESSOR + help + This adds the CPUFreq driver for VIA Samuel/CyrixIII, + VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T + processors. + + For details, take a look at . + + If in doubt, say N. + +config X86_E_POWERSAVER + tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)" + select CPU_FREQ_TABLE + depends on EXPERIMENTAL + help + This adds the CPUFreq driver for VIA C7 processors. + + If in doubt, say N. + +comment "shared options" + +config X86_ACPI_CPUFREQ_PROC_INTF + bool "/proc/acpi/processor/../performance interface (deprecated)" + depends on PROC_FS + depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI + help + This enables the deprecated /proc/acpi/processor/../performance + interface. While it is helpful for debugging, the generic, + cross-architecture cpufreq interfaces should be used. + + If in doubt, say N. + +config X86_SPEEDSTEP_LIB + tristate + default X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD + +config X86_SPEEDSTEP_RELAXED_CAP_CHECK + bool "Relaxed speedstep capability checks" + depends on (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH) + help + Don't perform all checks for a speedstep capable system which would + normally be done. Some ancient or strange systems, though speedstep + capable, don't always indicate that they are speedstep capable. This + option lets the probing code bypass some of those checks if the + parameter "relaxed_check=1" is passed to the module. + +endif # CPU_FREQ + +endmenu diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile new file mode 100644 index 00000000000..560f7760dae --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/Makefile @@ -0,0 +1,16 @@ +obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o +obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o +obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o +obj-$(CONFIG_X86_LONGHAUL) += longhaul.o +obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o +obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o +obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o +obj-$(CONFIG_X86_LONGRUN) += longrun.o +obj-$(CONFIG_X86_GX_SUSPMOD) += gx-suspmod.o +obj-$(CONFIG_X86_SPEEDSTEP_ICH) += speedstep-ich.o +obj-$(CONFIG_X86_SPEEDSTEP_LIB) += speedstep-lib.o +obj-$(CONFIG_X86_SPEEDSTEP_SMI) += speedstep-smi.o +obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o +obj-$(CONFIG_X86_SPEEDSTEP_CENTRINO) += speedstep-centrino.o +obj-$(CONFIG_X86_P4_CLOCKMOD) += p4-clockmod.o +obj-$(CONFIG_X86_CPUFREQ_NFORCE2) += cpufreq-nforce2.o diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c new file mode 100644 index 00000000000..705e13a3078 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -0,0 +1,799 @@ +/* + * acpi-cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.4 $) + * + * Copyright (C) 2001, 2002 Andy Grover + * Copyright (C) 2001, 2002 Paul Diefenbaugh + * Copyright (C) 2002 - 2004 Dominik Brodowski + * Copyright (C) 2006 Denis Sadykov + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) + +MODULE_AUTHOR("Paul Diefenbaugh, Dominik Brodowski"); +MODULE_DESCRIPTION("ACPI Processor P-States Driver"); +MODULE_LICENSE("GPL"); + +enum { + UNDEFINED_CAPABLE = 0, + SYSTEM_INTEL_MSR_CAPABLE, + SYSTEM_IO_CAPABLE, +}; + +#define INTEL_MSR_RANGE (0xffff) +#define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1) + +struct acpi_cpufreq_data { + struct acpi_processor_performance *acpi_data; + struct cpufreq_frequency_table *freq_table; + unsigned int max_freq; + unsigned int resume; + unsigned int cpu_feature; +}; + +static struct acpi_cpufreq_data *drv_data[NR_CPUS]; +/* acpi_perf_data is a pointer to percpu data. */ +static struct acpi_processor_performance *acpi_perf_data; + +static struct cpufreq_driver acpi_cpufreq_driver; + +static unsigned int acpi_pstate_strict; + +static int check_est_cpu(unsigned int cpuid) +{ + struct cpuinfo_x86 *cpu = &cpu_data[cpuid]; + + if (cpu->x86_vendor != X86_VENDOR_INTEL || + !cpu_has(cpu, X86_FEATURE_EST)) + return 0; + + return 1; +} + +static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) +{ + struct acpi_processor_performance *perf; + int i; + + perf = data->acpi_data; + + for (i=0; istate_count; i++) { + if (value == perf->states[i].status) + return data->freq_table[i].frequency; + } + return 0; +} + +static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data) +{ + int i; + struct acpi_processor_performance *perf; + + msr &= INTEL_MSR_RANGE; + perf = data->acpi_data; + + for (i=0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) { + if (msr == perf->states[data->freq_table[i].index].status) + return data->freq_table[i].frequency; + } + return data->freq_table[0].frequency; +} + +static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data) +{ + switch (data->cpu_feature) { + case SYSTEM_INTEL_MSR_CAPABLE: + return extract_msr(val, data); + case SYSTEM_IO_CAPABLE: + return extract_io(val, data); + default: + return 0; + } +} + +struct msr_addr { + u32 reg; +}; + +struct io_addr { + u16 port; + u8 bit_width; +}; + +typedef union { + struct msr_addr msr; + struct io_addr io; +} drv_addr_union; + +struct drv_cmd { + unsigned int type; + cpumask_t mask; + drv_addr_union addr; + u32 val; +}; + +static void do_drv_read(struct drv_cmd *cmd) +{ + u32 h; + + switch (cmd->type) { + case SYSTEM_INTEL_MSR_CAPABLE: + rdmsr(cmd->addr.msr.reg, cmd->val, h); + break; + case SYSTEM_IO_CAPABLE: + acpi_os_read_port((acpi_io_address)cmd->addr.io.port, + &cmd->val, + (u32)cmd->addr.io.bit_width); + break; + default: + break; + } +} + +static void do_drv_write(struct drv_cmd *cmd) +{ + u32 lo, hi; + + switch (cmd->type) { + case SYSTEM_INTEL_MSR_CAPABLE: + rdmsr(cmd->addr.msr.reg, lo, hi); + lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE); + wrmsr(cmd->addr.msr.reg, lo, hi); + break; + case SYSTEM_IO_CAPABLE: + acpi_os_write_port((acpi_io_address)cmd->addr.io.port, + cmd->val, + (u32)cmd->addr.io.bit_width); + break; + default: + break; + } +} + +static void drv_read(struct drv_cmd *cmd) +{ + cpumask_t saved_mask = current->cpus_allowed; + cmd->val = 0; + + set_cpus_allowed(current, cmd->mask); + do_drv_read(cmd); + set_cpus_allowed(current, saved_mask); +} + +static void drv_write(struct drv_cmd *cmd) +{ + cpumask_t saved_mask = current->cpus_allowed; + unsigned int i; + + for_each_cpu_mask(i, cmd->mask) { + set_cpus_allowed(current, cpumask_of_cpu(i)); + do_drv_write(cmd); + } + + set_cpus_allowed(current, saved_mask); + return; +} + +static u32 get_cur_val(cpumask_t mask) +{ + struct acpi_processor_performance *perf; + struct drv_cmd cmd; + + if (unlikely(cpus_empty(mask))) + return 0; + + switch (drv_data[first_cpu(mask)]->cpu_feature) { + case SYSTEM_INTEL_MSR_CAPABLE: + cmd.type = SYSTEM_INTEL_MSR_CAPABLE; + cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; + break; + case SYSTEM_IO_CAPABLE: + cmd.type = SYSTEM_IO_CAPABLE; + perf = drv_data[first_cpu(mask)]->acpi_data; + cmd.addr.io.port = perf->control_register.address; + cmd.addr.io.bit_width = perf->control_register.bit_width; + break; + default: + return 0; + } + + cmd.mask = mask; + + drv_read(&cmd); + + dprintk("get_cur_val = %u\n", cmd.val); + + return cmd.val; +} + +/* + * Return the measured active (C0) frequency on this CPU since last call + * to this function. + * Input: cpu number + * Return: Average CPU frequency in terms of max frequency (zero on error) + * + * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance + * over a period of time, while CPU is in C0 state. + * IA32_MPERF counts at the rate of max advertised frequency + * IA32_APERF counts at the rate of actual CPU frequency + * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and + * no meaning should be associated with absolute values of these MSRs. + */ +static unsigned int get_measured_perf(unsigned int cpu) +{ + union { + struct { + u32 lo; + u32 hi; + } split; + u64 whole; + } aperf_cur, mperf_cur; + + cpumask_t saved_mask; + unsigned int perf_percent; + unsigned int retval; + + saved_mask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (get_cpu() != cpu) { + /* We were not able to run on requested processor */ + put_cpu(); + return 0; + } + + rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi); + rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi); + + wrmsr(MSR_IA32_APERF, 0,0); + wrmsr(MSR_IA32_MPERF, 0,0); + +#ifdef __i386__ + /* + * We dont want to do 64 bit divide with 32 bit kernel + * Get an approximate value. Return failure in case we cannot get + * an approximate value. + */ + if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) { + int shift_count; + u32 h; + + h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi); + shift_count = fls(h); + + aperf_cur.whole >>= shift_count; + mperf_cur.whole >>= shift_count; + } + + if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) { + int shift_count = 7; + aperf_cur.split.lo >>= shift_count; + mperf_cur.split.lo >>= shift_count; + } + + if (aperf_cur.split.lo && mperf_cur.split.lo) + perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo; + else + perf_percent = 0; + +#else + if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) { + int shift_count = 7; + aperf_cur.whole >>= shift_count; + mperf_cur.whole >>= shift_count; + } + + if (aperf_cur.whole && mperf_cur.whole) + perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole; + else + perf_percent = 0; + +#endif + + retval = drv_data[cpu]->max_freq * perf_percent / 100; + + put_cpu(); + set_cpus_allowed(current, saved_mask); + + dprintk("cpu %d: performance percent %d\n", cpu, perf_percent); + return retval; +} + +static unsigned int get_cur_freq_on_cpu(unsigned int cpu) +{ + struct acpi_cpufreq_data *data = drv_data[cpu]; + unsigned int freq; + + dprintk("get_cur_freq_on_cpu (%d)\n", cpu); + + if (unlikely(data == NULL || + data->acpi_data == NULL || data->freq_table == NULL)) { + return 0; + } + + freq = extract_freq(get_cur_val(cpumask_of_cpu(cpu)), data); + dprintk("cur freq = %u\n", freq); + + return freq; +} + +static unsigned int check_freqs(cpumask_t mask, unsigned int freq, + struct acpi_cpufreq_data *data) +{ + unsigned int cur_freq; + unsigned int i; + + for (i=0; i<100; i++) { + cur_freq = extract_freq(get_cur_val(mask), data); + if (cur_freq == freq) + return 1; + udelay(10); + } + return 0; +} + +static int acpi_cpufreq_target(struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + struct acpi_processor_performance *perf; + struct cpufreq_freqs freqs; + cpumask_t online_policy_cpus; + struct drv_cmd cmd; + unsigned int next_state = 0; /* Index into freq_table */ + unsigned int next_perf_state = 0; /* Index into perf table */ + unsigned int i; + int result = 0; + + dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); + + if (unlikely(data == NULL || + data->acpi_data == NULL || data->freq_table == NULL)) { + return -ENODEV; + } + + perf = data->acpi_data; + result = cpufreq_frequency_table_target(policy, + data->freq_table, + target_freq, + relation, &next_state); + if (unlikely(result)) + return -ENODEV; + +#ifdef CONFIG_HOTPLUG_CPU + /* cpufreq holds the hotplug lock, so we are safe from here on */ + cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); +#else + online_policy_cpus = policy->cpus; +#endif + + next_perf_state = data->freq_table[next_state].index; + if (perf->state == next_perf_state) { + if (unlikely(data->resume)) { + dprintk("Called after resume, resetting to P%d\n", + next_perf_state); + data->resume = 0; + } else { + dprintk("Already at target state (P%d)\n", + next_perf_state); + return 0; + } + } + + switch (data->cpu_feature) { + case SYSTEM_INTEL_MSR_CAPABLE: + cmd.type = SYSTEM_INTEL_MSR_CAPABLE; + cmd.addr.msr.reg = MSR_IA32_PERF_CTL; + cmd.val = (u32) perf->states[next_perf_state].control; + break; + case SYSTEM_IO_CAPABLE: + cmd.type = SYSTEM_IO_CAPABLE; + cmd.addr.io.port = perf->control_register.address; + cmd.addr.io.bit_width = perf->control_register.bit_width; + cmd.val = (u32) perf->states[next_perf_state].control; + break; + default: + return -ENODEV; + } + + cpus_clear(cmd.mask); + + if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) + cmd.mask = online_policy_cpus; + else + cpu_set(policy->cpu, cmd.mask); + + freqs.old = perf->states[perf->state].core_frequency * 1000; + freqs.new = data->freq_table[next_state].frequency; + for_each_cpu_mask(i, cmd.mask) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } + + drv_write(&cmd); + + if (acpi_pstate_strict) { + if (!check_freqs(cmd.mask, freqs.new, data)) { + dprintk("acpi_cpufreq_target failed (%d)\n", + policy->cpu); + return -EAGAIN; + } + } + + for_each_cpu_mask(i, cmd.mask) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + perf->state = next_perf_state; + + return result; +} + +static int acpi_cpufreq_verify(struct cpufreq_policy *policy) +{ + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + + dprintk("acpi_cpufreq_verify\n"); + + return cpufreq_frequency_table_verify(policy, data->freq_table); +} + +static unsigned long +acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) +{ + struct acpi_processor_performance *perf = data->acpi_data; + + if (cpu_khz) { + /* search the closest match to cpu_khz */ + unsigned int i; + unsigned long freq; + unsigned long freqn = perf->states[0].core_frequency * 1000; + + for (i=0; i<(perf->state_count-1); i++) { + freq = freqn; + freqn = perf->states[i+1].core_frequency * 1000; + if ((2 * cpu_khz) > (freqn + freq)) { + perf->state = i; + return freq; + } + } + perf->state = perf->state_count-1; + return freqn; + } else { + /* assume CPU is at P0... */ + perf->state = 0; + return perf->states[0].core_frequency * 1000; + } +} + +/* + * acpi_cpufreq_early_init - initialize ACPI P-States library + * + * Initialize the ACPI P-States library (drivers/acpi/processor_perflib.c) + * in order to determine correct frequency and voltage pairings. We can + * do _PDC and _PSD and find out the processor dependency for the + * actual init that will happen later... + */ +static int __init acpi_cpufreq_early_init(void) +{ + dprintk("acpi_cpufreq_early_init\n"); + + acpi_perf_data = alloc_percpu(struct acpi_processor_performance); + if (!acpi_perf_data) { + dprintk("Memory allocation error for acpi_perf_data.\n"); + return -ENOMEM; + } + + /* Do initialization in ACPI core */ + acpi_processor_preregister_performance(acpi_perf_data); + return 0; +} + +#ifdef CONFIG_SMP +/* + * Some BIOSes do SW_ANY coordination internally, either set it up in hw + * or do it in BIOS firmware and won't inform about it to OS. If not + * detected, this has a side effect of making CPU run at a different speed + * than OS intended it to run at. Detect it and handle it cleanly. + */ +static int bios_with_sw_any_bug; + +static int sw_any_bug_found(struct dmi_system_id *d) +{ + bios_with_sw_any_bug = 1; + return 0; +} + +static struct dmi_system_id sw_any_bug_dmi_table[] = { + { + .callback = sw_any_bug_found, + .ident = "Supermicro Server X6DLP", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"), + DMI_MATCH(DMI_BIOS_VERSION, "080010"), + DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"), + }, + }, + { } +}; +#endif + +static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) +{ + unsigned int i; + unsigned int valid_states = 0; + unsigned int cpu = policy->cpu; + struct acpi_cpufreq_data *data; + unsigned int result = 0; + struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; + struct acpi_processor_performance *perf; + + dprintk("acpi_cpufreq_cpu_init\n"); + + data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->acpi_data = percpu_ptr(acpi_perf_data, cpu); + drv_data[cpu] = data; + + if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) + acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; + + result = acpi_processor_register_performance(data->acpi_data, cpu); + if (result) + goto err_free; + + perf = data->acpi_data; + policy->shared_type = perf->shared_type; + + /* + * Will let policy->cpus know about dependency only when software + * coordination is required. + */ + if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || + policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { + policy->cpus = perf->shared_cpu_map; + } + +#ifdef CONFIG_SMP + dmi_check_system(sw_any_bug_dmi_table); + if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) { + policy->shared_type = CPUFREQ_SHARED_TYPE_ALL; + policy->cpus = cpu_core_map[cpu]; + } +#endif + + /* capability check */ + if (perf->state_count <= 1) { + dprintk("No P-States\n"); + result = -ENODEV; + goto err_unreg; + } + + if (perf->control_register.space_id != perf->status_register.space_id) { + result = -ENODEV; + goto err_unreg; + } + + switch (perf->control_register.space_id) { + case ACPI_ADR_SPACE_SYSTEM_IO: + dprintk("SYSTEM IO addr space\n"); + data->cpu_feature = SYSTEM_IO_CAPABLE; + break; + case ACPI_ADR_SPACE_FIXED_HARDWARE: + dprintk("HARDWARE addr space\n"); + if (!check_est_cpu(cpu)) { + result = -ENODEV; + goto err_unreg; + } + data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE; + break; + default: + dprintk("Unknown addr space %d\n", + (u32) (perf->control_register.space_id)); + result = -ENODEV; + goto err_unreg; + } + + data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * + (perf->state_count+1), GFP_KERNEL); + if (!data->freq_table) { + result = -ENOMEM; + goto err_unreg; + } + + /* detect transition latency */ + policy->cpuinfo.transition_latency = 0; + for (i=0; istate_count; i++) { + if ((perf->states[i].transition_latency * 1000) > + policy->cpuinfo.transition_latency) + policy->cpuinfo.transition_latency = + perf->states[i].transition_latency * 1000; + } + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + + data->max_freq = perf->states[0].core_frequency * 1000; + /* table init */ + for (i=0; istate_count; i++) { + if (i>0 && perf->states[i].core_frequency >= + data->freq_table[valid_states-1].frequency / 1000) + continue; + + data->freq_table[valid_states].index = i; + data->freq_table[valid_states].frequency = + perf->states[i].core_frequency * 1000; + valid_states++; + } + data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END; + perf->state = 0; + + result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table); + if (result) + goto err_freqfree; + + switch (perf->control_register.space_id) { + case ACPI_ADR_SPACE_SYSTEM_IO: + /* Current speed is unknown and not detectable by IO port */ + policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu); + break; + case ACPI_ADR_SPACE_FIXED_HARDWARE: + acpi_cpufreq_driver.get = get_cur_freq_on_cpu; + policy->cur = get_cur_freq_on_cpu(cpu); + break; + default: + break; + } + + /* notify BIOS that we exist */ + acpi_processor_notify_smm(THIS_MODULE); + + /* Check for APERF/MPERF support in hardware */ + if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) { + unsigned int ecx; + ecx = cpuid_ecx(6); + if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) + acpi_cpufreq_driver.getavg = get_measured_perf; + } + + dprintk("CPU%u - ACPI performance management activated.\n", cpu); + for (i = 0; i < perf->state_count; i++) + dprintk(" %cP%d: %d MHz, %d mW, %d uS\n", + (i == perf->state ? '*' : ' '), i, + (u32) perf->states[i].core_frequency, + (u32) perf->states[i].power, + (u32) perf->states[i].transition_latency); + + cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu); + + /* + * the first call to ->target() should result in us actually + * writing something to the appropriate registers. + */ + data->resume = 1; + + return result; + +err_freqfree: + kfree(data->freq_table); +err_unreg: + acpi_processor_unregister_performance(perf, cpu); +err_free: + kfree(data); + drv_data[cpu] = NULL; + + return result; +} + +static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) +{ + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + + dprintk("acpi_cpufreq_cpu_exit\n"); + + if (data) { + cpufreq_frequency_table_put_attr(policy->cpu); + drv_data[policy->cpu] = NULL; + acpi_processor_unregister_performance(data->acpi_data, + policy->cpu); + kfree(data); + } + + return 0; +} + +static int acpi_cpufreq_resume(struct cpufreq_policy *policy) +{ + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; + + dprintk("acpi_cpufreq_resume\n"); + + data->resume = 1; + + return 0; +} + +static struct freq_attr *acpi_cpufreq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver acpi_cpufreq_driver = { + .verify = acpi_cpufreq_verify, + .target = acpi_cpufreq_target, + .init = acpi_cpufreq_cpu_init, + .exit = acpi_cpufreq_cpu_exit, + .resume = acpi_cpufreq_resume, + .name = "acpi-cpufreq", + .owner = THIS_MODULE, + .attr = acpi_cpufreq_attr, +}; + +static int __init acpi_cpufreq_init(void) +{ + int ret; + + dprintk("acpi_cpufreq_init\n"); + + ret = acpi_cpufreq_early_init(); + if (ret) + return ret; + + return cpufreq_register_driver(&acpi_cpufreq_driver); +} + +static void __exit acpi_cpufreq_exit(void) +{ + dprintk("acpi_cpufreq_exit\n"); + + cpufreq_unregister_driver(&acpi_cpufreq_driver); + + free_percpu(acpi_perf_data); + + return; +} + +module_param(acpi_pstate_strict, uint, 0644); +MODULE_PARM_DESC(acpi_pstate_strict, + "value 0 or non-zero. non-zero -> strict ACPI checks are " + "performed during frequency changes."); + +late_initcall(acpi_cpufreq_init); +module_exit(acpi_cpufreq_exit); + +MODULE_ALIAS("acpi"); diff --git a/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c new file mode 100644 index 00000000000..66acd503991 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -0,0 +1,441 @@ +/* + * (C) 2004-2006 Sebastian Witt + * + * Licensed under the terms of the GNU GPL License version 2. + * Based upon reverse engineered information + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + +#include +#include +#include +#include +#include +#include +#include + +#define NFORCE2_XTAL 25 +#define NFORCE2_BOOTFSB 0x48 +#define NFORCE2_PLLENABLE 0xa8 +#define NFORCE2_PLLREG 0xa4 +#define NFORCE2_PLLADR 0xa0 +#define NFORCE2_PLL(mul, div) (0x100000 | (mul << 8) | div) + +#define NFORCE2_MIN_FSB 50 +#define NFORCE2_SAFE_DISTANCE 50 + +/* Delay in ms between FSB changes */ +//#define NFORCE2_DELAY 10 + +/* nforce2_chipset: + * FSB is changed using the chipset + */ +static struct pci_dev *nforce2_chipset_dev; + +/* fid: + * multiplier * 10 + */ +static int fid = 0; + +/* min_fsb, max_fsb: + * minimum and maximum FSB (= FSB at boot time) + */ +static int min_fsb = 0; +static int max_fsb = 0; + +MODULE_AUTHOR("Sebastian Witt "); +MODULE_DESCRIPTION("nForce2 FSB changing cpufreq driver"); +MODULE_LICENSE("GPL"); + +module_param(fid, int, 0444); +module_param(min_fsb, int, 0444); + +MODULE_PARM_DESC(fid, "CPU multiplier to use (11.5 = 115)"); +MODULE_PARM_DESC(min_fsb, + "Minimum FSB to use, if not defined: current FSB - 50"); + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg) + +/** + * nforce2_calc_fsb - calculate FSB + * @pll: PLL value + * + * Calculates FSB from PLL value + */ +static int nforce2_calc_fsb(int pll) +{ + unsigned char mul, div; + + mul = (pll >> 8) & 0xff; + div = pll & 0xff; + + if (div > 0) + return NFORCE2_XTAL * mul / div; + + return 0; +} + +/** + * nforce2_calc_pll - calculate PLL value + * @fsb: FSB + * + * Calculate PLL value for given FSB + */ +static int nforce2_calc_pll(unsigned int fsb) +{ + unsigned char xmul, xdiv; + unsigned char mul = 0, div = 0; + int tried = 0; + + /* Try to calculate multiplier and divider up to 4 times */ + while (((mul == 0) || (div == 0)) && (tried <= 3)) { + for (xdiv = 2; xdiv <= 0x80; xdiv++) + for (xmul = 1; xmul <= 0xfe; xmul++) + if (nforce2_calc_fsb(NFORCE2_PLL(xmul, xdiv)) == + fsb + tried) { + mul = xmul; + div = xdiv; + } + tried++; + } + + if ((mul == 0) || (div == 0)) + return -1; + + return NFORCE2_PLL(mul, div); +} + +/** + * nforce2_write_pll - write PLL value to chipset + * @pll: PLL value + * + * Writes new FSB PLL value to chipset + */ +static void nforce2_write_pll(int pll) +{ + int temp; + + /* Set the pll addr. to 0x00 */ + pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, 0); + + /* Now write the value in all 64 registers */ + for (temp = 0; temp <= 0x3f; temp++) + pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, pll); + + return; +} + +/** + * nforce2_fsb_read - Read FSB + * + * Read FSB from chipset + * If bootfsb != 0, return FSB at boot-time + */ +static unsigned int nforce2_fsb_read(int bootfsb) +{ + struct pci_dev *nforce2_sub5; + u32 fsb, temp = 0; + + /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ + nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, + 0x01EF,PCI_ANY_ID,PCI_ANY_ID,NULL); + if (!nforce2_sub5) + return 0; + + pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb); + fsb /= 1000000; + + /* Check if PLL register is already set */ + pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); + + if(bootfsb || !temp) + return fsb; + + /* Use PLL register FSB value */ + pci_read_config_dword(nforce2_chipset_dev,NFORCE2_PLLREG, &temp); + fsb = nforce2_calc_fsb(temp); + + return fsb; +} + +/** + * nforce2_set_fsb - set new FSB + * @fsb: New FSB + * + * Sets new FSB + */ +static int nforce2_set_fsb(unsigned int fsb) +{ + u32 temp = 0; + unsigned int tfsb; + int diff; + int pll = 0; + + if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) { + printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb); + return -EINVAL; + } + + tfsb = nforce2_fsb_read(0); + if (!tfsb) { + printk(KERN_ERR "cpufreq: Error while reading the FSB\n"); + return -EINVAL; + } + + /* First write? Then set actual value */ + pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); + if (!temp) { + pll = nforce2_calc_pll(tfsb); + + if (pll < 0) + return -EINVAL; + + nforce2_write_pll(pll); + } + + /* Enable write access */ + temp = 0x01; + pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLENABLE, (u8)temp); + + diff = tfsb - fsb; + + if (!diff) + return 0; + + while ((tfsb != fsb) && (tfsb <= max_fsb) && (tfsb >= min_fsb)) { + if (diff < 0) + tfsb++; + else + tfsb--; + + /* Calculate the PLL reg. value */ + if ((pll = nforce2_calc_pll(tfsb)) == -1) + return -EINVAL; + + nforce2_write_pll(pll); +#ifdef NFORCE2_DELAY + mdelay(NFORCE2_DELAY); +#endif + } + + temp = 0x40; + pci_write_config_byte(nforce2_chipset_dev, NFORCE2_PLLADR, (u8)temp); + + return 0; +} + +/** + * nforce2_get - get the CPU frequency + * @cpu: CPU number + * + * Returns the CPU frequency + */ +static unsigned int nforce2_get(unsigned int cpu) +{ + if (cpu) + return 0; + return nforce2_fsb_read(0) * fid * 100; +} + +/** + * nforce2_target - set a new CPUFreq policy + * @policy: new policy + * @target_freq: the target frequency + * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) + * + * Sets a new CPUFreq policy. + */ +static int nforce2_target(struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ +// unsigned long flags; + struct cpufreq_freqs freqs; + unsigned int target_fsb; + + if ((target_freq > policy->max) || (target_freq < policy->min)) + return -EINVAL; + + target_fsb = target_freq / (fid * 100); + + freqs.old = nforce2_get(policy->cpu); + freqs.new = target_fsb * fid * 100; + freqs.cpu = 0; /* Only one CPU on nForce2 plattforms */ + + if (freqs.old == freqs.new) + return 0; + + dprintk("Old CPU frequency %d kHz, new %d kHz\n", + freqs.old, freqs.new); + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + /* Disable IRQs */ + //local_irq_save(flags); + + if (nforce2_set_fsb(target_fsb) < 0) + printk(KERN_ERR "cpufreq: Changing FSB to %d failed\n", + target_fsb); + else + dprintk("Changed FSB successfully to %d\n", + target_fsb); + + /* Enable IRQs */ + //local_irq_restore(flags); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + return 0; +} + +/** + * nforce2_verify - verifies a new CPUFreq policy + * @policy: new policy + */ +static int nforce2_verify(struct cpufreq_policy *policy) +{ + unsigned int fsb_pol_max; + + fsb_pol_max = policy->max / (fid * 100); + + if (policy->min < (fsb_pol_max * fid * 100)) + policy->max = (fsb_pol_max + 1) * fid * 100; + + cpufreq_verify_within_limits(policy, + policy->cpuinfo.min_freq, + policy->cpuinfo.max_freq); + return 0; +} + +static int nforce2_cpu_init(struct cpufreq_policy *policy) +{ + unsigned int fsb; + unsigned int rfid; + + /* capability check */ + if (policy->cpu != 0) + return -ENODEV; + + /* Get current FSB */ + fsb = nforce2_fsb_read(0); + + if (!fsb) + return -EIO; + + /* FIX: Get FID from CPU */ + if (!fid) { + if (!cpu_khz) { + printk(KERN_WARNING + "cpufreq: cpu_khz not set, can't calculate multiplier!\n"); + return -ENODEV; + } + + fid = cpu_khz / (fsb * 100); + rfid = fid % 5; + + if (rfid) { + if (rfid > 2) + fid += 5 - rfid; + else + fid -= rfid; + } + } + + printk(KERN_INFO "cpufreq: FSB currently at %i MHz, FID %d.%d\n", fsb, + fid / 10, fid % 10); + + /* Set maximum FSB to FSB at boot time */ + max_fsb = nforce2_fsb_read(1); + + if(!max_fsb) + return -EIO; + + if (!min_fsb) + min_fsb = max_fsb - NFORCE2_SAFE_DISTANCE; + + if (min_fsb < NFORCE2_MIN_FSB) + min_fsb = NFORCE2_MIN_FSB; + + /* cpuinfo and default policy values */ + policy->cpuinfo.min_freq = min_fsb * fid * 100; + policy->cpuinfo.max_freq = max_fsb * fid * 100; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cur = nforce2_get(policy->cpu); + policy->min = policy->cpuinfo.min_freq; + policy->max = policy->cpuinfo.max_freq; + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + + return 0; +} + +static int nforce2_cpu_exit(struct cpufreq_policy *policy) +{ + return 0; +} + +static struct cpufreq_driver nforce2_driver = { + .name = "nforce2", + .verify = nforce2_verify, + .target = nforce2_target, + .get = nforce2_get, + .init = nforce2_cpu_init, + .exit = nforce2_cpu_exit, + .owner = THIS_MODULE, +}; + +/** + * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic + * + * Detects nForce2 A2 and C1 stepping + * + */ +static unsigned int nforce2_detect_chipset(void) +{ + nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, + PCI_DEVICE_ID_NVIDIA_NFORCE2, + PCI_ANY_ID, PCI_ANY_ID, NULL); + + if (nforce2_chipset_dev == NULL) + return -ENODEV; + + printk(KERN_INFO "cpufreq: Detected nForce2 chipset revision %X\n", + nforce2_chipset_dev->revision); + printk(KERN_INFO + "cpufreq: FSB changing is maybe unstable and can lead to crashes and data loss.\n"); + + return 0; +} + +/** + * nforce2_init - initializes the nForce2 CPUFreq driver + * + * Initializes the nForce2 FSB support. Returns -ENODEV on unsupported + * devices, -EINVAL on problems during initiatization, and zero on + * success. + */ +static int __init nforce2_init(void) +{ + /* TODO: do we need to detect the processor? */ + + /* detect chipset */ + if (nforce2_detect_chipset()) { + printk(KERN_ERR "cpufreq: No nForce2 chipset.\n"); + return -ENODEV; + } + + return cpufreq_register_driver(&nforce2_driver); +} + +/** + * nforce2_exit - unregisters cpufreq module + * + * Unregisters nForce2 FSB change support. + */ +static void __exit nforce2_exit(void) +{ + cpufreq_unregister_driver(&nforce2_driver); +} + +module_init(nforce2_init); +module_exit(nforce2_exit); + diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c new file mode 100644 index 00000000000..f43d98e11cc --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c @@ -0,0 +1,334 @@ +/* + * Based on documentation provided by Dave Jones. Thanks! + * + * Licensed under the terms of the GNU GPL License version 2. + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define EPS_BRAND_C7M 0 +#define EPS_BRAND_C7 1 +#define EPS_BRAND_EDEN 2 +#define EPS_BRAND_C3 3 + +struct eps_cpu_data { + u32 fsb; + struct cpufreq_frequency_table freq_table[]; +}; + +static struct eps_cpu_data *eps_cpu[NR_CPUS]; + + +static unsigned int eps_get(unsigned int cpu) +{ + struct eps_cpu_data *centaur; + u32 lo, hi; + + if (cpu) + return 0; + centaur = eps_cpu[cpu]; + if (centaur == NULL) + return 0; + + /* Return current frequency */ + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + return centaur->fsb * ((lo >> 8) & 0xff); +} + +static int eps_set_state(struct eps_cpu_data *centaur, + unsigned int cpu, + u32 dest_state) +{ + struct cpufreq_freqs freqs; + u32 lo, hi; + int err = 0; + int i; + + freqs.old = eps_get(cpu); + freqs.new = centaur->fsb * ((dest_state >> 8) & 0xff); + freqs.cpu = cpu; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + /* Wait while CPU is busy */ + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + i = 0; + while (lo & ((1 << 16) | (1 << 17))) { + udelay(16); + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + i++; + if (unlikely(i > 64)) { + err = -ENODEV; + goto postchange; + } + } + /* Set new multiplier and voltage */ + wrmsr(MSR_IA32_PERF_CTL, dest_state & 0xffff, 0); + /* Wait until transition end */ + i = 0; + do { + udelay(16); + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + i++; + if (unlikely(i > 64)) { + err = -ENODEV; + goto postchange; + } + } while (lo & ((1 << 16) | (1 << 17))); + + /* Return current frequency */ +postchange: + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + freqs.new = centaur->fsb * ((lo >> 8) & 0xff); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + return err; +} + +static int eps_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + struct eps_cpu_data *centaur; + unsigned int newstate = 0; + unsigned int cpu = policy->cpu; + unsigned int dest_state; + int ret; + + if (unlikely(eps_cpu[cpu] == NULL)) + return -ENODEV; + centaur = eps_cpu[cpu]; + + if (unlikely(cpufreq_frequency_table_target(policy, + &eps_cpu[cpu]->freq_table[0], + target_freq, + relation, + &newstate))) { + return -EINVAL; + } + + /* Make frequency transition */ + dest_state = centaur->freq_table[newstate].index & 0xffff; + ret = eps_set_state(centaur, cpu, dest_state); + if (ret) + printk(KERN_ERR "eps: Timeout!\n"); + return ret; +} + +static int eps_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, + &eps_cpu[policy->cpu]->freq_table[0]); +} + +static int eps_cpu_init(struct cpufreq_policy *policy) +{ + unsigned int i; + u32 lo, hi; + u64 val; + u8 current_multiplier, current_voltage; + u8 max_multiplier, max_voltage; + u8 min_multiplier, min_voltage; + u8 brand; + u32 fsb; + struct eps_cpu_data *centaur; + struct cpufreq_frequency_table *f_table; + int k, step, voltage; + int ret; + int states; + + if (policy->cpu != 0) + return -ENODEV; + + /* Check brand */ + printk("eps: Detected VIA "); + rdmsr(0x1153, lo, hi); + brand = (((lo >> 2) ^ lo) >> 18) & 3; + switch(brand) { + case EPS_BRAND_C7M: + printk("C7-M\n"); + break; + case EPS_BRAND_C7: + printk("C7\n"); + break; + case EPS_BRAND_EDEN: + printk("Eden\n"); + break; + case EPS_BRAND_C3: + printk("C3\n"); + return -ENODEV; + break; + } + /* Enable Enhanced PowerSaver */ + rdmsrl(MSR_IA32_MISC_ENABLE, val); + if (!(val & 1 << 16)) { + val |= 1 << 16; + wrmsrl(MSR_IA32_MISC_ENABLE, val); + /* Can be locked at 0 */ + rdmsrl(MSR_IA32_MISC_ENABLE, val); + if (!(val & 1 << 16)) { + printk("eps: Can't enable Enhanced PowerSaver\n"); + return -ENODEV; + } + } + + /* Print voltage and multiplier */ + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + current_voltage = lo & 0xff; + printk("eps: Current voltage = %dmV\n", current_voltage * 16 + 700); + current_multiplier = (lo >> 8) & 0xff; + printk("eps: Current multiplier = %d\n", current_multiplier); + + /* Print limits */ + max_voltage = hi & 0xff; + printk("eps: Highest voltage = %dmV\n", max_voltage * 16 + 700); + max_multiplier = (hi >> 8) & 0xff; + printk("eps: Highest multiplier = %d\n", max_multiplier); + min_voltage = (hi >> 16) & 0xff; + printk("eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700); + min_multiplier = (hi >> 24) & 0xff; + printk("eps: Lowest multiplier = %d\n", min_multiplier); + + /* Sanity checks */ + if (current_multiplier == 0 || max_multiplier == 0 + || min_multiplier == 0) + return -EINVAL; + if (current_multiplier > max_multiplier + || max_multiplier <= min_multiplier) + return -EINVAL; + if (current_voltage > 0x1c || max_voltage > 0x1c) + return -EINVAL; + if (max_voltage < min_voltage) + return -EINVAL; + + /* Calc FSB speed */ + fsb = cpu_khz / current_multiplier; + /* Calc number of p-states supported */ + if (brand == EPS_BRAND_C7M) + states = max_multiplier - min_multiplier + 1; + else + states = 2; + + /* Allocate private data and frequency table for current cpu */ + centaur = kzalloc(sizeof(struct eps_cpu_data) + + (states + 1) * sizeof(struct cpufreq_frequency_table), + GFP_KERNEL); + if (!centaur) + return -ENOMEM; + eps_cpu[0] = centaur; + + /* Copy basic values */ + centaur->fsb = fsb; + + /* Fill frequency and MSR value table */ + f_table = ¢aur->freq_table[0]; + if (brand != EPS_BRAND_C7M) { + f_table[0].frequency = fsb * min_multiplier; + f_table[0].index = (min_multiplier << 8) | min_voltage; + f_table[1].frequency = fsb * max_multiplier; + f_table[1].index = (max_multiplier << 8) | max_voltage; + f_table[2].frequency = CPUFREQ_TABLE_END; + } else { + k = 0; + step = ((max_voltage - min_voltage) * 256) + / (max_multiplier - min_multiplier); + for (i = min_multiplier; i <= max_multiplier; i++) { + voltage = (k * step) / 256 + min_voltage; + f_table[k].frequency = fsb * i; + f_table[k].index = (i << 8) | voltage; + k++; + } + f_table[k].frequency = CPUFREQ_TABLE_END; + } + + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = 140000; /* 844mV -> 700mV in ns */ + policy->cur = fsb * current_multiplier; + + ret = cpufreq_frequency_table_cpuinfo(policy, ¢aur->freq_table[0]); + if (ret) { + kfree(centaur); + return ret; + } + + cpufreq_frequency_table_get_attr(¢aur->freq_table[0], policy->cpu); + return 0; +} + +static int eps_cpu_exit(struct cpufreq_policy *policy) +{ + unsigned int cpu = policy->cpu; + struct eps_cpu_data *centaur; + u32 lo, hi; + + if (eps_cpu[cpu] == NULL) + return -ENODEV; + centaur = eps_cpu[cpu]; + + /* Get max frequency */ + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + /* Set max frequency */ + eps_set_state(centaur, cpu, hi & 0xffff); + /* Bye */ + cpufreq_frequency_table_put_attr(policy->cpu); + kfree(eps_cpu[cpu]); + eps_cpu[cpu] = NULL; + return 0; +} + +static struct freq_attr* eps_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver eps_driver = { + .verify = eps_verify, + .target = eps_target, + .init = eps_cpu_init, + .exit = eps_cpu_exit, + .get = eps_get, + .name = "e_powersaver", + .owner = THIS_MODULE, + .attr = eps_attr, +}; + +static int __init eps_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + + /* This driver will work only on Centaur C7 processors with + * Enhanced SpeedStep/PowerSaver registers */ + if (c->x86_vendor != X86_VENDOR_CENTAUR + || c->x86 != 6 || c->x86_model != 10) + return -ENODEV; + if (!cpu_has(c, X86_FEATURE_EST)) + return -ENODEV; + + if (cpufreq_register_driver(&eps_driver)) + return -EINVAL; + return 0; +} + +static void __exit eps_exit(void) +{ + cpufreq_unregister_driver(&eps_driver); +} + +MODULE_AUTHOR("Rafał Bilski "); +MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPU's."); +MODULE_LICENSE("GPL"); + +module_init(eps_init); +module_exit(eps_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c new file mode 100644 index 00000000000..f317276afa7 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c @@ -0,0 +1,309 @@ +/* + * elanfreq: cpufreq driver for the AMD ELAN family + * + * (c) Copyright 2002 Robert Schwebel + * + * Parts of this code are (c) Sven Geggus + * + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * 2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel + * + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ +#define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */ + +/* Module parameter */ +static int max_freq; + +struct s_elan_multiplier { + int clock; /* frequency in kHz */ + int val40h; /* PMU Force Mode register */ + int val80h; /* CPU Clock Speed Register */ +}; + +/* + * It is important that the frequencies + * are listed in ascending order here! + */ +struct s_elan_multiplier elan_multiplier[] = { + {1000, 0x02, 0x18}, + {2000, 0x02, 0x10}, + {4000, 0x02, 0x08}, + {8000, 0x00, 0x00}, + {16000, 0x00, 0x02}, + {33000, 0x00, 0x04}, + {66000, 0x01, 0x04}, + {99000, 0x01, 0x05} +}; + +static struct cpufreq_frequency_table elanfreq_table[] = { + {0, 1000}, + {1, 2000}, + {2, 4000}, + {3, 8000}, + {4, 16000}, + {5, 33000}, + {6, 66000}, + {7, 99000}, + {0, CPUFREQ_TABLE_END}, +}; + + +/** + * elanfreq_get_cpu_frequency: determine current cpu speed + * + * Finds out at which frequency the CPU of the Elan SOC runs + * at the moment. Frequencies from 1 to 33 MHz are generated + * the normal way, 66 and 99 MHz are called "Hyperspeed Mode" + * and have the rest of the chip running with 33 MHz. + */ + +static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) +{ + u8 clockspeed_reg; /* Clock Speed Register */ + + local_irq_disable(); + outb_p(0x80,REG_CSCIR); + clockspeed_reg = inb_p(REG_CSCDR); + local_irq_enable(); + + if ((clockspeed_reg & 0xE0) == 0xE0) + return 0; + + /* Are we in CPU clock multiplied mode (66/99 MHz)? */ + if ((clockspeed_reg & 0xE0) == 0xC0) { + if ((clockspeed_reg & 0x01) == 0) + return 66000; + else + return 99000; + } + + /* 33 MHz is not 32 MHz... */ + if ((clockspeed_reg & 0xE0)==0xA0) + return 33000; + + return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000); +} + + +/** + * elanfreq_set_cpu_frequency: Change the CPU core frequency + * @cpu: cpu number + * @freq: frequency in kHz + * + * This function takes a frequency value and changes the CPU frequency + * according to this. Note that the frequency has to be checked by + * elanfreq_validatespeed() for correctness! + * + * There is no return value. + */ + +static void elanfreq_set_cpu_state (unsigned int state) +{ + struct cpufreq_freqs freqs; + + freqs.old = elanfreq_get_cpu_frequency(0); + freqs.new = elan_multiplier[state].clock; + freqs.cpu = 0; /* elanfreq.c is UP only driver */ + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n", + elan_multiplier[state].clock); + + + /* + * Access to the Elan's internal registers is indexed via + * 0x22: Chip Setup & Control Register Index Register (CSCI) + * 0x23: Chip Setup & Control Register Data Register (CSCD) + * + */ + + /* + * 0x40 is the Power Management Unit's Force Mode Register. + * Bit 6 enables Hyperspeed Mode (66/100 MHz core frequency) + */ + + local_irq_disable(); + outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */ + outb_p(0x00,REG_CSCDR); + local_irq_enable(); /* wait till internal pipelines and */ + udelay(1000); /* buffers have cleaned up */ + + local_irq_disable(); + + /* now, set the CPU clock speed register (0x80) */ + outb_p(0x80,REG_CSCIR); + outb_p(elan_multiplier[state].val80h,REG_CSCDR); + + /* now, the hyperspeed bit in PMU Force Mode Register (0x40) */ + outb_p(0x40,REG_CSCIR); + outb_p(elan_multiplier[state].val40h,REG_CSCDR); + udelay(10000); + local_irq_enable(); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); +}; + + +/** + * elanfreq_validatespeed: test if frequency range is valid + * @policy: the policy to validate + * + * This function checks if a given frequency range in kHz is valid + * for the hardware supported by the driver. + */ + +static int elanfreq_verify (struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]); +} + +static int elanfreq_target (struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + + if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], target_freq, relation, &newstate)) + return -EINVAL; + + elanfreq_set_cpu_state(newstate); + + return 0; +} + + +/* + * Module init and exit code + */ + +static int elanfreq_cpu_init(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *c = cpu_data; + unsigned int i; + int result; + + /* capability check */ + if ((c->x86_vendor != X86_VENDOR_AMD) || + (c->x86 != 4) || (c->x86_model!=10)) + return -ENODEV; + + /* max freq */ + if (!max_freq) + max_freq = elanfreq_get_cpu_frequency(0); + + /* table init */ + for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { + if (elanfreq_table[i].frequency > max_freq) + elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID; + } + + /* cpuinfo and default policy values */ + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cur = elanfreq_get_cpu_frequency(0); + + result = cpufreq_frequency_table_cpuinfo(policy, elanfreq_table); + if (result) + return (result); + + cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); + return 0; +} + + +static int elanfreq_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + + +#ifndef MODULE +/** + * elanfreq_setup - elanfreq command line parameter parsing + * + * elanfreq command line parameter. Use: + * elanfreq=66000 + * to set the maximum CPU frequency to 66 MHz. Note that in + * case you do not give this boot parameter, the maximum + * frequency will fall back to _current_ CPU frequency which + * might be lower. If you build this as a module, use the + * max_freq module parameter instead. + */ +static int __init elanfreq_setup(char *str) +{ + max_freq = simple_strtoul(str, &str, 0); + printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n"); + return 1; +} +__setup("elanfreq=", elanfreq_setup); +#endif + + +static struct freq_attr* elanfreq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + + +static struct cpufreq_driver elanfreq_driver = { + .get = elanfreq_get_cpu_frequency, + .verify = elanfreq_verify, + .target = elanfreq_target, + .init = elanfreq_cpu_init, + .exit = elanfreq_cpu_exit, + .name = "elanfreq", + .owner = THIS_MODULE, + .attr = elanfreq_attr, +}; + + +static int __init elanfreq_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + + /* Test if we have the right hardware */ + if ((c->x86_vendor != X86_VENDOR_AMD) || + (c->x86 != 4) || (c->x86_model!=10)) { + printk(KERN_INFO "elanfreq: error: no Elan processor found!\n"); + return -ENODEV; + } + return cpufreq_register_driver(&elanfreq_driver); +} + + +static void __exit elanfreq_exit(void) +{ + cpufreq_unregister_driver(&elanfreq_driver); +} + + +module_param (max_freq, int, 0444); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Robert Schwebel , Sven Geggus "); +MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs"); + +module_init(elanfreq_init); +module_exit(elanfreq_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c new file mode 100644 index 00000000000..461dabc4e49 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c @@ -0,0 +1,495 @@ +/* + * Cyrix MediaGX and NatSemi Geode Suspend Modulation + * (C) 2002 Zwane Mwaikambo + * (C) 2002 Hiroshi Miura + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation + * + * The author(s) of this software shall not be held liable for damages + * of any nature resulting due to the use of this software. This + * software is provided AS-IS with no warranties. + * + * Theoritical note: + * + * (see Geode(tm) CS5530 manual (rev.4.1) page.56) + * + * CPU frequency control on NatSemi Geode GX1/GXLV processor and CS55x0 + * are based on Suspend Moduration. + * + * Suspend Modulation works by asserting and de-asserting the SUSP# pin + * to CPU(GX1/GXLV) for configurable durations. When asserting SUSP# + * the CPU enters an idle state. GX1 stops its core clock when SUSP# is + * asserted then power consumption is reduced. + * + * Suspend Modulation's OFF/ON duration are configurable + * with 'Suspend Modulation OFF Count Register' + * and 'Suspend Modulation ON Count Register'. + * These registers are 8bit counters that represent the number of + * 32us intervals which the SUSP# pin is asserted(ON)/de-asserted(OFF) + * to the processor. + * + * These counters define a ratio which is the effective frequency + * of operation of the system. + * + * OFF Count + * F_eff = Fgx * ---------------------- + * OFF Count + ON Count + * + * 0 <= On Count, Off Count <= 255 + * + * From these limits, we can get register values + * + * off_duration + on_duration <= MAX_DURATION + * on_duration = off_duration * (stock_freq - freq) / freq + * + * off_duration = (freq * DURATION) / stock_freq + * on_duration = DURATION - off_duration + * + * + *--------------------------------------------------------------------------- + * + * ChangeLog: + * Dec. 12, 2003 Hiroshi Miura + * - fix on/off register mistake + * - fix cpu_khz calc when it stops cpu modulation. + * + * Dec. 11, 2002 Hiroshi Miura + * - rewrite for Cyrix MediaGX Cx5510/5520 and + * NatSemi Geode Cs5530(A). + * + * Jul. ??, 2002 Zwane Mwaikambo + * - cs5530_mod patch for 2.4.19-rc1. + * + *--------------------------------------------------------------------------- + * + * Todo + * Test on machines with 5510, 5530, 5530A + */ + +/************************************************************************ + * Suspend Modulation - Definitions * + ************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* PCI config registers, all at F0 */ +#define PCI_PMER1 0x80 /* power management enable register 1 */ +#define PCI_PMER2 0x81 /* power management enable register 2 */ +#define PCI_PMER3 0x82 /* power management enable register 3 */ +#define PCI_IRQTC 0x8c /* irq speedup timer counter register:typical 2 to 4ms */ +#define PCI_VIDTC 0x8d /* video speedup timer counter register: typical 50 to 100ms */ +#define PCI_MODOFF 0x94 /* suspend modulation OFF counter register, 1 = 32us */ +#define PCI_MODON 0x95 /* suspend modulation ON counter register */ +#define PCI_SUSCFG 0x96 /* suspend configuration register */ + +/* PMER1 bits */ +#define GPM (1<<0) /* global power management */ +#define GIT (1<<1) /* globally enable PM device idle timers */ +#define GTR (1<<2) /* globally enable IO traps */ +#define IRQ_SPDUP (1<<3) /* disable clock throttle during interrupt handling */ +#define VID_SPDUP (1<<4) /* disable clock throttle during vga video handling */ + +/* SUSCFG bits */ +#define SUSMOD (1<<0) /* enable/disable suspend modulation */ +/* the belows support only with cs5530 (after rev.1.2)/cs5530A */ +#define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */ + /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */ +#define SUSCFG (1<<2) /* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */ +/* the belows support only with cs5530A */ +#define PWRSVE_ISA (1<<3) /* stop ISA clock */ +#define PWRSVE (1<<4) /* active idle */ + +struct gxfreq_params { + u8 on_duration; + u8 off_duration; + u8 pci_suscfg; + u8 pci_pmer1; + u8 pci_pmer2; + struct pci_dev *cs55x0; +}; + +static struct gxfreq_params *gx_params; +static int stock_freq; + +/* PCI bus clock - defaults to 30.000 if cpu_khz is not available */ +static int pci_busclk = 0; +module_param (pci_busclk, int, 0444); + +/* maximum duration for which the cpu may be suspended + * (32us * MAX_DURATION). If no parameter is given, this defaults + * to 255. + * Note that this leads to a maximum of 8 ms(!) where the CPU clock + * is suspended -- processing power is just 0.39% of what it used to be, + * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */ +static int max_duration = 255; +module_param (max_duration, int, 0444); + +/* For the default policy, we want at least some processing power + * - let's say 5%. (min = maxfreq / POLICY_MIN_DIV) + */ +#define POLICY_MIN_DIV 20 + + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "gx-suspmod", msg) + +/** + * we can detect a core multipiler from dir0_lsb + * from GX1 datasheet p.56, + * MULT[3:0]: + * 0000 = SYSCLK multiplied by 4 (test only) + * 0001 = SYSCLK multiplied by 10 + * 0010 = SYSCLK multiplied by 4 + * 0011 = SYSCLK multiplied by 6 + * 0100 = SYSCLK multiplied by 9 + * 0101 = SYSCLK multiplied by 5 + * 0110 = SYSCLK multiplied by 7 + * 0111 = SYSCLK multiplied by 8 + * of 33.3MHz + **/ +static int gx_freq_mult[16] = { + 4, 10, 4, 6, 9, 5, 7, 8, + 0, 0, 0, 0, 0, 0, 0, 0 +}; + + +/**************************************************************** + * Low Level chipset interface * + ****************************************************************/ +static struct pci_device_id gx_chipset_tbl[] __initdata = { + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID }, + { 0, }, +}; + +/** + * gx_detect_chipset: + * + **/ +static __init struct pci_dev *gx_detect_chipset(void) +{ + struct pci_dev *gx_pci = NULL; + + /* check if CPU is a MediaGX or a Geode. */ + if ((current_cpu_data.x86_vendor != X86_VENDOR_NSC) && + (current_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) { + dprintk("error: no MediaGX/Geode processor found!\n"); + return NULL; + } + + /* detect which companion chip is used */ + while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) { + if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) + return gx_pci; + } + + dprintk("error: no supported chipset found!\n"); + return NULL; +} + +/** + * gx_get_cpuspeed: + * + * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi Geode CPU runs. + */ +static unsigned int gx_get_cpuspeed(unsigned int cpu) +{ + if ((gx_params->pci_suscfg & SUSMOD) == 0) + return stock_freq; + + return (stock_freq * gx_params->off_duration) + / (gx_params->on_duration + gx_params->off_duration); +} + +/** + * gx_validate_speed: + * determine current cpu speed + * + **/ + +static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off_duration) +{ + unsigned int i; + u8 tmp_on, tmp_off; + int old_tmp_freq = stock_freq; + int tmp_freq; + + *off_duration=1; + *on_duration=0; + + for (i=max_duration; i>0; i--) { + tmp_off = ((khz * i) / stock_freq) & 0xff; + tmp_on = i - tmp_off; + tmp_freq = (stock_freq * tmp_off) / i; + /* if this relation is closer to khz, use this. If it's equal, + * prefer it, too - lower latency */ + if (abs(tmp_freq - khz) <= abs(old_tmp_freq - khz)) { + *on_duration = tmp_on; + *off_duration = tmp_off; + old_tmp_freq = tmp_freq; + } + } + + return old_tmp_freq; +} + + +/** + * gx_set_cpuspeed: + * set cpu speed in khz. + **/ + +static void gx_set_cpuspeed(unsigned int khz) +{ + u8 suscfg, pmer1; + unsigned int new_khz; + unsigned long flags; + struct cpufreq_freqs freqs; + + freqs.cpu = 0; + freqs.old = gx_get_cpuspeed(0); + + new_khz = gx_validate_speed(khz, &gx_params->on_duration, &gx_params->off_duration); + + freqs.new = new_khz; + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + local_irq_save(flags); + + if (new_khz != stock_freq) { /* if new khz == 100% of CPU speed, it is special case */ + switch (gx_params->cs55x0->device) { + case PCI_DEVICE_ID_CYRIX_5530_LEGACY: + pmer1 = gx_params->pci_pmer1 | IRQ_SPDUP | VID_SPDUP; + /* FIXME: need to test other values -- Zwane,Miura */ + pci_write_config_byte(gx_params->cs55x0, PCI_IRQTC, 4); /* typical 2 to 4ms */ + pci_write_config_byte(gx_params->cs55x0, PCI_VIDTC, 100);/* typical 50 to 100ms */ + pci_write_config_byte(gx_params->cs55x0, PCI_PMER1, pmer1); + + if (gx_params->cs55x0->revision < 0x10) { /* CS5530(rev 1.2, 1.3) */ + suscfg = gx_params->pci_suscfg | SUSMOD; + } else { /* CS5530A,B.. */ + suscfg = gx_params->pci_suscfg | SUSMOD | PWRSVE; + } + break; + case PCI_DEVICE_ID_CYRIX_5520: + case PCI_DEVICE_ID_CYRIX_5510: + suscfg = gx_params->pci_suscfg | SUSMOD; + break; + default: + local_irq_restore(flags); + dprintk("fatal: try to set unknown chipset.\n"); + return; + } + } else { + suscfg = gx_params->pci_suscfg & ~(SUSMOD); + gx_params->off_duration = 0; + gx_params->on_duration = 0; + dprintk("suspend modulation disabled: cpu runs 100 percent speed.\n"); + } + + pci_write_config_byte(gx_params->cs55x0, PCI_MODOFF, gx_params->off_duration); + pci_write_config_byte(gx_params->cs55x0, PCI_MODON, gx_params->on_duration); + + pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg); + pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg); + + local_irq_restore(flags); + + gx_params->pci_suscfg = suscfg; + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n", + gx_params->on_duration * 32, gx_params->off_duration * 32); + dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new); +} + +/**************************************************************** + * High level functions * + ****************************************************************/ + +/* + * cpufreq_gx_verify: test if frequency range is valid + * + * This function checks if a given frequency range in kHz is valid + * for the hardware supported by the driver. + */ + +static int cpufreq_gx_verify(struct cpufreq_policy *policy) +{ + unsigned int tmp_freq = 0; + u8 tmp1, tmp2; + + if (!stock_freq || !policy) + return -EINVAL; + + policy->cpu = 0; + cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); + + /* it needs to be assured that at least one supported frequency is + * within policy->min and policy->max. If it is not, policy->max + * needs to be increased until one freuqency is supported. + * policy->min may not be decreased, though. This way we guarantee a + * specific processing capacity. + */ + tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2); + if (tmp_freq < policy->min) + tmp_freq += stock_freq / max_duration; + policy->min = tmp_freq; + if (policy->min > policy->max) + policy->max = tmp_freq; + tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2); + if (tmp_freq > policy->max) + tmp_freq -= stock_freq / max_duration; + policy->max = tmp_freq; + if (policy->max < policy->min) + policy->max = policy->min; + cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); + + return 0; +} + +/* + * cpufreq_gx_target: + * + */ +static int cpufreq_gx_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + u8 tmp1, tmp2; + unsigned int tmp_freq; + + if (!stock_freq || !policy) + return -EINVAL; + + policy->cpu = 0; + + tmp_freq = gx_validate_speed(target_freq, &tmp1, &tmp2); + while (tmp_freq < policy->min) { + tmp_freq += stock_freq / max_duration; + tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2); + } + while (tmp_freq > policy->max) { + tmp_freq -= stock_freq / max_duration; + tmp_freq = gx_validate_speed(tmp_freq, &tmp1, &tmp2); + } + + gx_set_cpuspeed(tmp_freq); + + return 0; +} + +static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy) +{ + unsigned int maxfreq, curfreq; + + if (!policy || policy->cpu != 0) + return -ENODEV; + + /* determine maximum frequency */ + if (pci_busclk) { + maxfreq = pci_busclk * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; + } else if (cpu_khz) { + maxfreq = cpu_khz; + } else { + maxfreq = 30000 * gx_freq_mult[getCx86(CX86_DIR1) & 0x0f]; + } + stock_freq = maxfreq; + curfreq = gx_get_cpuspeed(0); + + dprintk("cpu max frequency is %d.\n", maxfreq); + dprintk("cpu current frequency is %dkHz.\n",curfreq); + + /* setup basic struct for cpufreq API */ + policy->cpu = 0; + + if (max_duration < POLICY_MIN_DIV) + policy->min = maxfreq / max_duration; + else + policy->min = maxfreq / POLICY_MIN_DIV; + policy->max = maxfreq; + policy->cur = curfreq; + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.min_freq = maxfreq / max_duration; + policy->cpuinfo.max_freq = maxfreq; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + + return 0; +} + +/* + * cpufreq_gx_init: + * MediaGX/Geode GX initialize cpufreq driver + */ +static struct cpufreq_driver gx_suspmod_driver = { + .get = gx_get_cpuspeed, + .verify = cpufreq_gx_verify, + .target = cpufreq_gx_target, + .init = cpufreq_gx_cpu_init, + .name = "gx-suspmod", + .owner = THIS_MODULE, +}; + +static int __init cpufreq_gx_init(void) +{ + int ret; + struct gxfreq_params *params; + struct pci_dev *gx_pci; + + /* Test if we have the right hardware */ + if ((gx_pci = gx_detect_chipset()) == NULL) + return -ENODEV; + + /* check whether module parameters are sane */ + if (max_duration > 0xff) + max_duration = 0xff; + + dprintk("geode suspend modulation available.\n"); + + params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL); + if (params == NULL) + return -ENOMEM; + + params->cs55x0 = gx_pci; + gx_params = params; + + /* keep cs55x0 configurations */ + pci_read_config_byte(params->cs55x0, PCI_SUSCFG, &(params->pci_suscfg)); + pci_read_config_byte(params->cs55x0, PCI_PMER1, &(params->pci_pmer1)); + pci_read_config_byte(params->cs55x0, PCI_PMER2, &(params->pci_pmer2)); + pci_read_config_byte(params->cs55x0, PCI_MODON, &(params->on_duration)); + pci_read_config_byte(params->cs55x0, PCI_MODOFF, &(params->off_duration)); + + if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { + kfree(params); + return ret; /* register error! */ + } + + return 0; +} + +static void __exit cpufreq_gx_exit(void) +{ + cpufreq_unregister_driver(&gx_suspmod_driver); + pci_dev_put(gx_params->cs55x0); + kfree(gx_params); +} + +MODULE_AUTHOR ("Hiroshi Miura "); +MODULE_DESCRIPTION ("Cpufreq driver for Cyrix MediaGX and NatSemi Geode"); +MODULE_LICENSE ("GPL"); + +module_init(cpufreq_gx_init); +module_exit(cpufreq_gx_exit); + diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c new file mode 100644 index 00000000000..f0cce3c2dc3 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -0,0 +1,1024 @@ +/* + * (C) 2001-2004 Dave Jones. + * (C) 2002 Padraig Brady. + * + * Licensed under the terms of the GNU GPL License version 2. + * Based upon datasheets & sample CPUs kindly provided by VIA. + * + * VIA have currently 3 different versions of Longhaul. + * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147. + * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0. + * Version 2 of longhaul is backward compatible with v1, but adds + * LONGHAUL MSR for purpose of both frequency and voltage scaling. + * Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C). + * Version 3 of longhaul got renamed to Powersaver and redesigned + * to use only the POWERSAVER MSR at 0x110a. + * It is present in Ezra-T (C5M), Nehemiah (C5X) and above. + * It's pretty much the same feature wise to longhaul v2, though + * there is provision for scaling FSB too, but this doesn't work + * too well in practice so we don't even try to use this. + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "longhaul.h" + +#define PFX "longhaul: " + +#define TYPE_LONGHAUL_V1 1 +#define TYPE_LONGHAUL_V2 2 +#define TYPE_POWERSAVER 3 + +#define CPU_SAMUEL 1 +#define CPU_SAMUEL2 2 +#define CPU_EZRA 3 +#define CPU_EZRA_T 4 +#define CPU_NEHEMIAH 5 +#define CPU_NEHEMIAH_C 6 + +/* Flags */ +#define USE_ACPI_C3 (1 << 1) +#define USE_NORTHBRIDGE (1 << 2) + +static int cpu_model; +static unsigned int numscales=16; +static unsigned int fsb; + +static const struct mV_pos *vrm_mV_table; +static const unsigned char *mV_vrm_table; + +static unsigned int highest_speed, lowest_speed; /* kHz */ +static unsigned int minmult, maxmult; +static int can_scale_voltage; +static struct acpi_processor *pr = NULL; +static struct acpi_processor_cx *cx = NULL; +static u32 acpi_regs_addr; +static u8 longhaul_flags; +static unsigned int longhaul_index; + +/* Module parameters */ +static int scale_voltage; +static int disable_acpi_c3; +static int revid_errata; + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) + + +/* Clock ratios multiplied by 10 */ +static int clock_ratio[32]; +static int eblcr_table[32]; +static int longhaul_version; +static struct cpufreq_frequency_table *longhaul_table; + +#ifdef CONFIG_CPU_FREQ_DEBUG +static char speedbuffer[8]; + +static char *print_speed(int speed) +{ + if (speed < 1000) { + snprintf(speedbuffer, sizeof(speedbuffer),"%dMHz", speed); + return speedbuffer; + } + + if (speed%1000 == 0) + snprintf(speedbuffer, sizeof(speedbuffer), + "%dGHz", speed/1000); + else + snprintf(speedbuffer, sizeof(speedbuffer), + "%d.%dGHz", speed/1000, (speed%1000)/100); + + return speedbuffer; +} +#endif + + +static unsigned int calc_speed(int mult) +{ + int khz; + khz = (mult/10)*fsb; + if (mult%10) + khz += fsb/2; + khz *= 1000; + return khz; +} + + +static int longhaul_get_cpu_mult(void) +{ + unsigned long invalue=0,lo, hi; + + rdmsr (MSR_IA32_EBL_CR_POWERON, lo, hi); + invalue = (lo & (1<<22|1<<23|1<<24|1<<25)) >>22; + if (longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) { + if (lo & (1<<27)) + invalue+=16; + } + return eblcr_table[invalue]; +} + +/* For processor with BCR2 MSR */ + +static void do_longhaul1(unsigned int clock_ratio_index) +{ + union msr_bcr2 bcr2; + + rdmsrl(MSR_VIA_BCR2, bcr2.val); + /* Enable software clock multiplier */ + bcr2.bits.ESOFTBF = 1; + bcr2.bits.CLOCKMUL = clock_ratio_index & 0xff; + + /* Sync to timer tick */ + safe_halt(); + /* Change frequency on next halt or sleep */ + wrmsrl(MSR_VIA_BCR2, bcr2.val); + /* Invoke transition */ + ACPI_FLUSH_CPU_CACHE(); + halt(); + + /* Disable software clock multiplier */ + local_irq_disable(); + rdmsrl(MSR_VIA_BCR2, bcr2.val); + bcr2.bits.ESOFTBF = 0; + wrmsrl(MSR_VIA_BCR2, bcr2.val); +} + +/* For processor with Longhaul MSR */ + +static void do_powersaver(int cx_address, unsigned int clock_ratio_index, + unsigned int dir) +{ + union msr_longhaul longhaul; + u32 t; + + rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); + /* Setup new frequency */ + if (!revid_errata) + longhaul.bits.RevisionKey = longhaul.bits.RevisionID; + else + longhaul.bits.RevisionKey = 0; + longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf; + longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; + /* Setup new voltage */ + if (can_scale_voltage) + longhaul.bits.SoftVID = (clock_ratio_index >> 8) & 0x1f; + /* Sync to timer tick */ + safe_halt(); + /* Raise voltage if necessary */ + if (can_scale_voltage && dir) { + longhaul.bits.EnableSoftVID = 1; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + /* Change voltage */ + if (!cx_address) { + ACPI_FLUSH_CPU_CACHE(); + halt(); + } else { + ACPI_FLUSH_CPU_CACHE(); + /* Invoke C3 */ + inb(cx_address); + /* Dummy op - must do something useless after P_LVL3 + * read */ + t = inl(acpi_gbl_FADT.xpm_timer_block.address); + } + longhaul.bits.EnableSoftVID = 0; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + } + + /* Change frequency on next halt or sleep */ + longhaul.bits.EnableSoftBusRatio = 1; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + if (!cx_address) { + ACPI_FLUSH_CPU_CACHE(); + halt(); + } else { + ACPI_FLUSH_CPU_CACHE(); + /* Invoke C3 */ + inb(cx_address); + /* Dummy op - must do something useless after P_LVL3 read */ + t = inl(acpi_gbl_FADT.xpm_timer_block.address); + } + /* Disable bus ratio bit */ + longhaul.bits.EnableSoftBusRatio = 0; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + + /* Reduce voltage if necessary */ + if (can_scale_voltage && !dir) { + longhaul.bits.EnableSoftVID = 1; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + /* Change voltage */ + if (!cx_address) { + ACPI_FLUSH_CPU_CACHE(); + halt(); + } else { + ACPI_FLUSH_CPU_CACHE(); + /* Invoke C3 */ + inb(cx_address); + /* Dummy op - must do something useless after P_LVL3 + * read */ + t = inl(acpi_gbl_FADT.xpm_timer_block.address); + } + longhaul.bits.EnableSoftVID = 0; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + } +} + +/** + * longhaul_set_cpu_frequency() + * @clock_ratio_index : bitpattern of the new multiplier. + * + * Sets a new clock ratio. + */ + +static void longhaul_setstate(unsigned int table_index) +{ + unsigned int clock_ratio_index; + int speed, mult; + struct cpufreq_freqs freqs; + unsigned long flags; + unsigned int pic1_mask, pic2_mask; + u16 bm_status = 0; + u32 bm_timeout = 1000; + unsigned int dir = 0; + + clock_ratio_index = longhaul_table[table_index].index; + /* Safety precautions */ + mult = clock_ratio[clock_ratio_index & 0x1f]; + if (mult == -1) + return; + speed = calc_speed(mult); + if ((speed > highest_speed) || (speed < lowest_speed)) + return; + /* Voltage transition before frequency transition? */ + if (can_scale_voltage && longhaul_index < table_index) + dir = 1; + + freqs.old = calc_speed(longhaul_get_cpu_mult()); + freqs.new = speed; + freqs.cpu = 0; /* longhaul.c is UP only driver */ + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + dprintk ("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", + fsb, mult/10, mult%10, print_speed(speed/1000)); +retry_loop: + preempt_disable(); + local_irq_save(flags); + + pic2_mask = inb(0xA1); + pic1_mask = inb(0x21); /* works on C3. save mask. */ + outb(0xFF,0xA1); /* Overkill */ + outb(0xFE,0x21); /* TMR0 only */ + + /* Wait while PCI bus is busy. */ + if (acpi_regs_addr && (longhaul_flags & USE_NORTHBRIDGE + || ((pr != NULL) && pr->flags.bm_control))) { + bm_status = inw(acpi_regs_addr); + bm_status &= 1 << 4; + while (bm_status && bm_timeout) { + outw(1 << 4, acpi_regs_addr); + bm_timeout--; + bm_status = inw(acpi_regs_addr); + bm_status &= 1 << 4; + } + } + + if (longhaul_flags & USE_NORTHBRIDGE) { + /* Disable AGP and PCI arbiters */ + outb(3, 0x22); + } else if ((pr != NULL) && pr->flags.bm_control) { + /* Disable bus master arbitration */ + acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1); + } + switch (longhaul_version) { + + /* + * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B]) + * Software controlled multipliers only. + */ + case TYPE_LONGHAUL_V1: + do_longhaul1(clock_ratio_index); + break; + + /* + * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5B] and Ezra [C5C] + * + * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N]) + * Nehemiah can do FSB scaling too, but this has never been proven + * to work in practice. + */ + case TYPE_LONGHAUL_V2: + case TYPE_POWERSAVER: + if (longhaul_flags & USE_ACPI_C3) { + /* Don't allow wakeup */ + acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); + do_powersaver(cx->address, clock_ratio_index, dir); + } else { + do_powersaver(0, clock_ratio_index, dir); + } + break; + } + + if (longhaul_flags & USE_NORTHBRIDGE) { + /* Enable arbiters */ + outb(0, 0x22); + } else if ((pr != NULL) && pr->flags.bm_control) { + /* Enable bus master arbitration */ + acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0); + } + outb(pic2_mask,0xA1); /* restore mask */ + outb(pic1_mask,0x21); + + local_irq_restore(flags); + preempt_enable(); + + freqs.new = calc_speed(longhaul_get_cpu_mult()); + /* Check if requested frequency is set. */ + if (unlikely(freqs.new != speed)) { + printk(KERN_INFO PFX "Failed to set requested frequency!\n"); + /* Revision ID = 1 but processor is expecting revision key + * equal to 0. Jumpers at the bottom of processor will change + * multiplier and FSB, but will not change bits in Longhaul + * MSR nor enable voltage scaling. */ + if (!revid_errata) { + printk(KERN_INFO PFX "Enabling \"Ignore Revision ID\" " + "option.\n"); + revid_errata = 1; + msleep(200); + goto retry_loop; + } + /* Why ACPI C3 sometimes doesn't work is a mystery for me. + * But it does happen. Processor is entering ACPI C3 state, + * but it doesn't change frequency. I tried poking various + * bits in northbridge registers, but without success. */ + if (longhaul_flags & USE_ACPI_C3) { + printk(KERN_INFO PFX "Disabling ACPI C3 support.\n"); + longhaul_flags &= ~USE_ACPI_C3; + if (revid_errata) { + printk(KERN_INFO PFX "Disabling \"Ignore " + "Revision ID\" option.\n"); + revid_errata = 0; + } + msleep(200); + goto retry_loop; + } + /* This shouldn't happen. Longhaul ver. 2 was reported not + * working on processors without voltage scaling, but with + * RevID = 1. RevID errata will make things right. Just + * to be 100% sure. */ + if (longhaul_version == TYPE_LONGHAUL_V2) { + printk(KERN_INFO PFX "Switching to Longhaul ver. 1\n"); + longhaul_version = TYPE_LONGHAUL_V1; + msleep(200); + goto retry_loop; + } + } + /* Report true CPU frequency */ + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + if (!bm_timeout) + printk(KERN_INFO PFX "Warning: Timeout while waiting for idle PCI bus.\n"); +} + +/* + * Centaur decided to make life a little more tricky. + * Only longhaul v1 is allowed to read EBLCR BSEL[0:1]. + * Samuel2 and above have to try and guess what the FSB is. + * We do this by assuming we booted at maximum multiplier, and interpolate + * between that value multiplied by possible FSBs and cpu_mhz which + * was calculated at boot time. Really ugly, but no other way to do this. + */ + +#define ROUNDING 0xf + +static int guess_fsb(int mult) +{ + int speed = cpu_khz / 1000; + int i; + int speeds[] = { 666, 1000, 1333, 2000 }; + int f_max, f_min; + + for (i = 0; i < 4; i++) { + f_max = ((speeds[i] * mult) + 50) / 100; + f_max += (ROUNDING / 2); + f_min = f_max - ROUNDING; + if ((speed <= f_max) && (speed >= f_min)) + return speeds[i] / 10; + } + return 0; +} + + +static int __init longhaul_get_ranges(void) +{ + unsigned int i, j, k = 0; + unsigned int ratio; + int mult; + + /* Get current frequency */ + mult = longhaul_get_cpu_mult(); + if (mult == -1) { + printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n"); + return -EINVAL; + } + fsb = guess_fsb(mult); + if (fsb == 0) { + printk(KERN_INFO PFX "Invalid (reserved) FSB!\n"); + return -EINVAL; + } + /* Get max multiplier - as we always did. + * Longhaul MSR is usefull only when voltage scaling is enabled. + * C3 is booting at max anyway. */ + maxmult = mult; + /* Get min multiplier */ + switch (cpu_model) { + case CPU_NEHEMIAH: + minmult = 50; + break; + case CPU_NEHEMIAH_C: + minmult = 40; + break; + default: + minmult = 30; + break; + } + + dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n", + minmult/10, minmult%10, maxmult/10, maxmult%10); + + highest_speed = calc_speed(maxmult); + lowest_speed = calc_speed(minmult); + dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, + print_speed(lowest_speed/1000), + print_speed(highest_speed/1000)); + + if (lowest_speed == highest_speed) { + printk (KERN_INFO PFX "highestspeed == lowest, aborting.\n"); + return -EINVAL; + } + if (lowest_speed > highest_speed) { + printk (KERN_INFO PFX "nonsense! lowest (%d > %d) !\n", + lowest_speed, highest_speed); + return -EINVAL; + } + + longhaul_table = kmalloc((numscales + 1) * sizeof(struct cpufreq_frequency_table), GFP_KERNEL); + if(!longhaul_table) + return -ENOMEM; + + for (j = 0; j < numscales; j++) { + ratio = clock_ratio[j]; + if (ratio == -1) + continue; + if (ratio > maxmult || ratio < minmult) + continue; + longhaul_table[k].frequency = calc_speed(ratio); + longhaul_table[k].index = j; + k++; + } + if (k <= 1) { + kfree(longhaul_table); + return -ENODEV; + } + /* Sort */ + for (j = 0; j < k - 1; j++) { + unsigned int min_f, min_i; + min_f = longhaul_table[j].frequency; + min_i = j; + for (i = j + 1; i < k; i++) { + if (longhaul_table[i].frequency < min_f) { + min_f = longhaul_table[i].frequency; + min_i = i; + } + } + if (min_i != j) { + unsigned int temp; + temp = longhaul_table[j].frequency; + longhaul_table[j].frequency = longhaul_table[min_i].frequency; + longhaul_table[min_i].frequency = temp; + temp = longhaul_table[j].index; + longhaul_table[j].index = longhaul_table[min_i].index; + longhaul_table[min_i].index = temp; + } + } + + longhaul_table[k].frequency = CPUFREQ_TABLE_END; + + /* Find index we are running on */ + for (j = 0; j < k; j++) { + if (clock_ratio[longhaul_table[j].index & 0x1f] == mult) { + longhaul_index = j; + break; + } + } + return 0; +} + + +static void __init longhaul_setup_voltagescaling(void) +{ + union msr_longhaul longhaul; + struct mV_pos minvid, maxvid, vid; + unsigned int j, speed, pos, kHz_step, numvscales; + int min_vid_speed; + + rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); + if (!(longhaul.bits.RevisionID & 1)) { + printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n"); + return; + } + + if (!longhaul.bits.VRMRev) { + printk(KERN_INFO PFX "VRM 8.5\n"); + vrm_mV_table = &vrm85_mV[0]; + mV_vrm_table = &mV_vrm85[0]; + } else { + printk(KERN_INFO PFX "Mobile VRM\n"); + if (cpu_model < CPU_NEHEMIAH) + return; + vrm_mV_table = &mobilevrm_mV[0]; + mV_vrm_table = &mV_mobilevrm[0]; + } + + minvid = vrm_mV_table[longhaul.bits.MinimumVID]; + maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; + + if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { + printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " + "Voltage scaling disabled.\n", + minvid.mV/1000, minvid.mV%1000, maxvid.mV/1000, maxvid.mV%1000); + return; + } + + if (minvid.mV == maxvid.mV) { + printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are " + "both %d.%03d. Voltage scaling disabled\n", + maxvid.mV/1000, maxvid.mV%1000); + return; + } + + /* How many voltage steps */ + numvscales = maxvid.pos - minvid.pos + 1; + printk(KERN_INFO PFX + "Max VID=%d.%03d " + "Min VID=%d.%03d, " + "%d possible voltage scales\n", + maxvid.mV/1000, maxvid.mV%1000, + minvid.mV/1000, minvid.mV%1000, + numvscales); + + /* Calculate max frequency at min voltage */ + j = longhaul.bits.MinMHzBR; + if (longhaul.bits.MinMHzBR4) + j += 16; + min_vid_speed = eblcr_table[j]; + if (min_vid_speed == -1) + return; + switch (longhaul.bits.MinMHzFSB) { + case 0: + min_vid_speed *= 13333; + break; + case 1: + min_vid_speed *= 10000; + break; + case 3: + min_vid_speed *= 6666; + break; + default: + return; + break; + } + if (min_vid_speed >= highest_speed) + return; + /* Calculate kHz for one voltage step */ + kHz_step = (highest_speed - min_vid_speed) / numvscales; + + j = 0; + while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) { + speed = longhaul_table[j].frequency; + if (speed > min_vid_speed) + pos = (speed - min_vid_speed) / kHz_step + minvid.pos; + else + pos = minvid.pos; + longhaul_table[j].index |= mV_vrm_table[pos] << 8; + vid = vrm_mV_table[mV_vrm_table[pos]]; + printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n", speed, j, vid.mV); + j++; + } + + can_scale_voltage = 1; + printk(KERN_INFO PFX "Voltage scaling enabled.\n"); +} + + +static int longhaul_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, longhaul_table); +} + + +static int longhaul_target(struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ + unsigned int table_index = 0; + unsigned int i; + unsigned int dir = 0; + u8 vid, current_vid; + + if (cpufreq_frequency_table_target(policy, longhaul_table, target_freq, relation, &table_index)) + return -EINVAL; + + /* Don't set same frequency again */ + if (longhaul_index == table_index) + return 0; + + if (!can_scale_voltage) + longhaul_setstate(table_index); + else { + /* On test system voltage transitions exceeding single + * step up or down were turning motherboard off. Both + * "ondemand" and "userspace" are unsafe. C7 is doing + * this in hardware, C3 is old and we need to do this + * in software. */ + i = longhaul_index; + current_vid = (longhaul_table[longhaul_index].index >> 8) & 0x1f; + if (table_index > longhaul_index) + dir = 1; + while (i != table_index) { + vid = (longhaul_table[i].index >> 8) & 0x1f; + if (vid != current_vid) { + longhaul_setstate(i); + current_vid = vid; + msleep(200); + } + if (dir) + i++; + else + i--; + } + longhaul_setstate(table_index); + } + longhaul_index = table_index; + return 0; +} + + +static unsigned int longhaul_get(unsigned int cpu) +{ + if (cpu) + return 0; + return calc_speed(longhaul_get_cpu_mult()); +} + +static acpi_status longhaul_walk_callback(acpi_handle obj_handle, + u32 nesting_level, + void *context, void **return_value) +{ + struct acpi_device *d; + + if ( acpi_bus_get_device(obj_handle, &d) ) { + return 0; + } + *return_value = (void *)acpi_driver_data(d); + return 1; +} + +/* VIA don't support PM2 reg, but have something similar */ +static int enable_arbiter_disable(void) +{ + struct pci_dev *dev; + int status = 1; + int reg; + u8 pci_cmd; + + /* Find PLE133 host bridge */ + reg = 0x78; + dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, + NULL); + /* Find CLE266 host bridge */ + if (dev == NULL) { + reg = 0x76; + dev = pci_get_device(PCI_VENDOR_ID_VIA, + PCI_DEVICE_ID_VIA_862X_0, NULL); + /* Find CN400 V-Link host bridge */ + if (dev == NULL) + dev = pci_get_device(PCI_VENDOR_ID_VIA, 0x7259, NULL); + } + if (dev != NULL) { + /* Enable access to port 0x22 */ + pci_read_config_byte(dev, reg, &pci_cmd); + if (!(pci_cmd & 1<<7)) { + pci_cmd |= 1<<7; + pci_write_config_byte(dev, reg, pci_cmd); + pci_read_config_byte(dev, reg, &pci_cmd); + if (!(pci_cmd & 1<<7)) { + printk(KERN_ERR PFX + "Can't enable access to port 0x22.\n"); + status = 0; + } + } + pci_dev_put(dev); + return status; + } + return 0; +} + +static int longhaul_setup_southbridge(void) +{ + struct pci_dev *dev; + u8 pci_cmd; + + /* Find VT8235 southbridge */ + dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL); + if (dev == NULL) + /* Find VT8237 southbridge */ + dev = pci_get_device(PCI_VENDOR_ID_VIA, + PCI_DEVICE_ID_VIA_8237, NULL); + if (dev != NULL) { + /* Set transition time to max */ + pci_read_config_byte(dev, 0xec, &pci_cmd); + pci_cmd &= ~(1 << 2); + pci_write_config_byte(dev, 0xec, pci_cmd); + pci_read_config_byte(dev, 0xe4, &pci_cmd); + pci_cmd &= ~(1 << 7); + pci_write_config_byte(dev, 0xe4, pci_cmd); + pci_read_config_byte(dev, 0xe5, &pci_cmd); + pci_cmd |= 1 << 7; + pci_write_config_byte(dev, 0xe5, pci_cmd); + /* Get address of ACPI registers block*/ + pci_read_config_byte(dev, 0x81, &pci_cmd); + if (pci_cmd & 1 << 7) { + pci_read_config_dword(dev, 0x88, &acpi_regs_addr); + acpi_regs_addr &= 0xff00; + printk(KERN_INFO PFX "ACPI I/O at 0x%x\n", acpi_regs_addr); + } + + pci_dev_put(dev); + return 1; + } + return 0; +} + +static int __init longhaul_cpu_init(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *c = cpu_data; + char *cpuname=NULL; + int ret; + u32 lo, hi; + + /* Check what we have on this motherboard */ + switch (c->x86_model) { + case 6: + cpu_model = CPU_SAMUEL; + cpuname = "C3 'Samuel' [C5A]"; + longhaul_version = TYPE_LONGHAUL_V1; + memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio)); + memcpy (eblcr_table, samuel1_eblcr, sizeof(samuel1_eblcr)); + break; + + case 7: + switch (c->x86_mask) { + case 0: + longhaul_version = TYPE_LONGHAUL_V1; + cpu_model = CPU_SAMUEL2; + cpuname = "C3 'Samuel 2' [C5B]"; + /* Note, this is not a typo, early Samuel2's had + * Samuel1 ratios. */ + memcpy(clock_ratio, samuel1_clock_ratio, + sizeof(samuel1_clock_ratio)); + memcpy(eblcr_table, samuel2_eblcr, + sizeof(samuel2_eblcr)); + break; + case 1 ... 15: + longhaul_version = TYPE_LONGHAUL_V1; + if (c->x86_mask < 8) { + cpu_model = CPU_SAMUEL2; + cpuname = "C3 'Samuel 2' [C5B]"; + } else { + cpu_model = CPU_EZRA; + cpuname = "C3 'Ezra' [C5C]"; + } + memcpy(clock_ratio, ezra_clock_ratio, + sizeof(ezra_clock_ratio)); + memcpy(eblcr_table, ezra_eblcr, + sizeof(ezra_eblcr)); + break; + } + break; + + case 8: + cpu_model = CPU_EZRA_T; + cpuname = "C3 'Ezra-T' [C5M]"; + longhaul_version = TYPE_POWERSAVER; + numscales=32; + memcpy (clock_ratio, ezrat_clock_ratio, sizeof(ezrat_clock_ratio)); + memcpy (eblcr_table, ezrat_eblcr, sizeof(ezrat_eblcr)); + break; + + case 9: + longhaul_version = TYPE_POWERSAVER; + numscales = 32; + memcpy(clock_ratio, + nehemiah_clock_ratio, + sizeof(nehemiah_clock_ratio)); + memcpy(eblcr_table, nehemiah_eblcr, sizeof(nehemiah_eblcr)); + switch (c->x86_mask) { + case 0 ... 1: + cpu_model = CPU_NEHEMIAH; + cpuname = "C3 'Nehemiah A' [C5XLOE]"; + break; + case 2 ... 4: + cpu_model = CPU_NEHEMIAH; + cpuname = "C3 'Nehemiah B' [C5XLOH]"; + break; + case 5 ... 15: + cpu_model = CPU_NEHEMIAH_C; + cpuname = "C3 'Nehemiah C' [C5P]"; + break; + } + break; + + default: + cpuname = "Unknown"; + break; + } + /* Check Longhaul ver. 2 */ + if (longhaul_version == TYPE_LONGHAUL_V2) { + rdmsr(MSR_VIA_LONGHAUL, lo, hi); + if (lo == 0 && hi == 0) + /* Looks like MSR isn't present */ + longhaul_version = TYPE_LONGHAUL_V1; + } + + printk (KERN_INFO PFX "VIA %s CPU detected. ", cpuname); + switch (longhaul_version) { + case TYPE_LONGHAUL_V1: + case TYPE_LONGHAUL_V2: + printk ("Longhaul v%d supported.\n", longhaul_version); + break; + case TYPE_POWERSAVER: + printk ("Powersaver supported.\n"); + break; + }; + + /* Doesn't hurt */ + longhaul_setup_southbridge(); + + /* Find ACPI data for processor */ + acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, + ACPI_UINT32_MAX, &longhaul_walk_callback, + NULL, (void *)&pr); + + /* Check ACPI support for C3 state */ + if (pr != NULL && longhaul_version == TYPE_POWERSAVER) { + cx = &pr->power.states[ACPI_STATE_C3]; + if (cx->address > 0 && cx->latency <= 1000) + longhaul_flags |= USE_ACPI_C3; + } + /* Disable if it isn't working */ + if (disable_acpi_c3) + longhaul_flags &= ~USE_ACPI_C3; + /* Check if northbridge is friendly */ + if (enable_arbiter_disable()) + longhaul_flags |= USE_NORTHBRIDGE; + + /* Check ACPI support for bus master arbiter disable */ + if (!(longhaul_flags & USE_ACPI_C3 + || longhaul_flags & USE_NORTHBRIDGE) + && ((pr == NULL) || !(pr->flags.bm_control))) { + printk(KERN_ERR PFX + "No ACPI support. Unsupported northbridge.\n"); + return -ENODEV; + } + + if (longhaul_flags & USE_NORTHBRIDGE) + printk(KERN_INFO PFX "Using northbridge support.\n"); + if (longhaul_flags & USE_ACPI_C3) + printk(KERN_INFO PFX "Using ACPI support.\n"); + + ret = longhaul_get_ranges(); + if (ret != 0) + return ret; + + if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0)) + longhaul_setup_voltagescaling(); + + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = 200000; /* nsec */ + policy->cur = calc_speed(longhaul_get_cpu_mult()); + + ret = cpufreq_frequency_table_cpuinfo(policy, longhaul_table); + if (ret) + return ret; + + cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu); + + return 0; +} + +static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static struct freq_attr* longhaul_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver longhaul_driver = { + .verify = longhaul_verify, + .target = longhaul_target, + .get = longhaul_get, + .init = longhaul_cpu_init, + .exit = __devexit_p(longhaul_cpu_exit), + .name = "longhaul", + .owner = THIS_MODULE, + .attr = longhaul_attr, +}; + + +static int __init longhaul_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + + if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6) + return -ENODEV; + +#ifdef CONFIG_SMP + if (num_online_cpus() > 1) { + printk(KERN_ERR PFX "More than 1 CPU detected, longhaul disabled.\n"); + return -ENODEV; + } +#endif +#ifdef CONFIG_X86_IO_APIC + if (cpu_has_apic) { + printk(KERN_ERR PFX "APIC detected. Longhaul is currently broken in this configuration.\n"); + return -ENODEV; + } +#endif + switch (c->x86_model) { + case 6 ... 9: + return cpufreq_register_driver(&longhaul_driver); + case 10: + printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n"); + default: + ;; + } + + return -ENODEV; +} + + +static void __exit longhaul_exit(void) +{ + int i; + + for (i=0; i < numscales; i++) { + if (clock_ratio[i] == maxmult) { + longhaul_setstate(i); + break; + } + } + + cpufreq_unregister_driver(&longhaul_driver); + kfree(longhaul_table); +} + +/* Even if BIOS is exporting ACPI C3 state, and it is used + * with success when CPU is idle, this state doesn't + * trigger frequency transition in some cases. */ +module_param (disable_acpi_c3, int, 0644); +MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support"); +/* Change CPU voltage with frequency. Very usefull to save + * power, but most VIA C3 processors aren't supporting it. */ +module_param (scale_voltage, int, 0644); +MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); +/* Force revision key to 0 for processors which doesn't + * support voltage scaling, but are introducing itself as + * such. */ +module_param(revid_errata, int, 0644); +MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); + +MODULE_AUTHOR ("Dave Jones "); +MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); +MODULE_LICENSE ("GPL"); + +late_initcall(longhaul_init); +module_exit(longhaul_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h new file mode 100644 index 00000000000..4fcc320997d --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.h @@ -0,0 +1,353 @@ +/* + * longhaul.h + * (C) 2003 Dave Jones. + * + * Licensed under the terms of the GNU GPL License version 2. + * + * VIA-specific information + */ + +union msr_bcr2 { + struct { + unsigned Reseved:19, // 18:0 + ESOFTBF:1, // 19 + Reserved2:3, // 22:20 + CLOCKMUL:4, // 26:23 + Reserved3:5; // 31:27 + } bits; + unsigned long val; +}; + +union msr_longhaul { + struct { + unsigned RevisionID:4, // 3:0 + RevisionKey:4, // 7:4 + EnableSoftBusRatio:1, // 8 + EnableSoftVID:1, // 9 + EnableSoftBSEL:1, // 10 + Reserved:3, // 11:13 + SoftBusRatio4:1, // 14 + VRMRev:1, // 15 + SoftBusRatio:4, // 19:16 + SoftVID:5, // 24:20 + Reserved2:3, // 27:25 + SoftBSEL:2, // 29:28 + Reserved3:2, // 31:30 + MaxMHzBR:4, // 35:32 + MaximumVID:5, // 40:36 + MaxMHzFSB:2, // 42:41 + MaxMHzBR4:1, // 43 + Reserved4:4, // 47:44 + MinMHzBR:4, // 51:48 + MinimumVID:5, // 56:52 + MinMHzFSB:2, // 58:57 + MinMHzBR4:1, // 59 + Reserved5:4; // 63:60 + } bits; + unsigned long long val; +}; + +/* + * Clock ratio tables. Div/Mod by 10 to get ratio. + * The eblcr ones specify the ratio read from the CPU. + * The clock_ratio ones specify what to write to the CPU. + */ + +/* + * VIA C3 Samuel 1 & Samuel 2 (stepping 0) + */ +static const int __initdata samuel1_clock_ratio[16] = { + -1, /* 0000 -> RESERVED */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + -1, /* 0011 -> RESERVED */ + -1, /* 0100 -> RESERVED */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + 55, /* 0111 -> 5.5x */ + 60, /* 1000 -> 6.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 50, /* 1011 -> 5.0x */ + 65, /* 1100 -> 6.5x */ + 75, /* 1101 -> 7.5x */ + -1, /* 1110 -> RESERVED */ + -1, /* 1111 -> RESERVED */ +}; + +static const int __initdata samuel1_eblcr[16] = { + 50, /* 0000 -> RESERVED */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + -1, /* 0011 -> RESERVED */ + 55, /* 0100 -> 5.5x */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + -1, /* 0111 -> RESERVED */ + -1, /* 1000 -> RESERVED */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 60, /* 1011 -> 6.0x */ + -1, /* 1100 -> RESERVED */ + 75, /* 1101 -> 7.5x */ + -1, /* 1110 -> RESERVED */ + 65, /* 1111 -> 6.5x */ +}; + +/* + * VIA C3 Samuel2 Stepping 1->15 + */ +static const int __initdata samuel2_eblcr[16] = { + 50, /* 0000 -> 5.0x */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + 100, /* 0011 -> 10.0x */ + 55, /* 0100 -> 5.5x */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + 110, /* 0111 -> 11.0x */ + 90, /* 1000 -> 9.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 60, /* 1011 -> 6.0x */ + 120, /* 1100 -> 12.0x */ + 75, /* 1101 -> 7.5x */ + 130, /* 1110 -> 13.0x */ + 65, /* 1111 -> 6.5x */ +}; + +/* + * VIA C3 Ezra + */ +static const int __initdata ezra_clock_ratio[16] = { + 100, /* 0000 -> 10.0x */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + 90, /* 0011 -> 9.0x */ + 95, /* 0100 -> 9.5x */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + 55, /* 0111 -> 5.5x */ + 60, /* 1000 -> 6.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 50, /* 1011 -> 5.0x */ + 65, /* 1100 -> 6.5x */ + 75, /* 1101 -> 7.5x */ + 85, /* 1110 -> 8.5x */ + 120, /* 1111 -> 12.0x */ +}; + +static const int __initdata ezra_eblcr[16] = { + 50, /* 0000 -> 5.0x */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + 100, /* 0011 -> 10.0x */ + 55, /* 0100 -> 5.5x */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + 95, /* 0111 -> 9.5x */ + 90, /* 1000 -> 9.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 60, /* 1011 -> 6.0x */ + 120, /* 1100 -> 12.0x */ + 75, /* 1101 -> 7.5x */ + 85, /* 1110 -> 8.5x */ + 65, /* 1111 -> 6.5x */ +}; + +/* + * VIA C3 (Ezra-T) [C5M]. + */ +static const int __initdata ezrat_clock_ratio[32] = { + 100, /* 0000 -> 10.0x */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + 90, /* 0011 -> 9.0x */ + 95, /* 0100 -> 9.5x */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + 55, /* 0111 -> 5.5x */ + 60, /* 1000 -> 6.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 50, /* 1011 -> 5.0x */ + 65, /* 1100 -> 6.5x */ + 75, /* 1101 -> 7.5x */ + 85, /* 1110 -> 8.5x */ + 120, /* 1111 -> 12.0x */ + + -1, /* 0000 -> RESERVED (10.0x) */ + 110, /* 0001 -> 11.0x */ + -1, /* 0010 -> 12.0x */ + -1, /* 0011 -> RESERVED (9.0x)*/ + 105, /* 0100 -> 10.5x */ + 115, /* 0101 -> 11.5x */ + 125, /* 0110 -> 12.5x */ + 135, /* 0111 -> 13.5x */ + 140, /* 1000 -> 14.0x */ + 150, /* 1001 -> 15.0x */ + 160, /* 1010 -> 16.0x */ + 130, /* 1011 -> 13.0x */ + 145, /* 1100 -> 14.5x */ + 155, /* 1101 -> 15.5x */ + -1, /* 1110 -> RESERVED (13.0x) */ + -1, /* 1111 -> RESERVED (12.0x) */ +}; + +static const int __initdata ezrat_eblcr[32] = { + 50, /* 0000 -> 5.0x */ + 30, /* 0001 -> 3.0x */ + 40, /* 0010 -> 4.0x */ + 100, /* 0011 -> 10.0x */ + 55, /* 0100 -> 5.5x */ + 35, /* 0101 -> 3.5x */ + 45, /* 0110 -> 4.5x */ + 95, /* 0111 -> 9.5x */ + 90, /* 1000 -> 9.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 60, /* 1011 -> 6.0x */ + 120, /* 1100 -> 12.0x */ + 75, /* 1101 -> 7.5x */ + 85, /* 1110 -> 8.5x */ + 65, /* 1111 -> 6.5x */ + + -1, /* 0000 -> RESERVED (9.0x) */ + 110, /* 0001 -> 11.0x */ + 120, /* 0010 -> 12.0x */ + -1, /* 0011 -> RESERVED (10.0x)*/ + 135, /* 0100 -> 13.5x */ + 115, /* 0101 -> 11.5x */ + 125, /* 0110 -> 12.5x */ + 105, /* 0111 -> 10.5x */ + 130, /* 1000 -> 13.0x */ + 150, /* 1001 -> 15.0x */ + 160, /* 1010 -> 16.0x */ + 140, /* 1011 -> 14.0x */ + -1, /* 1100 -> RESERVED (12.0x) */ + 155, /* 1101 -> 15.5x */ + -1, /* 1110 -> RESERVED (13.0x) */ + 145, /* 1111 -> 14.5x */ +}; + +/* + * VIA C3 Nehemiah */ + +static const int __initdata nehemiah_clock_ratio[32] = { + 100, /* 0000 -> 10.0x */ + -1, /* 0001 -> 16.0x */ + 40, /* 0010 -> 4.0x */ + 90, /* 0011 -> 9.0x */ + 95, /* 0100 -> 9.5x */ + -1, /* 0101 -> RESERVED */ + 45, /* 0110 -> 4.5x */ + 55, /* 0111 -> 5.5x */ + 60, /* 1000 -> 6.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 50, /* 1011 -> 5.0x */ + 65, /* 1100 -> 6.5x */ + 75, /* 1101 -> 7.5x */ + 85, /* 1110 -> 8.5x */ + 120, /* 1111 -> 12.0x */ + -1, /* 0000 -> 10.0x */ + 110, /* 0001 -> 11.0x */ + -1, /* 0010 -> 12.0x */ + -1, /* 0011 -> 9.0x */ + 105, /* 0100 -> 10.5x */ + 115, /* 0101 -> 11.5x */ + 125, /* 0110 -> 12.5x */ + 135, /* 0111 -> 13.5x */ + 140, /* 1000 -> 14.0x */ + 150, /* 1001 -> 15.0x */ + 160, /* 1010 -> 16.0x */ + 130, /* 1011 -> 13.0x */ + 145, /* 1100 -> 14.5x */ + 155, /* 1101 -> 15.5x */ + -1, /* 1110 -> RESERVED (13.0x) */ + -1, /* 1111 -> 12.0x */ +}; + +static const int __initdata nehemiah_eblcr[32] = { + 50, /* 0000 -> 5.0x */ + 160, /* 0001 -> 16.0x */ + 40, /* 0010 -> 4.0x */ + 100, /* 0011 -> 10.0x */ + 55, /* 0100 -> 5.5x */ + -1, /* 0101 -> RESERVED */ + 45, /* 0110 -> 4.5x */ + 95, /* 0111 -> 9.5x */ + 90, /* 1000 -> 9.0x */ + 70, /* 1001 -> 7.0x */ + 80, /* 1010 -> 8.0x */ + 60, /* 1011 -> 6.0x */ + 120, /* 1100 -> 12.0x */ + 75, /* 1101 -> 7.5x */ + 85, /* 1110 -> 8.5x */ + 65, /* 1111 -> 6.5x */ + 90, /* 0000 -> 9.0x */ + 110, /* 0001 -> 11.0x */ + 120, /* 0010 -> 12.0x */ + 100, /* 0011 -> 10.0x */ + 135, /* 0100 -> 13.5x */ + 115, /* 0101 -> 11.5x */ + 125, /* 0110 -> 12.5x */ + 105, /* 0111 -> 10.5x */ + 130, /* 1000 -> 13.0x */ + 150, /* 1001 -> 15.0x */ + 160, /* 1010 -> 16.0x */ + 140, /* 1011 -> 14.0x */ + 120, /* 1100 -> 12.0x */ + 155, /* 1101 -> 15.5x */ + -1, /* 1110 -> RESERVED (13.0x) */ + 145 /* 1111 -> 14.5x */ +}; + +/* + * Voltage scales. Div/Mod by 1000 to get actual voltage. + * Which scale to use depends on the VRM type in use. + */ + +struct mV_pos { + unsigned short mV; + unsigned short pos; +}; + +static const struct mV_pos __initdata vrm85_mV[32] = { + {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, + {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, + {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, + {1450, 16}, {1400, 14}, {1350, 12}, {1300, 10}, + {1275, 9}, {1225, 7}, {1175, 5}, {1125, 3}, + {1075, 1}, {1825, 31}, {1775, 29}, {1725, 27}, + {1675, 25}, {1625, 23}, {1575, 21}, {1525, 19}, + {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} +}; + +static const unsigned char __initdata mV_vrm85[32] = { + 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, + 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, + 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, + 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 +}; + +static const struct mV_pos __initdata mobilevrm_mV[32] = { + {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, + {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, + {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, + {1150, 19}, {1100, 18}, {1050, 17}, {1000, 16}, + {975, 15}, {950, 14}, {925, 13}, {900, 12}, + {875, 11}, {850, 10}, {825, 9}, {800, 8}, + {775, 7}, {750, 6}, {725, 5}, {700, 4}, + {675, 3}, {650, 2}, {625, 1}, {600, 0} +}; + +static const unsigned char __initdata mV_mobilevrm[32] = { + 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, + 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, + 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 +}; + diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c new file mode 100644 index 00000000000..b2689514295 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c @@ -0,0 +1,325 @@ +/* + * (C) 2002 - 2003 Dominik Brodowski + * + * Licensed under the terms of the GNU GPL License version 2. + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longrun", msg) + +static struct cpufreq_driver longrun_driver; + +/** + * longrun_{low,high}_freq is needed for the conversion of cpufreq kHz + * values into per cent values. In TMTA microcode, the following is valid: + * performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) + */ +static unsigned int longrun_low_freq, longrun_high_freq; + + +/** + * longrun_get_policy - get the current LongRun policy + * @policy: struct cpufreq_policy where current policy is written into + * + * Reads the current LongRun policy by access to MSR_TMTA_LONGRUN_FLAGS + * and MSR_TMTA_LONGRUN_CTRL + */ +static void __init longrun_get_policy(struct cpufreq_policy *policy) +{ + u32 msr_lo, msr_hi; + + rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); + dprintk("longrun flags are %x - %x\n", msr_lo, msr_hi); + if (msr_lo & 0x01) + policy->policy = CPUFREQ_POLICY_PERFORMANCE; + else + policy->policy = CPUFREQ_POLICY_POWERSAVE; + + rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); + dprintk("longrun ctrl is %x - %x\n", msr_lo, msr_hi); + msr_lo &= 0x0000007F; + msr_hi &= 0x0000007F; + + if ( longrun_high_freq <= longrun_low_freq ) { + /* Assume degenerate Longrun table */ + policy->min = policy->max = longrun_high_freq; + } else { + policy->min = longrun_low_freq + msr_lo * + ((longrun_high_freq - longrun_low_freq) / 100); + policy->max = longrun_low_freq + msr_hi * + ((longrun_high_freq - longrun_low_freq) / 100); + } + policy->cpu = 0; +} + + +/** + * longrun_set_policy - sets a new CPUFreq policy + * @policy: new policy + * + * Sets a new CPUFreq policy on LongRun-capable processors. This function + * has to be called with cpufreq_driver locked. + */ +static int longrun_set_policy(struct cpufreq_policy *policy) +{ + u32 msr_lo, msr_hi; + u32 pctg_lo, pctg_hi; + + if (!policy) + return -EINVAL; + + if ( longrun_high_freq <= longrun_low_freq ) { + /* Assume degenerate Longrun table */ + pctg_lo = pctg_hi = 100; + } else { + pctg_lo = (policy->min - longrun_low_freq) / + ((longrun_high_freq - longrun_low_freq) / 100); + pctg_hi = (policy->max - longrun_low_freq) / + ((longrun_high_freq - longrun_low_freq) / 100); + } + + if (pctg_hi > 100) + pctg_hi = 100; + if (pctg_lo > pctg_hi) + pctg_lo = pctg_hi; + + /* performance or economy mode */ + rdmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); + msr_lo &= 0xFFFFFFFE; + switch (policy->policy) { + case CPUFREQ_POLICY_PERFORMANCE: + msr_lo |= 0x00000001; + break; + case CPUFREQ_POLICY_POWERSAVE: + break; + } + wrmsr(MSR_TMTA_LONGRUN_FLAGS, msr_lo, msr_hi); + + /* lower and upper boundary */ + rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); + msr_lo &= 0xFFFFFF80; + msr_hi &= 0xFFFFFF80; + msr_lo |= pctg_lo; + msr_hi |= pctg_hi; + wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); + + return 0; +} + + +/** + * longrun_verify_poliy - verifies a new CPUFreq policy + * @policy: the policy to verify + * + * Validates a new CPUFreq policy. This function has to be called with + * cpufreq_driver locked. + */ +static int longrun_verify_policy(struct cpufreq_policy *policy) +{ + if (!policy) + return -EINVAL; + + policy->cpu = 0; + cpufreq_verify_within_limits(policy, + policy->cpuinfo.min_freq, + policy->cpuinfo.max_freq); + + if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) && + (policy->policy != CPUFREQ_POLICY_PERFORMANCE)) + return -EINVAL; + + return 0; +} + +static unsigned int longrun_get(unsigned int cpu) +{ + u32 eax, ebx, ecx, edx; + + if (cpu) + return 0; + + cpuid(0x80860007, &eax, &ebx, &ecx, &edx); + dprintk("cpuid eax is %u\n", eax); + + return (eax * 1000); +} + +/** + * longrun_determine_freqs - determines the lowest and highest possible core frequency + * @low_freq: an int to put the lowest frequency into + * @high_freq: an int to put the highest frequency into + * + * Determines the lowest and highest possible core frequencies on this CPU. + * This is necessary to calculate the performance percentage according to + * TMTA rules: + * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) + */ +static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, + unsigned int *high_freq) +{ + u32 msr_lo, msr_hi; + u32 save_lo, save_hi; + u32 eax, ebx, ecx, edx; + u32 try_hi; + struct cpuinfo_x86 *c = cpu_data; + + if (!low_freq || !high_freq) + return -EINVAL; + + if (cpu_has(c, X86_FEATURE_LRTI)) { + /* if the LongRun Table Interface is present, the + * detection is a bit easier: + * For minimum frequency, read out the maximum + * level (msr_hi), write that into "currently + * selected level", and read out the frequency. + * For maximum frequency, read out level zero. + */ + /* minimum */ + rdmsr(MSR_TMTA_LRTI_READOUT, msr_lo, msr_hi); + wrmsr(MSR_TMTA_LRTI_READOUT, msr_hi, msr_hi); + rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); + *low_freq = msr_lo * 1000; /* to kHz */ + + /* maximum */ + wrmsr(MSR_TMTA_LRTI_READOUT, 0, msr_hi); + rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi); + *high_freq = msr_lo * 1000; /* to kHz */ + + dprintk("longrun table interface told %u - %u kHz\n", *low_freq, *high_freq); + + if (*low_freq > *high_freq) + *low_freq = *high_freq; + return 0; + } + + /* set the upper border to the value determined during TSC init */ + *high_freq = (cpu_khz / 1000); + *high_freq = *high_freq * 1000; + dprintk("high frequency is %u kHz\n", *high_freq); + + /* get current borders */ + rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); + save_lo = msr_lo & 0x0000007F; + save_hi = msr_hi & 0x0000007F; + + /* if current perf_pctg is larger than 90%, we need to decrease the + * upper limit to make the calculation more accurate. + */ + cpuid(0x80860007, &eax, &ebx, &ecx, &edx); + /* try decreasing in 10% steps, some processors react only + * on some barrier values */ + for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -=10) { + /* set to 0 to try_hi perf_pctg */ + msr_lo &= 0xFFFFFF80; + msr_hi &= 0xFFFFFF80; + msr_hi |= try_hi; + wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi); + + /* read out current core MHz and current perf_pctg */ + cpuid(0x80860007, &eax, &ebx, &ecx, &edx); + + /* restore values */ + wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi); + } + dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax); + + /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq) + * eqals + * low_freq * ( 1 - perf_pctg) = (cur_freq - high_freq * perf_pctg) + * + * high_freq * perf_pctg is stored tempoarily into "ebx". + */ + ebx = (((cpu_khz / 1000) * ecx) / 100); /* to MHz */ + + if ((ecx > 95) || (ecx == 0) || (eax < ebx)) + return -EIO; + + edx = (eax - ebx) / (100 - ecx); + *low_freq = edx * 1000; /* back to kHz */ + + dprintk("low frequency is %u kHz\n", *low_freq); + + if (*low_freq > *high_freq) + *low_freq = *high_freq; + + return 0; +} + + +static int __init longrun_cpu_init(struct cpufreq_policy *policy) +{ + int result = 0; + + /* capability check */ + if (policy->cpu != 0) + return -ENODEV; + + /* detect low and high frequency */ + result = longrun_determine_freqs(&longrun_low_freq, &longrun_high_freq); + if (result) + return result; + + /* cpuinfo and default policy values */ + policy->cpuinfo.min_freq = longrun_low_freq; + policy->cpuinfo.max_freq = longrun_high_freq; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + longrun_get_policy(policy); + + return 0; +} + + +static struct cpufreq_driver longrun_driver = { + .flags = CPUFREQ_CONST_LOOPS, + .verify = longrun_verify_policy, + .setpolicy = longrun_set_policy, + .get = longrun_get, + .init = longrun_cpu_init, + .name = "longrun", + .owner = THIS_MODULE, +}; + + +/** + * longrun_init - initializes the Transmeta Crusoe LongRun CPUFreq driver + * + * Initializes the LongRun support. + */ +static int __init longrun_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + + if (c->x86_vendor != X86_VENDOR_TRANSMETA || + !cpu_has(c, X86_FEATURE_LONGRUN)) + return -ENODEV; + + return cpufreq_register_driver(&longrun_driver); +} + + +/** + * longrun_exit - unregisters LongRun support + */ +static void __exit longrun_exit(void) +{ + cpufreq_unregister_driver(&longrun_driver); +} + + +MODULE_AUTHOR ("Dominik Brodowski "); +MODULE_DESCRIPTION ("LongRun driver for Transmeta Crusoe and Efficeon processors."); +MODULE_LICENSE ("GPL"); + +module_init(longrun_init); +module_exit(longrun_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c new file mode 100644 index 00000000000..4c76b511e19 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -0,0 +1,316 @@ +/* + * Pentium 4/Xeon CPU on demand clock modulation/speed scaling + * (C) 2002 - 2003 Dominik Brodowski + * (C) 2002 Zwane Mwaikambo + * (C) 2002 Arjan van de Ven + * (C) 2002 Tora T. Engstad + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * The author(s) of this software shall not be held liable for damages + * of any nature resulting due to the use of this software. This + * software is provided AS-IS with no warranties. + * + * Date Errata Description + * 20020525 N44, O17 12.5% or 25% DC causes lockup + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "speedstep-lib.h" + +#define PFX "p4-clockmod: " +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "p4-clockmod", msg) + +/* + * Duty Cycle (3bits), note DC_DISABLE is not specified in + * intel docs i just use it to mean disable + */ +enum { + DC_RESV, DC_DFLT, DC_25PT, DC_38PT, DC_50PT, + DC_64PT, DC_75PT, DC_88PT, DC_DISABLE +}; + +#define DC_ENTRIES 8 + + +static int has_N44_O17_errata[NR_CPUS]; +static unsigned int stock_freq; +static struct cpufreq_driver p4clockmod_driver; +static unsigned int cpufreq_p4_get(unsigned int cpu); + +static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) +{ + u32 l, h; + + if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV)) + return -EINVAL; + + rdmsr_on_cpu(cpu, MSR_IA32_THERM_STATUS, &l, &h); + + if (l & 0x01) + dprintk("CPU#%d currently thermal throttled\n", cpu); + + if (has_N44_O17_errata[cpu] && (newstate == DC_25PT || newstate == DC_DFLT)) + newstate = DC_38PT; + + rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); + if (newstate == DC_DISABLE) { + dprintk("CPU#%d disabling modulation\n", cpu); + wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l & ~(1<<4), h); + } else { + dprintk("CPU#%d setting duty cycle to %d%%\n", + cpu, ((125 * newstate) / 10)); + /* bits 63 - 5 : reserved + * bit 4 : enable/disable + * bits 3-1 : duty cycle + * bit 0 : reserved + */ + l = (l & ~14); + l = l | (1<<4) | ((newstate & 0x7)<<1); + wrmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, l, h); + } + + return 0; +} + + +static struct cpufreq_frequency_table p4clockmod_table[] = { + {DC_RESV, CPUFREQ_ENTRY_INVALID}, + {DC_DFLT, 0}, + {DC_25PT, 0}, + {DC_38PT, 0}, + {DC_50PT, 0}, + {DC_64PT, 0}, + {DC_75PT, 0}, + {DC_88PT, 0}, + {DC_DISABLE, 0}, + {DC_RESV, CPUFREQ_TABLE_END}, +}; + + +static int cpufreq_p4_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = DC_RESV; + struct cpufreq_freqs freqs; + int i; + + if (cpufreq_frequency_table_target(policy, &p4clockmod_table[0], target_freq, relation, &newstate)) + return -EINVAL; + + freqs.old = cpufreq_p4_get(policy->cpu); + freqs.new = stock_freq * p4clockmod_table[newstate].index / 8; + + if (freqs.new == freqs.old) + return 0; + + /* notifiers */ + for_each_cpu_mask(i, policy->cpus) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } + + /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software + * Developer's Manual, Volume 3 + */ + for_each_cpu_mask(i, policy->cpus) + cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); + + /* notifiers */ + for_each_cpu_mask(i, policy->cpus) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + + return 0; +} + + +static int cpufreq_p4_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &p4clockmod_table[0]); +} + + +static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) +{ + if (c->x86 == 0x06) { + if (cpu_has(c, X86_FEATURE_EST)) + printk(KERN_WARNING PFX "Warning: EST-capable CPU detected. " + "The acpi-cpufreq module offers voltage scaling" + " in addition of frequency scaling. You should use " + "that instead of p4-clockmod, if possible.\n"); + switch (c->x86_model) { + case 0x0E: /* Core */ + case 0x0F: /* Core Duo */ + p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; + return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE); + case 0x0D: /* Pentium M (Dothan) */ + p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; + /* fall through */ + case 0x09: /* Pentium M (Banias) */ + return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PM); + } + } + + if (c->x86 != 0xF) { + printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to \n"); + return 0; + } + + /* on P-4s, the TSC runs with constant frequency independent whether + * throttling is active or not. */ + p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; + + if (speedstep_detect_processor() == SPEEDSTEP_PROCESSOR_P4M) { + printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. " + "The speedstep-ich or acpi cpufreq modules offer " + "voltage scaling in addition of frequency scaling. " + "You should use either one instead of p4-clockmod, " + "if possible.\n"); + return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4M); + } + + return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D); +} + + + +static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; + int cpuid = 0; + unsigned int i; + +#ifdef CONFIG_SMP + policy->cpus = cpu_sibling_map[policy->cpu]; +#endif + + /* Errata workaround */ + cpuid = (c->x86 << 8) | (c->x86_model << 4) | c->x86_mask; + switch (cpuid) { + case 0x0f07: + case 0x0f0a: + case 0x0f11: + case 0x0f12: + has_N44_O17_errata[policy->cpu] = 1; + dprintk("has errata -- disabling low frequencies\n"); + } + + /* get max frequency */ + stock_freq = cpufreq_p4_get_frequency(c); + if (!stock_freq) + return -EINVAL; + + /* table init */ + for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { + if ((i<2) && (has_N44_O17_errata[policy->cpu])) + p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; + else + p4clockmod_table[i].frequency = (stock_freq * i)/8; + } + cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu); + + /* cpuinfo and default policy values */ + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = 1000000; /* assumed */ + policy->cur = stock_freq; + + return cpufreq_frequency_table_cpuinfo(policy, &p4clockmod_table[0]); +} + + +static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static unsigned int cpufreq_p4_get(unsigned int cpu) +{ + u32 l, h; + + rdmsr_on_cpu(cpu, MSR_IA32_THERM_CONTROL, &l, &h); + + if (l & 0x10) { + l = l >> 1; + l &= 0x7; + } else + l = DC_DISABLE; + + if (l != DC_DISABLE) + return (stock_freq * l / 8); + + return stock_freq; +} + +static struct freq_attr* p4clockmod_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver p4clockmod_driver = { + .verify = cpufreq_p4_verify, + .target = cpufreq_p4_target, + .init = cpufreq_p4_cpu_init, + .exit = cpufreq_p4_cpu_exit, + .get = cpufreq_p4_get, + .name = "p4-clockmod", + .owner = THIS_MODULE, + .attr = p4clockmod_attr, +}; + + +static int __init cpufreq_p4_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + int ret; + + /* + * THERM_CONTROL is architectural for IA32 now, so + * we can rely on the capability checks + */ + if (c->x86_vendor != X86_VENDOR_INTEL) + return -ENODEV; + + if (!test_bit(X86_FEATURE_ACPI, c->x86_capability) || + !test_bit(X86_FEATURE_ACC, c->x86_capability)) + return -ENODEV; + + ret = cpufreq_register_driver(&p4clockmod_driver); + if (!ret) + printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock Modulation available\n"); + + return (ret); +} + + +static void __exit cpufreq_p4_exit(void) +{ + cpufreq_unregister_driver(&p4clockmod_driver); +} + + +MODULE_AUTHOR ("Zwane Mwaikambo "); +MODULE_DESCRIPTION ("cpufreq driver for Pentium(TM) 4/Xeon(TM)"); +MODULE_LICENSE ("GPL"); + +late_initcall(cpufreq_p4_init); +module_exit(cpufreq_p4_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c new file mode 100644 index 00000000000..f89524051e4 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -0,0 +1,256 @@ +/* + * This file was based upon code in Powertweak Linux (http://powertweak.sf.net) + * (C) 2000-2003 Dave Jones, Arjan van de Ven, Janne Pänkälä, Dominik Brodowski. + * + * Licensed under the terms of the GNU GPL License version 2. + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long + as it is unused */ + +static unsigned int busfreq; /* FSB, in 10 kHz */ +static unsigned int max_multiplier; + + +/* Clock ratio multiplied by 10 - see table 27 in AMD#23446 */ +static struct cpufreq_frequency_table clock_ratio[] = { + {45, /* 000 -> 4.5x */ 0}, + {50, /* 001 -> 5.0x */ 0}, + {40, /* 010 -> 4.0x */ 0}, + {55, /* 011 -> 5.5x */ 0}, + {20, /* 100 -> 2.0x */ 0}, + {30, /* 101 -> 3.0x */ 0}, + {60, /* 110 -> 6.0x */ 0}, + {35, /* 111 -> 3.5x */ 0}, + {0, CPUFREQ_TABLE_END} +}; + + +/** + * powernow_k6_get_cpu_multiplier - returns the current FSB multiplier + * + * Returns the current setting of the frequency multiplier. Core clock + * speed is frequency of the Front-Side Bus multiplied with this value. + */ +static int powernow_k6_get_cpu_multiplier(void) +{ + u64 invalue = 0; + u32 msrval; + + msrval = POWERNOW_IOPORT + 0x1; + wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ + invalue=inl(POWERNOW_IOPORT + 0x8); + msrval = POWERNOW_IOPORT + 0x0; + wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ + + return clock_ratio[(invalue >> 5)&7].index; +} + + +/** + * powernow_k6_set_state - set the PowerNow! multiplier + * @best_i: clock_ratio[best_i] is the target multiplier + * + * Tries to change the PowerNow! multiplier + */ +static void powernow_k6_set_state (unsigned int best_i) +{ + unsigned long outvalue=0, invalue=0; + unsigned long msrval; + struct cpufreq_freqs freqs; + + if (clock_ratio[best_i].index > max_multiplier) { + printk(KERN_ERR "cpufreq: invalid target frequency\n"); + return; + } + + freqs.old = busfreq * powernow_k6_get_cpu_multiplier(); + freqs.new = busfreq * clock_ratio[best_i].index; + freqs.cpu = 0; /* powernow-k6.c is UP only driver */ + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + /* we now need to transform best_i to the BVC format, see AMD#23446 */ + + outvalue = (1<<12) | (1<<10) | (1<<9) | (best_i<<5); + + msrval = POWERNOW_IOPORT + 0x1; + wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ + invalue=inl(POWERNOW_IOPORT + 0x8); + invalue = invalue & 0xf; + outvalue = outvalue | invalue; + outl(outvalue ,(POWERNOW_IOPORT + 0x8)); + msrval = POWERNOW_IOPORT + 0x0; + wrmsr(MSR_K6_EPMR, msrval, 0); /* disable it again */ + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + return; +} + + +/** + * powernow_k6_verify - verifies a new CPUfreq policy + * @policy: new policy + * + * Policy must be within lowest and highest possible CPU Frequency, + * and at least one possible state must be within min and max. + */ +static int powernow_k6_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &clock_ratio[0]); +} + + +/** + * powernow_k6_setpolicy - sets a new CPUFreq policy + * @policy: new policy + * @target_freq: the target frequency + * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) + * + * sets a new CPUFreq policy + */ +static int powernow_k6_target (struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + + if (cpufreq_frequency_table_target(policy, &clock_ratio[0], target_freq, relation, &newstate)) + return -EINVAL; + + powernow_k6_set_state(newstate); + + return 0; +} + + +static int powernow_k6_cpu_init(struct cpufreq_policy *policy) +{ + unsigned int i; + int result; + + if (policy->cpu != 0) + return -ENODEV; + + /* get frequencies */ + max_multiplier = powernow_k6_get_cpu_multiplier(); + busfreq = cpu_khz / max_multiplier; + + /* table init */ + for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { + if (clock_ratio[i].index > max_multiplier) + clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; + else + clock_ratio[i].frequency = busfreq * clock_ratio[i].index; + } + + /* cpuinfo and default policy values */ + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cur = busfreq * max_multiplier; + + result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); + if (result) + return (result); + + cpufreq_frequency_table_get_attr(clock_ratio, policy->cpu); + + return 0; +} + + +static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) +{ + unsigned int i; + for (i=0; i<8; i++) { + if (i==max_multiplier) + powernow_k6_set_state(i); + } + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static unsigned int powernow_k6_get(unsigned int cpu) +{ + return busfreq * powernow_k6_get_cpu_multiplier(); +} + +static struct freq_attr* powernow_k6_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver powernow_k6_driver = { + .verify = powernow_k6_verify, + .target = powernow_k6_target, + .init = powernow_k6_cpu_init, + .exit = powernow_k6_cpu_exit, + .get = powernow_k6_get, + .name = "powernow-k6", + .owner = THIS_MODULE, + .attr = powernow_k6_attr, +}; + + +/** + * powernow_k6_init - initializes the k6 PowerNow! CPUFreq driver + * + * Initializes the K6 PowerNow! support. Returns -ENODEV on unsupported + * devices, -EINVAL or -ENOMEM on problems during initiatization, and zero + * on success. + */ +static int __init powernow_k6_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + + if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) || + ((c->x86_model != 12) && (c->x86_model != 13))) + return -ENODEV; + + if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) { + printk("cpufreq: PowerNow IOPORT region already used.\n"); + return -EIO; + } + + if (cpufreq_register_driver(&powernow_k6_driver)) { + release_region (POWERNOW_IOPORT, 16); + return -EINVAL; + } + + return 0; +} + + +/** + * powernow_k6_exit - unregisters AMD K6-2+/3+ PowerNow! support + * + * Unregisters AMD K6-2+ / K6-3+ PowerNow! support. + */ +static void __exit powernow_k6_exit(void) +{ + cpufreq_unregister_driver(&powernow_k6_driver); + release_region (POWERNOW_IOPORT, 16); +} + + +MODULE_AUTHOR ("Arjan van de Ven , Dave Jones , Dominik Brodowski "); +MODULE_DESCRIPTION ("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); +MODULE_LICENSE ("GPL"); + +module_init(powernow_k6_init); +module_exit(powernow_k6_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c new file mode 100644 index 00000000000..ca3e1d34188 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -0,0 +1,703 @@ +/* + * AMD K7 Powernow driver. + * (C) 2003 Dave Jones on behalf of SuSE Labs. + * (C) 2003-2004 Dave Jones + * + * Licensed under the terms of the GNU GPL License version 2. + * Based upon datasheets & sample CPUs kindly provided by AMD. + * + * Errata 5: Processor may fail to execute a FID/VID change in presence of interrupt. + * - We cli/sti on stepping A0 CPUs around the FID/VID transition. + * Errata 15: Processors with half frequency multipliers may hang upon wakeup from disconnect. + * - We disable half multipliers if ACPI is used on A0 stepping CPUs. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef CONFIG_X86_POWERNOW_K7_ACPI +#include +#include +#endif + +#include "powernow-k7.h" + +#define PFX "powernow: " + + +struct psb_s { + u8 signature[10]; + u8 tableversion; + u8 flags; + u16 settlingtime; + u8 reserved1; + u8 numpst; +}; + +struct pst_s { + u32 cpuid; + u8 fsbspeed; + u8 maxfid; + u8 startvid; + u8 numpstates; +}; + +#ifdef CONFIG_X86_POWERNOW_K7_ACPI +union powernow_acpi_control_t { + struct { + unsigned long fid:5, + vid:5, + sgtc:20, + res1:2; + } bits; + unsigned long val; +}; +#endif + +#ifdef CONFIG_CPU_FREQ_DEBUG +/* divide by 1000 to get VCore voltage in V. */ +static const int mobile_vid_table[32] = { + 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650, + 1600, 1550, 1500, 1450, 1400, 1350, 1300, 0, + 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100, + 1075, 1050, 1025, 1000, 975, 950, 925, 0, +}; +#endif + +/* divide by 10 to get FID. */ +static const int fid_codes[32] = { + 110, 115, 120, 125, 50, 55, 60, 65, + 70, 75, 80, 85, 90, 95, 100, 105, + 30, 190, 40, 200, 130, 135, 140, 210, + 150, 225, 160, 165, 170, 180, -1, -1, +}; + +/* This parameter is used in order to force ACPI instead of legacy method for + * configuration purpose. + */ + +static int acpi_force; + +static struct cpufreq_frequency_table *powernow_table; + +static unsigned int can_scale_bus; +static unsigned int can_scale_vid; +static unsigned int minimum_speed=-1; +static unsigned int maximum_speed; +static unsigned int number_scales; +static unsigned int fsb; +static unsigned int latency; +static char have_a0; + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k7", msg) + +static int check_fsb(unsigned int fsbspeed) +{ + int delta; + unsigned int f = fsb / 1000; + + delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed; + return (delta < 5); +} + +static int check_powernow(void) +{ + struct cpuinfo_x86 *c = cpu_data; + unsigned int maxei, eax, ebx, ecx, edx; + + if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) { +#ifdef MODULE + printk (KERN_INFO PFX "This module only works with AMD K7 CPUs\n"); +#endif + return 0; + } + + /* Get maximum capabilities */ + maxei = cpuid_eax (0x80000000); + if (maxei < 0x80000007) { /* Any powernow info ? */ +#ifdef MODULE + printk (KERN_INFO PFX "No powernow capabilities detected\n"); +#endif + return 0; + } + + if ((c->x86_model == 6) && (c->x86_mask == 0)) { + printk (KERN_INFO PFX "K7 660[A0] core detected, enabling errata workarounds\n"); + have_a0 = 1; + } + + cpuid(0x80000007, &eax, &ebx, &ecx, &edx); + + /* Check we can actually do something before we say anything.*/ + if (!(edx & (1 << 1 | 1 << 2))) + return 0; + + printk (KERN_INFO PFX "PowerNOW! Technology present. Can scale: "); + + if (edx & 1 << 1) { + printk ("frequency"); + can_scale_bus=1; + } + + if ((edx & (1 << 1 | 1 << 2)) == 0x6) + printk (" and "); + + if (edx & 1 << 2) { + printk ("voltage"); + can_scale_vid=1; + } + + printk (".\n"); + return 1; +} + + +static int get_ranges (unsigned char *pst) +{ + unsigned int j; + unsigned int speed; + u8 fid, vid; + + powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL); + if (!powernow_table) + return -ENOMEM; + + for (j=0 ; j < number_scales; j++) { + fid = *pst++; + + powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10; + powernow_table[j].index = fid; /* lower 8 bits */ + + speed = powernow_table[j].frequency; + + if ((fid_codes[fid] % 10)==5) { +#ifdef CONFIG_X86_POWERNOW_K7_ACPI + if (have_a0 == 1) + powernow_table[j].frequency = CPUFREQ_ENTRY_INVALID; +#endif + } + + if (speed < minimum_speed) + minimum_speed = speed; + if (speed > maximum_speed) + maximum_speed = speed; + + vid = *pst++; + powernow_table[j].index |= (vid << 8); /* upper 8 bits */ + + dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " + "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, + fid_codes[fid] % 10, speed/1000, vid, + mobile_vid_table[vid]/1000, + mobile_vid_table[vid]%1000); + } + powernow_table[number_scales].frequency = CPUFREQ_TABLE_END; + powernow_table[number_scales].index = 0; + + return 0; +} + + +static void change_FID(int fid) +{ + union msr_fidvidctl fidvidctl; + + rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); + if (fidvidctl.bits.FID != fid) { + fidvidctl.bits.SGTC = latency; + fidvidctl.bits.FID = fid; + fidvidctl.bits.VIDC = 0; + fidvidctl.bits.FIDC = 1; + wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); + } +} + + +static void change_VID(int vid) +{ + union msr_fidvidctl fidvidctl; + + rdmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); + if (fidvidctl.bits.VID != vid) { + fidvidctl.bits.SGTC = latency; + fidvidctl.bits.VID = vid; + fidvidctl.bits.FIDC = 0; + fidvidctl.bits.VIDC = 1; + wrmsrl (MSR_K7_FID_VID_CTL, fidvidctl.val); + } +} + + +static void change_speed (unsigned int index) +{ + u8 fid, vid; + struct cpufreq_freqs freqs; + union msr_fidvidstatus fidvidstatus; + int cfid; + + /* fid are the lower 8 bits of the index we stored into + * the cpufreq frequency table in powernow_decode_bios, + * vid are the upper 8 bits. + */ + + fid = powernow_table[index].index & 0xFF; + vid = (powernow_table[index].index & 0xFF00) >> 8; + + freqs.cpu = 0; + + rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); + cfid = fidvidstatus.bits.CFID; + freqs.old = fsb * fid_codes[cfid] / 10; + + freqs.new = powernow_table[index].frequency; + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + /* Now do the magic poking into the MSRs. */ + + if (have_a0 == 1) /* A0 errata 5 */ + local_irq_disable(); + + if (freqs.old > freqs.new) { + /* Going down, so change FID first */ + change_FID(fid); + change_VID(vid); + } else { + /* Going up, so change VID first */ + change_VID(vid); + change_FID(fid); + } + + + if (have_a0 == 1) + local_irq_enable(); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); +} + + +#ifdef CONFIG_X86_POWERNOW_K7_ACPI + +static struct acpi_processor_performance *acpi_processor_perf; + +static int powernow_acpi_init(void) +{ + int i; + int retval = 0; + union powernow_acpi_control_t pc; + + if (acpi_processor_perf != NULL && powernow_table != NULL) { + retval = -EINVAL; + goto err0; + } + + acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance), + GFP_KERNEL); + if (!acpi_processor_perf) { + retval = -ENOMEM; + goto err0; + } + + if (acpi_processor_register_performance(acpi_processor_perf, 0)) { + retval = -EIO; + goto err1; + } + + if (acpi_processor_perf->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { + retval = -ENODEV; + goto err2; + } + + if (acpi_processor_perf->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { + retval = -ENODEV; + goto err2; + } + + number_scales = acpi_processor_perf->state_count; + + if (number_scales < 2) { + retval = -ENODEV; + goto err2; + } + + powernow_table = kzalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL); + if (!powernow_table) { + retval = -ENOMEM; + goto err2; + } + + pc.val = (unsigned long) acpi_processor_perf->states[0].control; + for (i = 0; i < number_scales; i++) { + u8 fid, vid; + struct acpi_processor_px *state = + &acpi_processor_perf->states[i]; + unsigned int speed, speed_mhz; + + pc.val = (unsigned long) state->control; + dprintk ("acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", + i, + (u32) state->core_frequency, + (u32) state->power, + (u32) state->transition_latency, + (u32) state->control, + pc.bits.sgtc); + + vid = pc.bits.vid; + fid = pc.bits.fid; + + powernow_table[i].frequency = fsb * fid_codes[fid] / 10; + powernow_table[i].index = fid; /* lower 8 bits */ + powernow_table[i].index |= (vid << 8); /* upper 8 bits */ + + speed = powernow_table[i].frequency; + speed_mhz = speed / 1000; + + /* processor_perflib will multiply the MHz value by 1000 to + * get a KHz value (e.g. 1266000). However, powernow-k7 works + * with true KHz values (e.g. 1266768). To ensure that all + * powernow frequencies are available, we must ensure that + * ACPI doesn't restrict them, so we round up the MHz value + * to ensure that perflib's computed KHz value is greater than + * or equal to powernow's KHz value. + */ + if (speed % 1000 > 0) + speed_mhz++; + + if ((fid_codes[fid] % 10)==5) { + if (have_a0 == 1) + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + } + + dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " + "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, + fid_codes[fid] % 10, speed_mhz, vid, + mobile_vid_table[vid]/1000, + mobile_vid_table[vid]%1000); + + if (state->core_frequency != speed_mhz) { + state->core_frequency = speed_mhz; + dprintk(" Corrected ACPI frequency to %d\n", + speed_mhz); + } + + if (latency < pc.bits.sgtc) + latency = pc.bits.sgtc; + + if (speed < minimum_speed) + minimum_speed = speed; + if (speed > maximum_speed) + maximum_speed = speed; + } + + powernow_table[i].frequency = CPUFREQ_TABLE_END; + powernow_table[i].index = 0; + + /* notify BIOS that we exist */ + acpi_processor_notify_smm(THIS_MODULE); + + return 0; + +err2: + acpi_processor_unregister_performance(acpi_processor_perf, 0); +err1: + kfree(acpi_processor_perf); +err0: + printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n"); + acpi_processor_perf = NULL; + return retval; +} +#else +static int powernow_acpi_init(void) +{ + printk(KERN_INFO PFX "no support for ACPI processor found." + " Please recompile your kernel with ACPI processor\n"); + return -EINVAL; +} +#endif + +static int powernow_decode_bios (int maxfid, int startvid) +{ + struct psb_s *psb; + struct pst_s *pst; + unsigned int i, j; + unsigned char *p; + unsigned int etuple; + unsigned int ret; + + etuple = cpuid_eax(0x80000001); + + for (i=0xC0000; i < 0xffff0 ; i+=16) { + + p = phys_to_virt(i); + + if (memcmp(p, "AMDK7PNOW!", 10) == 0){ + dprintk ("Found PSB header at %p\n", p); + psb = (struct psb_s *) p; + dprintk ("Table version: 0x%x\n", psb->tableversion); + if (psb->tableversion != 0x12) { + printk (KERN_INFO PFX "Sorry, only v1.2 tables supported right now\n"); + return -ENODEV; + } + + dprintk ("Flags: 0x%x\n", psb->flags); + if ((psb->flags & 1)==0) { + dprintk ("Mobile voltage regulator\n"); + } else { + dprintk ("Desktop voltage regulator\n"); + } + + latency = psb->settlingtime; + if (latency < 100) { + printk (KERN_INFO PFX "BIOS set settling time to %d microseconds." + "Should be at least 100. Correcting.\n", latency); + latency = 100; + } + dprintk ("Settling Time: %d microseconds.\n", psb->settlingtime); + dprintk ("Has %d PST tables. (Only dumping ones relevant to this CPU).\n", psb->numpst); + + p += sizeof (struct psb_s); + + pst = (struct pst_s *) p; + + for (j=0; jnumpst; j++) { + pst = (struct pst_s *) p; + number_scales = pst->numpstates; + + if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) && + (maxfid==pst->maxfid) && (startvid==pst->startvid)) + { + dprintk ("PST:%d (@%p)\n", j, pst); + dprintk (" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", + pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); + + ret = get_ranges ((char *) pst + sizeof (struct pst_s)); + return ret; + } else { + unsigned int k; + p = (char *) pst + sizeof (struct pst_s); + for (k=0; k= 5) + m += 5; + + m /= 10; + + sgtc = 100 * m * latency; + sgtc = sgtc / 3; + if (sgtc > 0xfffff) { + printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc); + sgtc = 0xfffff; + } + return sgtc; +} + +static unsigned int powernow_get(unsigned int cpu) +{ + union msr_fidvidstatus fidvidstatus; + unsigned int cfid; + + if (cpu) + return 0; + rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); + cfid = fidvidstatus.bits.CFID; + + return (fsb * fid_codes[cfid] / 10); +} + + +static int __init acer_cpufreq_pst(struct dmi_system_id *d) +{ + printk(KERN_WARNING "%s laptop with broken PST tables in BIOS detected.\n", d->ident); + printk(KERN_WARNING "You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n"); + printk(KERN_WARNING "cpufreq scaling has been disabled as a result of this.\n"); + return 0; +} + +/* + * Some Athlon laptops have really fucked PST tables. + * A BIOS update is all that can save them. + * Mention this, and disable cpufreq. + */ +static struct dmi_system_id __initdata powernow_dmi_table[] = { + { + .callback = acer_cpufreq_pst, + .ident = "Acer Aspire", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Insyde Software"), + DMI_MATCH(DMI_BIOS_VERSION, "3A71"), + }, + }, + { } +}; + +static int __init powernow_cpu_init (struct cpufreq_policy *policy) +{ + union msr_fidvidstatus fidvidstatus; + int result; + + if (policy->cpu != 0) + return -ENODEV; + + rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); + + recalibrate_cpu_khz(); + + fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID]; + if (!fsb) { + printk(KERN_WARNING PFX "can not determine bus frequency\n"); + return -EINVAL; + } + dprintk("FSB: %3dMHz\n", fsb/1000); + + if (dmi_check_system(powernow_dmi_table) || acpi_force) { + printk (KERN_INFO PFX "PSB/PST known to be broken. Trying ACPI instead\n"); + result = powernow_acpi_init(); + } else { + result = powernow_decode_bios(fidvidstatus.bits.MFID, fidvidstatus.bits.SVID); + if (result) { + printk (KERN_INFO PFX "Trying ACPI perflib\n"); + maximum_speed = 0; + minimum_speed = -1; + latency = 0; + result = powernow_acpi_init(); + if (result) { + printk (KERN_INFO PFX "ACPI and legacy methods failed\n"); + printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.html\n"); + } + } else { + /* SGTC use the bus clock as timer */ + latency = fixup_sgtc(); + printk(KERN_INFO PFX "SGTC: %d\n", latency); + } + } + + if (result) + return result; + + printk (KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n", + minimum_speed/1000, maximum_speed/1000); + + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + + policy->cpuinfo.transition_latency = cpufreq_scale(2000000UL, fsb, latency); + + policy->cur = powernow_get(0); + + cpufreq_frequency_table_get_attr(powernow_table, policy->cpu); + + return cpufreq_frequency_table_cpuinfo(policy, powernow_table); +} + +static int powernow_cpu_exit (struct cpufreq_policy *policy) { + cpufreq_frequency_table_put_attr(policy->cpu); + +#ifdef CONFIG_X86_POWERNOW_K7_ACPI + if (acpi_processor_perf) { + acpi_processor_unregister_performance(acpi_processor_perf, 0); + kfree(acpi_processor_perf); + } +#endif + + kfree(powernow_table); + return 0; +} + +static struct freq_attr* powernow_table_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver powernow_driver = { + .verify = powernow_verify, + .target = powernow_target, + .get = powernow_get, + .init = powernow_cpu_init, + .exit = powernow_cpu_exit, + .name = "powernow-k7", + .owner = THIS_MODULE, + .attr = powernow_table_attr, +}; + +static int __init powernow_init (void) +{ + if (check_powernow()==0) + return -ENODEV; + return cpufreq_register_driver(&powernow_driver); +} + + +static void __exit powernow_exit (void) +{ + cpufreq_unregister_driver(&powernow_driver); +} + +module_param(acpi_force, int, 0444); +MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); + +MODULE_AUTHOR ("Dave Jones "); +MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors."); +MODULE_LICENSE ("GPL"); + +late_initcall(powernow_init); +module_exit(powernow_exit); + diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h new file mode 100644 index 00000000000..f8a63b3664e --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h @@ -0,0 +1,44 @@ +/* + * $Id: powernow-k7.h,v 1.2 2003/02/10 18:26:01 davej Exp $ + * (C) 2003 Dave Jones. + * + * Licensed under the terms of the GNU GPL License version 2. + * + * AMD-specific information + * + */ + +union msr_fidvidctl { + struct { + unsigned FID:5, // 4:0 + reserved1:3, // 7:5 + VID:5, // 12:8 + reserved2:3, // 15:13 + FIDC:1, // 16 + VIDC:1, // 17 + reserved3:2, // 19:18 + FIDCHGRATIO:1, // 20 + reserved4:11, // 31-21 + SGTC:20, // 32:51 + reserved5:12; // 63:52 + } bits; + unsigned long long val; +}; + +union msr_fidvidstatus { + struct { + unsigned CFID:5, // 4:0 + reserved1:3, // 7:5 + SFID:5, // 12:8 + reserved2:3, // 15:13 + MFID:5, // 20:16 + reserved3:11, // 31:21 + CVID:5, // 36:32 + reserved4:3, // 39:37 + SVID:5, // 44:40 + reserved5:3, // 47:45 + MVID:5, // 52:48 + reserved6:11; // 63:53 + } bits; + unsigned long long val; +}; diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c new file mode 100644 index 00000000000..34ed53a0673 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -0,0 +1,1363 @@ +/* + * (c) 2003-2006 Advanced Micro Devices, Inc. + * Your use of this code is subject to the terms and conditions of the + * GNU general public license version 2. See "COPYING" or + * http://www.gnu.org/licenses/gpl.html + * + * Support : mark.langsdorf@amd.com + * + * Based on the powernow-k7.c module written by Dave Jones. + * (C) 2003 Dave Jones on behalf of SuSE Labs + * (C) 2004 Dominik Brodowski + * (C) 2004 Pavel Machek + * Licensed under the terms of the GNU GPL License version 2. + * Based upon datasheets & sample CPUs kindly provided by AMD. + * + * Valuable input gratefully received from Dave Jones, Pavel Machek, + * Dominik Brodowski, Jacob Shin, and others. + * Originally developed by Paul Devriendt. + * Processor information obtained from Chapter 9 (Power and Thermal Management) + * of the "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD + * Opteron Processors" available for download from www.amd.com + * + * Tables for specific CPUs can be inferred from + * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/30430.pdf + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for current / set_cpus_allowed() */ + +#include +#include +#include + +#ifdef CONFIG_X86_POWERNOW_K8_ACPI +#include +#include +#include +#endif + +#define PFX "powernow-k8: " +#define BFX PFX "BIOS error: " +#define VERSION "version 2.00.00" +#include "powernow-k8.h" + +/* serialize freq changes */ +static DEFINE_MUTEX(fidvid_mutex); + +static struct powernow_k8_data *powernow_data[NR_CPUS]; + +static int cpu_family = CPU_OPTERON; + +#ifndef CONFIG_SMP +static cpumask_t cpu_core_map[1]; +#endif + +/* Return a frequency in MHz, given an input fid */ +static u32 find_freq_from_fid(u32 fid) +{ + return 800 + (fid * 100); +} + + +/* Return a frequency in KHz, given an input fid */ +static u32 find_khz_freq_from_fid(u32 fid) +{ + return 1000 * find_freq_from_fid(fid); +} + +/* Return a frequency in MHz, given an input fid and did */ +static u32 find_freq_from_fiddid(u32 fid, u32 did) +{ + return 100 * (fid + 0x10) >> did; +} + +static u32 find_khz_freq_from_fiddid(u32 fid, u32 did) +{ + return 1000 * find_freq_from_fiddid(fid, did); +} + +static u32 find_fid_from_pstate(u32 pstate) +{ + u32 hi, lo; + rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); + return lo & HW_PSTATE_FID_MASK; +} + +static u32 find_did_from_pstate(u32 pstate) +{ + u32 hi, lo; + rdmsr(MSR_PSTATE_DEF_BASE + pstate, lo, hi); + return (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; +} + +/* Return the vco fid for an input fid + * + * Each "low" fid has corresponding "high" fid, and you can get to "low" fids + * only from corresponding high fids. This returns "high" fid corresponding to + * "low" one. + */ +static u32 convert_fid_to_vco_fid(u32 fid) +{ + if (fid < HI_FID_TABLE_BOTTOM) + return 8 + (2 * fid); + else + return fid; +} + +/* + * Return 1 if the pending bit is set. Unless we just instructed the processor + * to transition to a new state, seeing this bit set is really bad news. + */ +static int pending_bit_stuck(void) +{ + u32 lo, hi; + + if (cpu_family == CPU_HW_PSTATE) + return 0; + + rdmsr(MSR_FIDVID_STATUS, lo, hi); + return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0; +} + +/* + * Update the global current fid / vid values from the status msr. + * Returns 1 on error. + */ +static int query_current_values_with_pending_wait(struct powernow_k8_data *data) +{ + u32 lo, hi; + u32 i = 0; + + if (cpu_family == CPU_HW_PSTATE) { + rdmsr(MSR_PSTATE_STATUS, lo, hi); + i = lo & HW_PSTATE_MASK; + rdmsr(MSR_PSTATE_DEF_BASE + i, lo, hi); + data->currfid = lo & HW_PSTATE_FID_MASK; + data->currdid = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; + return 0; + } + do { + if (i++ > 10000) { + dprintk("detected change pending stuck\n"); + return 1; + } + rdmsr(MSR_FIDVID_STATUS, lo, hi); + } while (lo & MSR_S_LO_CHANGE_PENDING); + + data->currvid = hi & MSR_S_HI_CURRENT_VID; + data->currfid = lo & MSR_S_LO_CURRENT_FID; + + return 0; +} + +/* the isochronous relief time */ +static void count_off_irt(struct powernow_k8_data *data) +{ + udelay((1 << data->irt) * 10); + return; +} + +/* the voltage stabalization time */ +static void count_off_vst(struct powernow_k8_data *data) +{ + udelay(data->vstable * VST_UNITS_20US); + return; +} + +/* need to init the control msr to a safe value (for each cpu) */ +static void fidvid_msr_init(void) +{ + u32 lo, hi; + u8 fid, vid; + + rdmsr(MSR_FIDVID_STATUS, lo, hi); + vid = hi & MSR_S_HI_CURRENT_VID; + fid = lo & MSR_S_LO_CURRENT_FID; + lo = fid | (vid << MSR_C_LO_VID_SHIFT); + hi = MSR_C_HI_STP_GNT_BENIGN; + dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi); + wrmsr(MSR_FIDVID_CTL, lo, hi); +} + + +/* write the new fid value along with the other control fields to the msr */ +static int write_new_fid(struct powernow_k8_data *data, u32 fid) +{ + u32 lo; + u32 savevid = data->currvid; + u32 i = 0; + + if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) { + printk(KERN_ERR PFX "internal error - overflow on fid write\n"); + return 1; + } + + lo = fid | (data->currvid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; + + dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", + fid, lo, data->plllock * PLL_LOCK_CONVERSION); + + do { + wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); + if (i++ > 100) { + printk(KERN_ERR PFX "Hardware error - pending bit very stuck - no further pstate changes possible\n"); + return 1; + } + } while (query_current_values_with_pending_wait(data)); + + count_off_irt(data); + + if (savevid != data->currvid) { + printk(KERN_ERR PFX "vid change on fid trans, old 0x%x, new 0x%x\n", + savevid, data->currvid); + return 1; + } + + if (fid != data->currfid) { + printk(KERN_ERR PFX "fid trans failed, fid 0x%x, curr 0x%x\n", fid, + data->currfid); + return 1; + } + + return 0; +} + +/* Write a new vid to the hardware */ +static int write_new_vid(struct powernow_k8_data *data, u32 vid) +{ + u32 lo; + u32 savefid = data->currfid; + int i = 0; + + if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) { + printk(KERN_ERR PFX "internal error - overflow on vid write\n"); + return 1; + } + + lo = data->currfid | (vid << MSR_C_LO_VID_SHIFT) | MSR_C_LO_INIT_FID_VID; + + dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", + vid, lo, STOP_GRANT_5NS); + + do { + wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); + if (i++ > 100) { + printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); + return 1; + } + } while (query_current_values_with_pending_wait(data)); + + if (savefid != data->currfid) { + printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n", + savefid, data->currfid); + return 1; + } + + if (vid != data->currvid) { + printk(KERN_ERR PFX "vid trans failed, vid 0x%x, curr 0x%x\n", vid, + data->currvid); + return 1; + } + + return 0; +} + +/* + * Reduce the vid by the max of step or reqvid. + * Decreasing vid codes represent increasing voltages: + * vid of 0 is 1.550V, vid of 0x1e is 0.800V, vid of VID_OFF is off. + */ +static int decrease_vid_code_by_step(struct powernow_k8_data *data, u32 reqvid, u32 step) +{ + if ((data->currvid - reqvid) > step) + reqvid = data->currvid - step; + + if (write_new_vid(data, reqvid)) + return 1; + + count_off_vst(data); + + return 0; +} + +/* Change hardware pstate by single MSR write */ +static int transition_pstate(struct powernow_k8_data *data, u32 pstate) +{ + wrmsr(MSR_PSTATE_CTRL, pstate, 0); + data->currfid = find_fid_from_pstate(pstate); + return 0; +} + +/* Change Opteron/Athlon64 fid and vid, by the 3 phases. */ +static int transition_fid_vid(struct powernow_k8_data *data, u32 reqfid, u32 reqvid) +{ + if (core_voltage_pre_transition(data, reqvid)) + return 1; + + if (core_frequency_transition(data, reqfid)) + return 1; + + if (core_voltage_post_transition(data, reqvid)) + return 1; + + if (query_current_values_with_pending_wait(data)) + return 1; + + if ((reqfid != data->currfid) || (reqvid != data->currvid)) { + printk(KERN_ERR PFX "failed (cpu%d): req 0x%x 0x%x, curr 0x%x 0x%x\n", + smp_processor_id(), + reqfid, reqvid, data->currfid, data->currvid); + return 1; + } + + dprintk("transitioned (cpu%d): new fid 0x%x, vid 0x%x\n", + smp_processor_id(), data->currfid, data->currvid); + + return 0; +} + +/* Phase 1 - core voltage transition ... setup voltage */ +static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid) +{ + u32 rvosteps = data->rvo; + u32 savefid = data->currfid; + u32 maxvid, lo; + + dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, reqvid 0x%x, rvo 0x%x\n", + smp_processor_id(), + data->currfid, data->currvid, reqvid, data->rvo); + + rdmsr(MSR_FIDVID_STATUS, lo, maxvid); + maxvid = 0x1f & (maxvid >> 16); + dprintk("ph1 maxvid=0x%x\n", maxvid); + if (reqvid < maxvid) /* lower numbers are higher voltages */ + reqvid = maxvid; + + while (data->currvid > reqvid) { + dprintk("ph1: curr 0x%x, req vid 0x%x\n", + data->currvid, reqvid); + if (decrease_vid_code_by_step(data, reqvid, data->vidmvs)) + return 1; + } + + while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { + if (data->currvid == maxvid) { + rvosteps = 0; + } else { + dprintk("ph1: changing vid for rvo, req 0x%x\n", + data->currvid - 1); + if (decrease_vid_code_by_step(data, data->currvid - 1, 1)) + return 1; + rvosteps--; + } + } + + if (query_current_values_with_pending_wait(data)) + return 1; + + if (savefid != data->currfid) { + printk(KERN_ERR PFX "ph1 err, currfid changed 0x%x\n", data->currfid); + return 1; + } + + dprintk("ph1 complete, currfid 0x%x, currvid 0x%x\n", + data->currfid, data->currvid); + + return 0; +} + +/* Phase 2 - core frequency transition */ +static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid) +{ + u32 vcoreqfid, vcocurrfid, vcofiddiff, fid_interval, savevid = data->currvid; + + if ((reqfid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { + printk(KERN_ERR PFX "ph2: illegal lo-lo transition 0x%x 0x%x\n", + reqfid, data->currfid); + return 1; + } + + if (data->currfid == reqfid) { + printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid); + return 0; + } + + dprintk("ph2 (cpu%d): starting, currfid 0x%x, currvid 0x%x, reqfid 0x%x\n", + smp_processor_id(), + data->currfid, data->currvid, reqfid); + + vcoreqfid = convert_fid_to_vco_fid(reqfid); + vcocurrfid = convert_fid_to_vco_fid(data->currfid); + vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid + : vcoreqfid - vcocurrfid; + + while (vcofiddiff > 2) { + (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2); + + if (reqfid > data->currfid) { + if (data->currfid > LO_FID_TABLE_TOP) { + if (write_new_fid(data, data->currfid + fid_interval)) { + return 1; + } + } else { + if (write_new_fid + (data, 2 + convert_fid_to_vco_fid(data->currfid))) { + return 1; + } + } + } else { + if (write_new_fid(data, data->currfid - fid_interval)) + return 1; + } + + vcocurrfid = convert_fid_to_vco_fid(data->currfid); + vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid + : vcoreqfid - vcocurrfid; + } + + if (write_new_fid(data, reqfid)) + return 1; + + if (query_current_values_with_pending_wait(data)) + return 1; + + if (data->currfid != reqfid) { + printk(KERN_ERR PFX + "ph2: mismatch, failed fid transition, curr 0x%x, req 0x%x\n", + data->currfid, reqfid); + return 1; + } + + if (savevid != data->currvid) { + printk(KERN_ERR PFX "ph2: vid changed, save 0x%x, curr 0x%x\n", + savevid, data->currvid); + return 1; + } + + dprintk("ph2 complete, currfid 0x%x, currvid 0x%x\n", + data->currfid, data->currvid); + + return 0; +} + +/* Phase 3 - core voltage transition flow ... jump to the final vid. */ +static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid) +{ + u32 savefid = data->currfid; + u32 savereqvid = reqvid; + + dprintk("ph3 (cpu%d): starting, currfid 0x%x, currvid 0x%x\n", + smp_processor_id(), + data->currfid, data->currvid); + + if (reqvid != data->currvid) { + if (write_new_vid(data, reqvid)) + return 1; + + if (savefid != data->currfid) { + printk(KERN_ERR PFX + "ph3: bad fid change, save 0x%x, curr 0x%x\n", + savefid, data->currfid); + return 1; + } + + if (data->currvid != reqvid) { + printk(KERN_ERR PFX + "ph3: failed vid transition\n, req 0x%x, curr 0x%x", + reqvid, data->currvid); + return 1; + } + } + + if (query_current_values_with_pending_wait(data)) + return 1; + + if (savereqvid != data->currvid) { + dprintk("ph3 failed, currvid 0x%x\n", data->currvid); + return 1; + } + + if (savefid != data->currfid) { + dprintk("ph3 failed, currfid changed 0x%x\n", + data->currfid); + return 1; + } + + dprintk("ph3 complete, currfid 0x%x, currvid 0x%x\n", + data->currfid, data->currvid); + + return 0; +} + +static int check_supported_cpu(unsigned int cpu) +{ + cpumask_t oldmask = CPU_MASK_ALL; + u32 eax, ebx, ecx, edx; + unsigned int rc = 0; + + oldmask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + + if (smp_processor_id() != cpu) { + printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); + goto out; + } + + if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) + goto out; + + eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); + if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) && + ((eax & CPUID_XFAM) < CPUID_XFAM_10H)) + goto out; + + if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { + if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || + ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { + printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); + goto out; + } + + eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); + if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { + printk(KERN_INFO PFX + "No frequency change capabilities detected\n"); + goto out; + } + + cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); + if ((edx & P_STATE_TRANSITION_CAPABLE) != P_STATE_TRANSITION_CAPABLE) { + printk(KERN_INFO PFX "Power state transitions not supported\n"); + goto out; + } + } else { /* must be a HW Pstate capable processor */ + cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); + if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE) + cpu_family = CPU_HW_PSTATE; + else + goto out; + } + + rc = 1; + +out: + set_cpus_allowed(current, oldmask); + return rc; +} + +static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) +{ + unsigned int j; + u8 lastfid = 0xff; + + for (j = 0; j < data->numps; j++) { + if (pst[j].vid > LEAST_VID) { + printk(KERN_ERR PFX "vid %d invalid : 0x%x\n", j, pst[j].vid); + return -EINVAL; + } + if (pst[j].vid < data->rvo) { /* vid + rvo >= 0 */ + printk(KERN_ERR BFX "0 vid exceeded with pstate %d\n", j); + return -ENODEV; + } + if (pst[j].vid < maxvid + data->rvo) { /* vid + rvo >= maxvid */ + printk(KERN_ERR BFX "maxvid exceeded with pstate %d\n", j); + return -ENODEV; + } + if (pst[j].fid > MAX_FID) { + printk(KERN_ERR BFX "maxfid exceeded with pstate %d\n", j); + return -ENODEV; + } + if (j && (pst[j].fid < HI_FID_TABLE_BOTTOM)) { + /* Only first fid is allowed to be in "low" range */ + printk(KERN_ERR BFX "two low fids - %d : 0x%x\n", j, pst[j].fid); + return -EINVAL; + } + if (pst[j].fid < lastfid) + lastfid = pst[j].fid; + } + if (lastfid & 1) { + printk(KERN_ERR BFX "lastfid invalid\n"); + return -EINVAL; + } + if (lastfid > LO_FID_TABLE_TOP) + printk(KERN_INFO BFX "first fid not from lo freq table\n"); + + return 0; +} + +static void print_basics(struct powernow_k8_data *data) +{ + int j; + for (j = 0; j < data->numps; j++) { + if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) { + if (cpu_family == CPU_HW_PSTATE) { + printk(KERN_INFO PFX " %d : fid 0x%x did 0x%x (%d MHz)\n", + j, + (data->powernow_table[j].index & 0xff00) >> 8, + (data->powernow_table[j].index & 0xff0000) >> 16, + data->powernow_table[j].frequency/1000); + } else { + printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n", + j, + data->powernow_table[j].index & 0xff, + data->powernow_table[j].frequency/1000, + data->powernow_table[j].index >> 8); + } + } + } + if (data->batps) + printk(KERN_INFO PFX "Only %d pstates on battery\n", data->batps); +} + +static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) +{ + struct cpufreq_frequency_table *powernow_table; + unsigned int j; + + if (data->batps) { /* use ACPI support to get full speed on mains power */ + printk(KERN_WARNING PFX "Only %d pstates usable (use ACPI driver for full range\n", data->batps); + data->numps = data->batps; + } + + for ( j=1; jnumps; j++ ) { + if (pst[j-1].fid >= pst[j].fid) { + printk(KERN_ERR PFX "PST out of sequence\n"); + return -EINVAL; + } + } + + if (data->numps < 2) { + printk(KERN_ERR PFX "no p states to transition\n"); + return -ENODEV; + } + + if (check_pst_table(data, pst, maxvid)) + return -EINVAL; + + powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) + * (data->numps + 1)), GFP_KERNEL); + if (!powernow_table) { + printk(KERN_ERR PFX "powernow_table memory alloc failure\n"); + return -ENOMEM; + } + + for (j = 0; j < data->numps; j++) { + powernow_table[j].index = pst[j].fid; /* lower 8 bits */ + powernow_table[j].index |= (pst[j].vid << 8); /* upper 8 bits */ + powernow_table[j].frequency = find_khz_freq_from_fid(pst[j].fid); + } + powernow_table[data->numps].frequency = CPUFREQ_TABLE_END; + powernow_table[data->numps].index = 0; + + if (query_current_values_with_pending_wait(data)) { + kfree(powernow_table); + return -EIO; + } + + dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); + data->powernow_table = powernow_table; + if (first_cpu(cpu_core_map[data->cpu]) == data->cpu) + print_basics(data); + + for (j = 0; j < data->numps; j++) + if ((pst[j].fid==data->currfid) && (pst[j].vid==data->currvid)) + return 0; + + dprintk("currfid/vid do not match PST, ignoring\n"); + return 0; +} + +/* Find and validate the PSB/PST table in BIOS. */ +static int find_psb_table(struct powernow_k8_data *data) +{ + struct psb_s *psb; + unsigned int i; + u32 mvs; + u8 maxvid; + u32 cpst = 0; + u32 thiscpuid; + + for (i = 0xc0000; i < 0xffff0; i += 0x10) { + /* Scan BIOS looking for the signature. */ + /* It can not be at ffff0 - it is too big. */ + + psb = phys_to_virt(i); + if (memcmp(psb, PSB_ID_STRING, PSB_ID_STRING_LEN) != 0) + continue; + + dprintk("found PSB header at 0x%p\n", psb); + + dprintk("table vers: 0x%x\n", psb->tableversion); + if (psb->tableversion != PSB_VERSION_1_4) { + printk(KERN_ERR BFX "PSB table is not v1.4\n"); + return -ENODEV; + } + + dprintk("flags: 0x%x\n", psb->flags1); + if (psb->flags1) { + printk(KERN_ERR BFX "unknown flags\n"); + return -ENODEV; + } + + data->vstable = psb->vstable; + dprintk("voltage stabilization time: %d(*20us)\n", data->vstable); + + dprintk("flags2: 0x%x\n", psb->flags2); + data->rvo = psb->flags2 & 3; + data->irt = ((psb->flags2) >> 2) & 3; + mvs = ((psb->flags2) >> 4) & 3; + data->vidmvs = 1 << mvs; + data->batps = ((psb->flags2) >> 6) & 3; + + dprintk("ramp voltage offset: %d\n", data->rvo); + dprintk("isochronous relief time: %d\n", data->irt); + dprintk("maximum voltage step: %d - 0x%x\n", mvs, data->vidmvs); + + dprintk("numpst: 0x%x\n", psb->num_tables); + cpst = psb->num_tables; + if ((psb->cpuid == 0x00000fc0) || (psb->cpuid == 0x00000fe0) ){ + thiscpuid = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); + if ((thiscpuid == 0x00000fc0) || (thiscpuid == 0x00000fe0) ) { + cpst = 1; + } + } + if (cpst != 1) { + printk(KERN_ERR BFX "numpst must be 1\n"); + return -ENODEV; + } + + data->plllock = psb->plllocktime; + dprintk("plllocktime: 0x%x (units 1us)\n", psb->plllocktime); + dprintk("maxfid: 0x%x\n", psb->maxfid); + dprintk("maxvid: 0x%x\n", psb->maxvid); + maxvid = psb->maxvid; + + data->numps = psb->numps; + dprintk("numpstates: 0x%x\n", data->numps); + return fill_powernow_table(data, (struct pst_s *)(psb+1), maxvid); + } + /* + * If you see this message, complain to BIOS manufacturer. If + * he tells you "we do not support Linux" or some similar + * nonsense, remember that Windows 2000 uses the same legacy + * mechanism that the old Linux PSB driver uses. Tell them it + * is broken with Windows 2000. + * + * The reference to the AMD documentation is chapter 9 in the + * BIOS and Kernel Developer's Guide, which is available on + * www.amd.com + */ + printk(KERN_ERR PFX "BIOS error - no PSB or ACPI _PSS objects\n"); + return -ENODEV; +} + +#ifdef CONFIG_X86_POWERNOW_K8_ACPI +static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) +{ + if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) + return; + + data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; + data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK; + data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; + data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; + data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK); + data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK; +} + +static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) +{ + struct cpufreq_frequency_table *powernow_table; + int ret_val; + + if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { + dprintk("register performance failed: bad ACPI data\n"); + return -EIO; + } + + /* verify the data contained in the ACPI structures */ + if (data->acpi_data.state_count <= 1) { + dprintk("No ACPI P-States\n"); + goto err_out; + } + + if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || + (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { + dprintk("Invalid control/status registers (%x - %x)\n", + data->acpi_data.control_register.space_id, + data->acpi_data.status_register.space_id); + goto err_out; + } + + /* fill in data->powernow_table */ + powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) + * (data->acpi_data.state_count + 1)), GFP_KERNEL); + if (!powernow_table) { + dprintk("powernow_table memory alloc failure\n"); + goto err_out; + } + + if (cpu_family == CPU_HW_PSTATE) + ret_val = fill_powernow_table_pstate(data, powernow_table); + else + ret_val = fill_powernow_table_fidvid(data, powernow_table); + if (ret_val) + goto err_out_mem; + + powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END; + powernow_table[data->acpi_data.state_count].index = 0; + data->powernow_table = powernow_table; + + /* fill in data */ + data->numps = data->acpi_data.state_count; + if (first_cpu(cpu_core_map[data->cpu]) == data->cpu) + print_basics(data); + powernow_k8_acpi_pst_values(data, 0); + + /* notify BIOS that we exist */ + acpi_processor_notify_smm(THIS_MODULE); + + return 0; + +err_out_mem: + kfree(powernow_table); + +err_out: + acpi_processor_unregister_performance(&data->acpi_data, data->cpu); + + /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ + data->acpi_data.state_count = 0; + + return -ENODEV; +} + +static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) +{ + int i; + + for (i = 0; i < data->acpi_data.state_count; i++) { + u32 index; + u32 hi = 0, lo = 0; + u32 fid; + u32 did; + + index = data->acpi_data.states[i].control & HW_PSTATE_MASK; + if (index > MAX_HW_PSTATE) { + printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); + printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); + } + rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); + if (!(hi & HW_PSTATE_VALID_MASK)) { + dprintk("invalid pstate %d, ignoring\n", index); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } + + fid = lo & HW_PSTATE_FID_MASK; + did = (lo & HW_PSTATE_DID_MASK) >> HW_PSTATE_DID_SHIFT; + + dprintk(" %d : fid 0x%x, did 0x%x\n", index, fid, did); + + powernow_table[i].index = index | (fid << HW_FID_INDEX_SHIFT) | (did << HW_DID_INDEX_SHIFT); + + powernow_table[i].frequency = find_khz_freq_from_fiddid(fid, did); + + if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { + printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", + powernow_table[i].frequency, + (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } + } + return 0; +} + +static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) +{ + int i; + int cntlofreq = 0; + for (i = 0; i < data->acpi_data.state_count; i++) { + u32 fid; + u32 vid; + + if (data->exttype) { + fid = data->acpi_data.states[i].status & EXT_FID_MASK; + vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK; + } else { + fid = data->acpi_data.states[i].control & FID_MASK; + vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; + } + + dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); + + powernow_table[i].index = fid; /* lower 8 bits */ + powernow_table[i].index |= (vid << 8); /* upper 8 bits */ + powernow_table[i].frequency = find_khz_freq_from_fid(fid); + + /* verify frequency is OK */ + if ((powernow_table[i].frequency > (MAX_FREQ * 1000)) || + (powernow_table[i].frequency < (MIN_FREQ * 1000))) { + dprintk("invalid freq %u kHz, ignoring\n", powernow_table[i].frequency); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } + + /* verify voltage is OK - BIOSs are using "off" to indicate invalid */ + if (vid == VID_OFF) { + dprintk("invalid vid %u, ignoring\n", vid); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } + + /* verify only 1 entry from the lo frequency table */ + if (fid < HI_FID_TABLE_BOTTOM) { + if (cntlofreq) { + /* if both entries are the same, ignore this one ... */ + if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) || + (powernow_table[i].index != powernow_table[cntlofreq].index)) { + printk(KERN_ERR PFX "Too many lo freq table entries\n"); + return 1; + } + + dprintk("double low frequency table entry, ignoring it.\n"); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } else + cntlofreq = i; + } + + if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { + printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", + powernow_table[i].frequency, + (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + continue; + } + } + return 0; +} + +static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) +{ + if (data->acpi_data.state_count) + acpi_processor_unregister_performance(&data->acpi_data, data->cpu); +} + +#else +static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } +static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } +static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } +#endif /* CONFIG_X86_POWERNOW_K8_ACPI */ + +/* Take a frequency, and issue the fid/vid transition command */ +static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned int index) +{ + u32 fid = 0; + u32 vid = 0; + int res, i; + struct cpufreq_freqs freqs; + + dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); + + /* fid/vid correctness check for k8 */ + /* fid are the lower 8 bits of the index we stored into + * the cpufreq frequency table in find_psb_table, vid + * are the upper 8 bits. + */ + fid = data->powernow_table[index].index & 0xFF; + vid = (data->powernow_table[index].index & 0xFF00) >> 8; + + dprintk("table matched fid 0x%x, giving vid 0x%x\n", fid, vid); + + if (query_current_values_with_pending_wait(data)) + return 1; + + if ((data->currvid == vid) && (data->currfid == fid)) { + dprintk("target matches current values (fid 0x%x, vid 0x%x)\n", + fid, vid); + return 0; + } + + if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) { + printk(KERN_ERR PFX + "ignoring illegal change in lo freq table-%x to 0x%x\n", + data->currfid, fid); + return 1; + } + + dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", + smp_processor_id(), fid, vid); + freqs.old = find_khz_freq_from_fid(data->currfid); + freqs.new = find_khz_freq_from_fid(fid); + + for_each_cpu_mask(i, *(data->available_cores)) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } + + res = transition_fid_vid(data, fid, vid); + freqs.new = find_khz_freq_from_fid(data->currfid); + + for_each_cpu_mask(i, *(data->available_cores)) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + return res; +} + +/* Take a frequency, and issue the hardware pstate transition command */ +static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned int index) +{ + u32 fid = 0; + u32 did = 0; + u32 pstate = 0; + int res, i; + struct cpufreq_freqs freqs; + + dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); + + /* get fid did for hardware pstate transition */ + pstate = index & HW_PSTATE_MASK; + if (pstate > MAX_HW_PSTATE) + return 0; + fid = (index & HW_FID_INDEX_MASK) >> HW_FID_INDEX_SHIFT; + did = (index & HW_DID_INDEX_MASK) >> HW_DID_INDEX_SHIFT; + freqs.old = find_khz_freq_from_fiddid(data->currfid, data->currdid); + freqs.new = find_khz_freq_from_fiddid(fid, did); + + for_each_cpu_mask(i, *(data->available_cores)) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } + + res = transition_pstate(data, pstate); + data->currfid = find_fid_from_pstate(pstate); + data->currdid = find_did_from_pstate(pstate); + freqs.new = find_khz_freq_from_fiddid(data->currfid, data->currdid); + + for_each_cpu_mask(i, *(data->available_cores)) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + return res; +} + +/* Driver entry point to switch to the target frequency */ +static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) +{ + cpumask_t oldmask = CPU_MASK_ALL; + struct powernow_k8_data *data = powernow_data[pol->cpu]; + u32 checkfid; + u32 checkvid; + unsigned int newstate; + int ret = -EIO; + + if (!data) + return -EINVAL; + + checkfid = data->currfid; + checkvid = data->currvid; + + /* only run on specific CPU from here on */ + oldmask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); + + if (smp_processor_id() != pol->cpu) { + printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); + goto err_out; + } + + if (pending_bit_stuck()) { + printk(KERN_ERR PFX "failing targ, change pending bit set\n"); + goto err_out; + } + + dprintk("targ: cpu %d, %d kHz, min %d, max %d, relation %d\n", + pol->cpu, targfreq, pol->min, pol->max, relation); + + if (query_current_values_with_pending_wait(data)) + goto err_out; + + if (cpu_family == CPU_HW_PSTATE) + dprintk("targ: curr fid 0x%x, did 0x%x\n", + data->currfid, data->currdid); + else { + dprintk("targ: curr fid 0x%x, vid 0x%x\n", + data->currfid, data->currvid); + + if ((checkvid != data->currvid) || (checkfid != data->currfid)) { + printk(KERN_INFO PFX + "error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n", + checkfid, data->currfid, checkvid, data->currvid); + } + } + + if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate)) + goto err_out; + + mutex_lock(&fidvid_mutex); + + powernow_k8_acpi_pst_values(data, newstate); + + if (cpu_family == CPU_HW_PSTATE) + ret = transition_frequency_pstate(data, newstate); + else + ret = transition_frequency_fidvid(data, newstate); + if (ret) { + printk(KERN_ERR PFX "transition frequency failed\n"); + ret = 1; + mutex_unlock(&fidvid_mutex); + goto err_out; + } + mutex_unlock(&fidvid_mutex); + + if (cpu_family == CPU_HW_PSTATE) + pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); + else + pol->cur = find_khz_freq_from_fid(data->currfid); + ret = 0; + +err_out: + set_cpus_allowed(current, oldmask); + return ret; +} + +/* Driver entry point to verify the policy and range of frequencies */ +static int powernowk8_verify(struct cpufreq_policy *pol) +{ + struct powernow_k8_data *data = powernow_data[pol->cpu]; + + if (!data) + return -EINVAL; + + return cpufreq_frequency_table_verify(pol, data->powernow_table); +} + +/* per CPU init entry point to the driver */ +static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) +{ + struct powernow_k8_data *data; + cpumask_t oldmask = CPU_MASK_ALL; + int rc; + + if (!cpu_online(pol->cpu)) + return -ENODEV; + + if (!check_supported_cpu(pol->cpu)) + return -ENODEV; + + data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); + if (!data) { + printk(KERN_ERR PFX "unable to alloc powernow_k8_data"); + return -ENOMEM; + } + + data->cpu = pol->cpu; + + if (powernow_k8_cpu_init_acpi(data)) { + /* + * Use the PSB BIOS structure. This is only availabe on + * an UP version, and is deprecated by AMD. + */ + if (num_online_cpus() != 1) { + printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n"); + kfree(data); + return -ENODEV; + } + if (pol->cpu != 0) { + printk(KERN_ERR PFX "No _PSS objects for CPU other than CPU0\n"); + kfree(data); + return -ENODEV; + } + rc = find_psb_table(data); + if (rc) { + kfree(data); + return -ENODEV; + } + } + + /* only run on specific CPU from here on */ + oldmask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); + + if (smp_processor_id() != pol->cpu) { + printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); + goto err_out; + } + + if (pending_bit_stuck()) { + printk(KERN_ERR PFX "failing init, change pending bit set\n"); + goto err_out; + } + + if (query_current_values_with_pending_wait(data)) + goto err_out; + + if (cpu_family == CPU_OPTERON) + fidvid_msr_init(); + + /* run on any CPU again */ + set_cpus_allowed(current, oldmask); + + pol->governor = CPUFREQ_DEFAULT_GOVERNOR; + if (cpu_family == CPU_HW_PSTATE) + pol->cpus = cpumask_of_cpu(pol->cpu); + else + pol->cpus = cpu_core_map[pol->cpu]; + data->available_cores = &(pol->cpus); + + /* Take a crude guess here. + * That guess was in microseconds, so multiply with 1000 */ + pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US) + + (3 * (1 << data->irt) * 10)) * 1000; + + if (cpu_family == CPU_HW_PSTATE) + pol->cur = find_khz_freq_from_fiddid(data->currfid, data->currdid); + else + pol->cur = find_khz_freq_from_fid(data->currfid); + dprintk("policy current frequency %d kHz\n", pol->cur); + + /* min/max the cpu is capable of */ + if (cpufreq_frequency_table_cpuinfo(pol, data->powernow_table)) { + printk(KERN_ERR PFX "invalid powernow_table\n"); + powernow_k8_cpu_exit_acpi(data); + kfree(data->powernow_table); + kfree(data); + return -EINVAL; + } + + cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); + + if (cpu_family == CPU_HW_PSTATE) + dprintk("cpu_init done, current fid 0x%x, did 0x%x\n", + data->currfid, data->currdid); + else + dprintk("cpu_init done, current fid 0x%x, vid 0x%x\n", + data->currfid, data->currvid); + + powernow_data[pol->cpu] = data; + + return 0; + +err_out: + set_cpus_allowed(current, oldmask); + powernow_k8_cpu_exit_acpi(data); + + kfree(data); + return -ENODEV; +} + +static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol) +{ + struct powernow_k8_data *data = powernow_data[pol->cpu]; + + if (!data) + return -EINVAL; + + powernow_k8_cpu_exit_acpi(data); + + cpufreq_frequency_table_put_attr(pol->cpu); + + kfree(data->powernow_table); + kfree(data); + + return 0; +} + +static unsigned int powernowk8_get (unsigned int cpu) +{ + struct powernow_k8_data *data; + cpumask_t oldmask = current->cpus_allowed; + unsigned int khz = 0; + + data = powernow_data[first_cpu(cpu_core_map[cpu])]; + + if (!data) + return -EINVAL; + + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (smp_processor_id() != cpu) { + printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu); + set_cpus_allowed(current, oldmask); + return 0; + } + + if (query_current_values_with_pending_wait(data)) + goto out; + + if (cpu_family == CPU_HW_PSTATE) + khz = find_khz_freq_from_fiddid(data->currfid, data->currdid); + else + khz = find_khz_freq_from_fid(data->currfid); + + +out: + set_cpus_allowed(current, oldmask); + return khz; +} + +static struct freq_attr* powernow_k8_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver cpufreq_amd64_driver = { + .verify = powernowk8_verify, + .target = powernowk8_target, + .init = powernowk8_cpu_init, + .exit = __devexit_p(powernowk8_cpu_exit), + .get = powernowk8_get, + .name = "powernow-k8", + .owner = THIS_MODULE, + .attr = powernow_k8_attr, +}; + +/* driver entry point for init */ +static int __cpuinit powernowk8_init(void) +{ + unsigned int i, supported_cpus = 0; + unsigned int booted_cores = 1; + + for_each_online_cpu(i) { + if (check_supported_cpu(i)) + supported_cpus++; + } + +#ifdef CONFIG_SMP + booted_cores = cpu_data[0].booted_cores; +#endif + + if (supported_cpus == num_online_cpus()) { + printk(KERN_INFO PFX "Found %d %s " + "processors (%d cpu cores) (" VERSION ")\n", + supported_cpus/booted_cores, + boot_cpu_data.x86_model_id, supported_cpus); + return cpufreq_register_driver(&cpufreq_amd64_driver); + } + + return -ENODEV; +} + +/* driver entry point for term */ +static void __exit powernowk8_exit(void) +{ + dprintk("exit\n"); + + cpufreq_unregister_driver(&cpufreq_amd64_driver); +} + +MODULE_AUTHOR("Paul Devriendt and Mark Langsdorf "); +MODULE_DESCRIPTION("AMD Athlon 64 and Opteron processor frequency driver."); +MODULE_LICENSE("GPL"); + +late_initcall(powernowk8_init); +module_exit(powernowk8_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h new file mode 100644 index 00000000000..b06c812208c --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -0,0 +1,232 @@ +/* + * (c) 2003-2006 Advanced Micro Devices, Inc. + * Your use of this code is subject to the terms and conditions of the + * GNU general public license version 2. See "COPYING" or + * http://www.gnu.org/licenses/gpl.html + */ + +struct powernow_k8_data { + unsigned int cpu; + + u32 numps; /* number of p-states */ + u32 batps; /* number of p-states supported on battery */ + + /* these values are constant when the PSB is used to determine + * vid/fid pairings, but are modified during the ->target() call + * when ACPI is used */ + u32 rvo; /* ramp voltage offset */ + u32 irt; /* isochronous relief time */ + u32 vidmvs; /* usable value calculated from mvs */ + u32 vstable; /* voltage stabilization time, units 20 us */ + u32 plllock; /* pll lock time, units 1 us */ + u32 exttype; /* extended interface = 1 */ + + /* keep track of the current fid / vid or did */ + u32 currvid, currfid, currdid; + + /* the powernow_table includes all frequency and vid/fid pairings: + * fid are the lower 8 bits of the index, vid are the upper 8 bits. + * frequency is in kHz */ + struct cpufreq_frequency_table *powernow_table; + +#ifdef CONFIG_X86_POWERNOW_K8_ACPI + /* the acpi table needs to be kept. it's only available if ACPI was + * used to determine valid frequency/vid/fid states */ + struct acpi_processor_performance acpi_data; +#endif + /* we need to keep track of associated cores, but let cpufreq + * handle hotplug events - so just point at cpufreq pol->cpus + * structure */ + cpumask_t *available_cores; +}; + + +/* processor's cpuid instruction support */ +#define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ +#define CPUID_XFAM 0x0ff00000 /* extended family */ +#define CPUID_XFAM_K8 0 +#define CPUID_XMOD 0x000f0000 /* extended model */ +#define CPUID_XMOD_REV_MASK 0x00080000 +#define CPUID_XFAM_10H 0x00100000 /* family 0x10 */ +#define CPUID_USE_XFAM_XMOD 0x00000f00 +#define CPUID_GET_MAX_CAPABILITIES 0x80000000 +#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 +#define P_STATE_TRANSITION_CAPABLE 6 + +/* Model Specific Registers for p-state transitions. MSRs are 64-bit. For */ +/* writes (wrmsr - opcode 0f 30), the register number is placed in ecx, and */ +/* the value to write is placed in edx:eax. For reads (rdmsr - opcode 0f 32), */ +/* the register number is placed in ecx, and the data is returned in edx:eax. */ + +#define MSR_FIDVID_CTL 0xc0010041 +#define MSR_FIDVID_STATUS 0xc0010042 + +/* Field definitions within the FID VID Low Control MSR : */ +#define MSR_C_LO_INIT_FID_VID 0x00010000 +#define MSR_C_LO_NEW_VID 0x00003f00 +#define MSR_C_LO_NEW_FID 0x0000003f +#define MSR_C_LO_VID_SHIFT 8 + +/* Field definitions within the FID VID High Control MSR : */ +#define MSR_C_HI_STP_GNT_TO 0x000fffff + +/* Field definitions within the FID VID Low Status MSR : */ +#define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */ +#define MSR_S_LO_MAX_RAMP_VID 0x3f000000 +#define MSR_S_LO_MAX_FID 0x003f0000 +#define MSR_S_LO_START_FID 0x00003f00 +#define MSR_S_LO_CURRENT_FID 0x0000003f + +/* Field definitions within the FID VID High Status MSR : */ +#define MSR_S_HI_MIN_WORKING_VID 0x3f000000 +#define MSR_S_HI_MAX_WORKING_VID 0x003f0000 +#define MSR_S_HI_START_VID 0x00003f00 +#define MSR_S_HI_CURRENT_VID 0x0000003f +#define MSR_C_HI_STP_GNT_BENIGN 0x00000001 + + +/* Hardware Pstate _PSS and MSR definitions */ +#define USE_HW_PSTATE 0x00000080 +#define HW_PSTATE_FID_MASK 0x0000003f +#define HW_PSTATE_DID_MASK 0x000001c0 +#define HW_PSTATE_DID_SHIFT 6 +#define HW_PSTATE_MASK 0x00000007 +#define HW_PSTATE_VALID_MASK 0x80000000 +#define HW_FID_INDEX_SHIFT 8 +#define HW_FID_INDEX_MASK 0x0000ff00 +#define HW_DID_INDEX_SHIFT 16 +#define HW_DID_INDEX_MASK 0x00ff0000 +#define HW_WATTS_MASK 0xff +#define HW_PWR_DVR_MASK 0x300 +#define HW_PWR_DVR_SHIFT 8 +#define HW_PWR_MAX_MULT 3 +#define MAX_HW_PSTATE 8 /* hw pstate supports up to 8 */ +#define MSR_PSTATE_DEF_BASE 0xc0010064 /* base of Pstate MSRs */ +#define MSR_PSTATE_STATUS 0xc0010063 /* Pstate Status MSR */ +#define MSR_PSTATE_CTRL 0xc0010062 /* Pstate control MSR */ + +/* define the two driver architectures */ +#define CPU_OPTERON 0 +#define CPU_HW_PSTATE 1 + + +/* + * There are restrictions frequencies have to follow: + * - only 1 entry in the low fid table ( <=1.4GHz ) + * - lowest entry in the high fid table must be >= 2 * the entry in the + * low fid table + * - lowest entry in the high fid table must be a <= 200MHz + 2 * the entry + * in the low fid table + * - the parts can only step at <= 200 MHz intervals, odd fid values are + * supported in revision G and later revisions. + * - lowest frequency must be >= interprocessor hypertransport link speed + * (only applies to MP systems obviously) + */ + +/* fids (frequency identifiers) are arranged in 2 tables - lo and hi */ +#define LO_FID_TABLE_TOP 7 /* fid values marking the boundary */ +#define HI_FID_TABLE_BOTTOM 8 /* between the low and high tables */ + +#define LO_VCOFREQ_TABLE_TOP 1400 /* corresponding vco frequency values */ +#define HI_VCOFREQ_TABLE_BOTTOM 1600 + +#define MIN_FREQ_RESOLUTION 200 /* fids jump by 2 matching freq jumps by 200 */ + +#define MAX_FID 0x2a /* Spec only gives FID values as far as 5 GHz */ +#define LEAST_VID 0x3e /* Lowest (numerically highest) useful vid value */ + +#define MIN_FREQ 800 /* Min and max freqs, per spec */ +#define MAX_FREQ 5000 + +#define INVALID_FID_MASK 0xffffffc0 /* not a valid fid if these bits are set */ +#define INVALID_VID_MASK 0xffffffc0 /* not a valid vid if these bits are set */ + +#define VID_OFF 0x3f + +#define STOP_GRANT_5NS 1 /* min poss memory access latency for voltage change */ + +#define PLL_LOCK_CONVERSION (1000/5) /* ms to ns, then divide by clock period */ + +#define MAXIMUM_VID_STEPS 1 /* Current cpus only allow a single step of 25mV */ +#define VST_UNITS_20US 20 /* Voltage Stabalization Time is in units of 20us */ + +/* + * Most values of interest are enocoded in a single field of the _PSS + * entries: the "control" value. + */ + +#define IRT_SHIFT 30 +#define RVO_SHIFT 28 +#define EXT_TYPE_SHIFT 27 +#define PLL_L_SHIFT 20 +#define MVS_SHIFT 18 +#define VST_SHIFT 11 +#define VID_SHIFT 6 +#define IRT_MASK 3 +#define RVO_MASK 3 +#define EXT_TYPE_MASK 1 +#define PLL_L_MASK 0x7f +#define MVS_MASK 3 +#define VST_MASK 0x7f +#define VID_MASK 0x1f +#define FID_MASK 0x1f +#define EXT_VID_MASK 0x3f +#define EXT_FID_MASK 0x3f + + +/* + * Version 1.4 of the PSB table. This table is constructed by BIOS and is + * to tell the OS's power management driver which VIDs and FIDs are + * supported by this particular processor. + * If the data in the PSB / PST is wrong, then this driver will program the + * wrong values into hardware, which is very likely to lead to a crash. + */ + +#define PSB_ID_STRING "AMDK7PNOW!" +#define PSB_ID_STRING_LEN 10 + +#define PSB_VERSION_1_4 0x14 + +struct psb_s { + u8 signature[10]; + u8 tableversion; + u8 flags1; + u16 vstable; + u8 flags2; + u8 num_tables; + u32 cpuid; + u8 plllocktime; + u8 maxfid; + u8 maxvid; + u8 numps; +}; + +/* Pairs of fid/vid values are appended to the version 1.4 PSB table. */ +struct pst_s { + u8 fid; + u8 vid; +}; + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg) + +static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid); +static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid); +static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); + +static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index); + +#ifdef CONFIG_X86_POWERNOW_K8_ACPI +static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); +static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); +#endif + +#ifdef CONFIG_SMP +static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) +{ +} +#else +static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) +{ + cpu_set(0, cpu_sharedcore_mask[0]); +} +#endif diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c new file mode 100644 index 00000000000..b8fb4b521c6 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c @@ -0,0 +1,191 @@ +/* + * sc520_freq.c: cpufreq driver for the AMD Elan sc520 + * + * Copyright (C) 2005 Sean Young + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on elanfreq.c + * + * 2005-03-30: - initial revision + */ + +#include +#include +#include + +#include +#include + +#include +#include +#include + +#define MMCR_BASE 0xfffef000 /* The default base address */ +#define OFFS_CPUCTL 0x2 /* CPU Control Register */ + +static __u8 __iomem *cpuctl; + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "sc520_freq", msg) + +static struct cpufreq_frequency_table sc520_freq_table[] = { + {0x01, 100000}, + {0x02, 133000}, + {0, CPUFREQ_TABLE_END}, +}; + +static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu) +{ + u8 clockspeed_reg = *cpuctl; + + switch (clockspeed_reg & 0x03) { + default: + printk(KERN_ERR "sc520_freq: error: cpuctl register has unexpected value %02x\n", clockspeed_reg); + case 0x01: + return 100000; + case 0x02: + return 133000; + } +} + +static void sc520_freq_set_cpu_state (unsigned int state) +{ + + struct cpufreq_freqs freqs; + u8 clockspeed_reg; + + freqs.old = sc520_freq_get_cpu_frequency(0); + freqs.new = sc520_freq_table[state].frequency; + freqs.cpu = 0; /* AMD Elan is UP */ + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + dprintk("attempting to set frequency to %i kHz\n", + sc520_freq_table[state].frequency); + + local_irq_disable(); + + clockspeed_reg = *cpuctl & ~0x03; + *cpuctl = clockspeed_reg | sc520_freq_table[state].index; + + local_irq_enable(); + + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); +}; + +static int sc520_freq_verify (struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &sc520_freq_table[0]); +} + +static int sc520_freq_target (struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + + if (cpufreq_frequency_table_target(policy, sc520_freq_table, target_freq, relation, &newstate)) + return -EINVAL; + + sc520_freq_set_cpu_state(newstate); + + return 0; +} + + +/* + * Module init and exit code + */ + +static int sc520_freq_cpu_init(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *c = cpu_data; + int result; + + /* capability check */ + if (c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 4 || c->x86_model != 9) + return -ENODEV; + + /* cpuinfo and default policy values */ + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = 1000000; /* 1ms */ + policy->cur = sc520_freq_get_cpu_frequency(0); + + result = cpufreq_frequency_table_cpuinfo(policy, sc520_freq_table); + if (result) + return (result); + + cpufreq_frequency_table_get_attr(sc520_freq_table, policy->cpu); + + return 0; +} + + +static int sc520_freq_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + + +static struct freq_attr* sc520_freq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + + +static struct cpufreq_driver sc520_freq_driver = { + .get = sc520_freq_get_cpu_frequency, + .verify = sc520_freq_verify, + .target = sc520_freq_target, + .init = sc520_freq_cpu_init, + .exit = sc520_freq_cpu_exit, + .name = "sc520_freq", + .owner = THIS_MODULE, + .attr = sc520_freq_attr, +}; + + +static int __init sc520_freq_init(void) +{ + struct cpuinfo_x86 *c = cpu_data; + int err; + + /* Test if we have the right hardware */ + if(c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 4 || c->x86_model != 9) { + dprintk("no Elan SC520 processor found!\n"); + return -ENODEV; + } + cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1); + if(!cpuctl) { + printk(KERN_ERR "sc520_freq: error: failed to remap memory\n"); + return -ENOMEM; + } + + err = cpufreq_register_driver(&sc520_freq_driver); + if (err) + iounmap(cpuctl); + + return err; +} + + +static void __exit sc520_freq_exit(void) +{ + cpufreq_unregister_driver(&sc520_freq_driver); + iounmap(cpuctl); +} + + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Sean Young "); +MODULE_DESCRIPTION("cpufreq driver for AMD's Elan sc520 CPU"); + +module_init(sc520_freq_init); +module_exit(sc520_freq_exit); + diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c new file mode 100644 index 00000000000..6c5dc2c85ae --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -0,0 +1,634 @@ +/* + * cpufreq driver for Enhanced SpeedStep, as found in Intel's Pentium + * M (part of the Centrino chipset). + * + * Since the original Pentium M, most new Intel CPUs support Enhanced + * SpeedStep. + * + * Despite the "SpeedStep" in the name, this is almost entirely unlike + * traditional SpeedStep. + * + * Modelled on speedstep.c + * + * Copyright (C) 2003 Jeremy Fitzhardinge + */ + +#include +#include +#include +#include +#include /* current */ +#include +#include + +#include +#include +#include + +#define PFX "speedstep-centrino: " +#define MAINTAINER "cpufreq@lists.linux.org.uk" + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) + +#define INTEL_MSR_RANGE (0xffff) + +struct cpu_id +{ + __u8 x86; /* CPU family */ + __u8 x86_model; /* model */ + __u8 x86_mask; /* stepping */ +}; + +enum { + CPU_BANIAS, + CPU_DOTHAN_A1, + CPU_DOTHAN_A2, + CPU_DOTHAN_B0, + CPU_MP4HT_D0, + CPU_MP4HT_E0, +}; + +static const struct cpu_id cpu_ids[] = { + [CPU_BANIAS] = { 6, 9, 5 }, + [CPU_DOTHAN_A1] = { 6, 13, 1 }, + [CPU_DOTHAN_A2] = { 6, 13, 2 }, + [CPU_DOTHAN_B0] = { 6, 13, 6 }, + [CPU_MP4HT_D0] = {15, 3, 4 }, + [CPU_MP4HT_E0] = {15, 4, 1 }, +}; +#define N_IDS ARRAY_SIZE(cpu_ids) + +struct cpu_model +{ + const struct cpu_id *cpu_id; + const char *model_name; + unsigned max_freq; /* max clock in kHz */ + + struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */ +}; +static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x); + +/* Operating points for current CPU */ +static struct cpu_model *centrino_model[NR_CPUS]; +static const struct cpu_id *centrino_cpu[NR_CPUS]; + +static struct cpufreq_driver centrino_driver; + +#ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE + +/* Computes the correct form for IA32_PERF_CTL MSR for a particular + frequency/voltage operating point; frequency in MHz, volts in mV. + This is stored as "index" in the structure. */ +#define OP(mhz, mv) \ + { \ + .frequency = (mhz) * 1000, \ + .index = (((mhz)/100) << 8) | ((mv - 700) / 16) \ + } + +/* + * These voltage tables were derived from the Intel Pentium M + * datasheet, document 25261202.pdf, Table 5. I have verified they + * are consistent with my IBM ThinkPad X31, which has a 1.3GHz Pentium + * M. + */ + +/* Ultra Low Voltage Intel Pentium M processor 900MHz (Banias) */ +static struct cpufreq_frequency_table banias_900[] = +{ + OP(600, 844), + OP(800, 988), + OP(900, 1004), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Ultra Low Voltage Intel Pentium M processor 1000MHz (Banias) */ +static struct cpufreq_frequency_table banias_1000[] = +{ + OP(600, 844), + OP(800, 972), + OP(900, 988), + OP(1000, 1004), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Low Voltage Intel Pentium M processor 1.10GHz (Banias) */ +static struct cpufreq_frequency_table banias_1100[] = +{ + OP( 600, 956), + OP( 800, 1020), + OP( 900, 1100), + OP(1000, 1164), + OP(1100, 1180), + { .frequency = CPUFREQ_TABLE_END } +}; + + +/* Low Voltage Intel Pentium M processor 1.20GHz (Banias) */ +static struct cpufreq_frequency_table banias_1200[] = +{ + OP( 600, 956), + OP( 800, 1004), + OP( 900, 1020), + OP(1000, 1100), + OP(1100, 1164), + OP(1200, 1180), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Intel Pentium M processor 1.30GHz (Banias) */ +static struct cpufreq_frequency_table banias_1300[] = +{ + OP( 600, 956), + OP( 800, 1260), + OP(1000, 1292), + OP(1200, 1356), + OP(1300, 1388), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Intel Pentium M processor 1.40GHz (Banias) */ +static struct cpufreq_frequency_table banias_1400[] = +{ + OP( 600, 956), + OP( 800, 1180), + OP(1000, 1308), + OP(1200, 1436), + OP(1400, 1484), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Intel Pentium M processor 1.50GHz (Banias) */ +static struct cpufreq_frequency_table banias_1500[] = +{ + OP( 600, 956), + OP( 800, 1116), + OP(1000, 1228), + OP(1200, 1356), + OP(1400, 1452), + OP(1500, 1484), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Intel Pentium M processor 1.60GHz (Banias) */ +static struct cpufreq_frequency_table banias_1600[] = +{ + OP( 600, 956), + OP( 800, 1036), + OP(1000, 1164), + OP(1200, 1276), + OP(1400, 1420), + OP(1600, 1484), + { .frequency = CPUFREQ_TABLE_END } +}; + +/* Intel Pentium M processor 1.70GHz (Banias) */ +static struct cpufreq_frequency_table banias_1700[] = +{ + OP( 600, 956), + OP( 800, 1004), + OP(1000, 1116), + OP(1200, 1228), + OP(1400, 1308), + OP(1700, 1484), + { .frequency = CPUFREQ_TABLE_END } +}; +#undef OP + +#define _BANIAS(cpuid, max, name) \ +{ .cpu_id = cpuid, \ + .model_name = "Intel(R) Pentium(R) M processor " name "MHz", \ + .max_freq = (max)*1000, \ + .op_points = banias_##max, \ +} +#define BANIAS(max) _BANIAS(&cpu_ids[CPU_BANIAS], max, #max) + +/* CPU models, their operating frequency range, and freq/voltage + operating points */ +static struct cpu_model models[] = +{ + _BANIAS(&cpu_ids[CPU_BANIAS], 900, " 900"), + BANIAS(1000), + BANIAS(1100), + BANIAS(1200), + BANIAS(1300), + BANIAS(1400), + BANIAS(1500), + BANIAS(1600), + BANIAS(1700), + + /* NULL model_name is a wildcard */ + { &cpu_ids[CPU_DOTHAN_A1], NULL, 0, NULL }, + { &cpu_ids[CPU_DOTHAN_A2], NULL, 0, NULL }, + { &cpu_ids[CPU_DOTHAN_B0], NULL, 0, NULL }, + { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL }, + { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL }, + + { NULL, } +}; +#undef _BANIAS +#undef BANIAS + +static int centrino_cpu_init_table(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; + struct cpu_model *model; + + for(model = models; model->cpu_id != NULL; model++) + if (centrino_verify_cpu_id(cpu, model->cpu_id) && + (model->model_name == NULL || + strcmp(cpu->x86_model_id, model->model_name) == 0)) + break; + + if (model->cpu_id == NULL) { + /* No match at all */ + dprintk("no support for CPU model \"%s\": " + "send /proc/cpuinfo to " MAINTAINER "\n", + cpu->x86_model_id); + return -ENOENT; + } + + if (model->op_points == NULL) { + /* Matched a non-match */ + dprintk("no table support for CPU model \"%s\"\n", + cpu->x86_model_id); + dprintk("try using the acpi-cpufreq driver\n"); + return -ENOENT; + } + + centrino_model[policy->cpu] = model; + + dprintk("found \"%s\": max frequency: %dkHz\n", + model->model_name, model->max_freq); + + return 0; +} + +#else +static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) { return -ENODEV; } +#endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ + +static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x) +{ + if ((c->x86 == x->x86) && + (c->x86_model == x->x86_model) && + (c->x86_mask == x->x86_mask)) + return 1; + return 0; +} + +/* To be called only after centrino_model is initialized */ +static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe) +{ + int i; + + /* + * Extract clock in kHz from PERF_CTL value + * for centrino, as some DSDTs are buggy. + * Ideally, this can be done using the acpi_data structure. + */ + if ((centrino_cpu[cpu] == &cpu_ids[CPU_BANIAS]) || + (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_A1]) || + (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_B0])) { + msr = (msr >> 8) & 0xff; + return msr * 100000; + } + + if ((!centrino_model[cpu]) || (!centrino_model[cpu]->op_points)) + return 0; + + msr &= 0xffff; + for (i=0;centrino_model[cpu]->op_points[i].frequency != CPUFREQ_TABLE_END; i++) { + if (msr == centrino_model[cpu]->op_points[i].index) + return centrino_model[cpu]->op_points[i].frequency; + } + if (failsafe) + return centrino_model[cpu]->op_points[i-1].frequency; + else + return 0; +} + +/* Return the current CPU frequency in kHz */ +static unsigned int get_cur_freq(unsigned int cpu) +{ + unsigned l, h; + unsigned clock_freq; + cpumask_t saved_mask; + + saved_mask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (smp_processor_id() != cpu) + return 0; + + rdmsr(MSR_IA32_PERF_STATUS, l, h); + clock_freq = extract_clock(l, cpu, 0); + + if (unlikely(clock_freq == 0)) { + /* + * On some CPUs, we can see transient MSR values (which are + * not present in _PSS), while CPU is doing some automatic + * P-state transition (like TM2). Get the last freq set + * in PERF_CTL. + */ + rdmsr(MSR_IA32_PERF_CTL, l, h); + clock_freq = extract_clock(l, cpu, 1); + } + + set_cpus_allowed(current, saved_mask); + return clock_freq; +} + + +static int centrino_cpu_init(struct cpufreq_policy *policy) +{ + struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; + unsigned freq; + unsigned l, h; + int ret; + int i; + + /* Only Intel makes Enhanced Speedstep-capable CPUs */ + if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) + return -ENODEV; + + if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) + centrino_driver.flags |= CPUFREQ_CONST_LOOPS; + + if (policy->cpu != 0) + return -ENODEV; + + for (i = 0; i < N_IDS; i++) + if (centrino_verify_cpu_id(cpu, &cpu_ids[i])) + break; + + if (i != N_IDS) + centrino_cpu[policy->cpu] = &cpu_ids[i]; + + if (!centrino_cpu[policy->cpu]) { + dprintk("found unsupported CPU with " + "Enhanced SpeedStep: send /proc/cpuinfo to " + MAINTAINER "\n"); + return -ENODEV; + } + + if (centrino_cpu_init_table(policy)) { + return -ENODEV; + } + + /* Check to see if Enhanced SpeedStep is enabled, and try to + enable it if not. */ + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + + if (!(l & (1<<16))) { + l |= (1<<16); + dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); + wrmsr(MSR_IA32_MISC_ENABLE, l, h); + + /* check to see if it stuck */ + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + if (!(l & (1<<16))) { + printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n"); + return -ENODEV; + } + } + + freq = get_cur_freq(policy->cpu); + + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */ + policy->cur = freq; + + dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); + + ret = cpufreq_frequency_table_cpuinfo(policy, centrino_model[policy->cpu]->op_points); + if (ret) + return (ret); + + cpufreq_frequency_table_get_attr(centrino_model[policy->cpu]->op_points, policy->cpu); + + return 0; +} + +static int centrino_cpu_exit(struct cpufreq_policy *policy) +{ + unsigned int cpu = policy->cpu; + + if (!centrino_model[cpu]) + return -ENODEV; + + cpufreq_frequency_table_put_attr(cpu); + + centrino_model[cpu] = NULL; + + return 0; +} + +/** + * centrino_verify - verifies a new CPUFreq policy + * @policy: new policy + * + * Limit must be within this model's frequency range at least one + * border included. + */ +static int centrino_verify (struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, centrino_model[policy->cpu]->op_points); +} + +/** + * centrino_setpolicy - set a new CPUFreq policy + * @policy: new policy + * @target_freq: the target frequency + * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) + * + * Sets a new CPUFreq policy. + */ +static int centrino_target (struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; + struct cpufreq_freqs freqs; + cpumask_t online_policy_cpus; + cpumask_t saved_mask; + cpumask_t set_mask; + cpumask_t covered_cpus; + int retval = 0; + unsigned int j, k, first_cpu, tmp; + + if (unlikely(centrino_model[cpu] == NULL)) + return -ENODEV; + + if (unlikely(cpufreq_frequency_table_target(policy, + centrino_model[cpu]->op_points, + target_freq, + relation, + &newstate))) { + return -EINVAL; + } + +#ifdef CONFIG_HOTPLUG_CPU + /* cpufreq holds the hotplug lock, so we are safe from here on */ + cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); +#else + online_policy_cpus = policy->cpus; +#endif + + saved_mask = current->cpus_allowed; + first_cpu = 1; + cpus_clear(covered_cpus); + for_each_cpu_mask(j, online_policy_cpus) { + /* + * Support for SMP systems. + * Make sure we are running on CPU that wants to change freq + */ + cpus_clear(set_mask); + if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) + cpus_or(set_mask, set_mask, online_policy_cpus); + else + cpu_set(j, set_mask); + + set_cpus_allowed(current, set_mask); + preempt_disable(); + if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) { + dprintk("couldn't limit to CPUs in this domain\n"); + retval = -EAGAIN; + if (first_cpu) { + /* We haven't started the transition yet. */ + goto migrate_end; + } + preempt_enable(); + break; + } + + msr = centrino_model[cpu]->op_points[newstate].index; + + if (first_cpu) { + rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); + if (msr == (oldmsr & 0xffff)) { + dprintk("no change needed - msr was and needs " + "to be %x\n", oldmsr); + retval = 0; + goto migrate_end; + } + + freqs.old = extract_clock(oldmsr, cpu, 0); + freqs.new = extract_clock(msr, cpu, 0); + + dprintk("target=%dkHz old=%d new=%d msr=%04x\n", + target_freq, freqs.old, freqs.new, msr); + + for_each_cpu_mask(k, online_policy_cpus) { + freqs.cpu = k; + cpufreq_notify_transition(&freqs, + CPUFREQ_PRECHANGE); + } + + first_cpu = 0; + /* all but 16 LSB are reserved, treat them with care */ + oldmsr &= ~0xffff; + msr &= 0xffff; + oldmsr |= msr; + } + + wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); + if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { + preempt_enable(); + break; + } + + cpu_set(j, covered_cpus); + preempt_enable(); + } + + for_each_cpu_mask(k, online_policy_cpus) { + freqs.cpu = k; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + + if (unlikely(retval)) { + /* + * We have failed halfway through the frequency change. + * We have sent callbacks to policy->cpus and + * MSRs have already been written on coverd_cpus. + * Best effort undo.. + */ + + if (!cpus_empty(covered_cpus)) { + for_each_cpu_mask(j, covered_cpus) { + set_cpus_allowed(current, cpumask_of_cpu(j)); + wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); + } + } + + tmp = freqs.new; + freqs.new = freqs.old; + freqs.old = tmp; + for_each_cpu_mask(j, online_policy_cpus) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + } + set_cpus_allowed(current, saved_mask); + return 0; + +migrate_end: + preempt_enable(); + set_cpus_allowed(current, saved_mask); + return 0; +} + +static struct freq_attr* centrino_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver centrino_driver = { + .name = "centrino", /* should be speedstep-centrino, + but there's a 16 char limit */ + .init = centrino_cpu_init, + .exit = centrino_cpu_exit, + .verify = centrino_verify, + .target = centrino_target, + .get = get_cur_freq, + .attr = centrino_attr, + .owner = THIS_MODULE, +}; + + +/** + * centrino_init - initializes the Enhanced SpeedStep CPUFreq driver + * + * Initializes the Enhanced SpeedStep support. Returns -ENODEV on + * unsupported devices, -ENOENT if there's no voltage table for this + * particular CPU model, -EINVAL on problems during initiatization, + * and zero on success. + * + * This is quite picky. Not only does the CPU have to advertise the + * "est" flag in the cpuid capability flags, we look for a specific + * CPU model and stepping, and we need to have the exact model name in + * our voltage tables. That is, be paranoid about not releasing + * someone's valuable magic smoke. + */ +static int __init centrino_init(void) +{ + struct cpuinfo_x86 *cpu = cpu_data; + + if (!cpu_has(cpu, X86_FEATURE_EST)) + return -ENODEV; + + return cpufreq_register_driver(¢rino_driver); +} + +static void __exit centrino_exit(void) +{ + cpufreq_unregister_driver(¢rino_driver); +} + +MODULE_AUTHOR ("Jeremy Fitzhardinge "); +MODULE_DESCRIPTION ("Enhanced SpeedStep driver for Intel Pentium M processors."); +MODULE_LICENSE ("GPL"); + +late_initcall(centrino_init); +module_exit(centrino_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c new file mode 100644 index 00000000000..a5b2346faf1 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -0,0 +1,440 @@ +/* + * (C) 2001 Dave Jones, Arjan van de ven. + * (C) 2002 - 2003 Dominik Brodowski + * + * Licensed under the terms of the GNU GPL License version 2. + * Based upon reverse engineered information, and on Intel documentation + * for chipsets ICH2-M and ICH3-M. + * + * Many thanks to Ducrot Bruno for finding and fixing the last + * "missing link" for ICH2-M/ICH3-M support, and to Thomas Winkler + * for extensive testing. + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + + +/********************************************************************* + * SPEEDSTEP - DEFINITIONS * + *********************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "speedstep-lib.h" + + +/* speedstep_chipset: + * It is necessary to know which chipset is used. As accesses to + * this device occur at various places in this module, we need a + * static struct pci_dev * pointing to that device. + */ +static struct pci_dev *speedstep_chipset_dev; + + +/* speedstep_processor + */ +static unsigned int speedstep_processor = 0; + +static u32 pmbase; + +/* + * There are only two frequency states for each processor. Values + * are in kHz for the time being. + */ +static struct cpufreq_frequency_table speedstep_freqs[] = { + {SPEEDSTEP_HIGH, 0}, + {SPEEDSTEP_LOW, 0}, + {0, CPUFREQ_TABLE_END}, +}; + + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-ich", msg) + + +/** + * speedstep_find_register - read the PMBASE address + * + * Returns: -ENODEV if no register could be found + */ +static int speedstep_find_register (void) +{ + if (!speedstep_chipset_dev) + return -ENODEV; + + /* get PMBASE */ + pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase); + if (!(pmbase & 0x01)) { + printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); + return -ENODEV; + } + + pmbase &= 0xFFFFFFFE; + if (!pmbase) { + printk(KERN_ERR "speedstep-ich: could not find speedstep register\n"); + return -ENODEV; + } + + dprintk("pmbase is 0x%x\n", pmbase); + return 0; +} + +/** + * speedstep_set_state - set the SpeedStep state + * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) + * + * Tries to change the SpeedStep state. + */ +static void speedstep_set_state (unsigned int state) +{ + u8 pm2_blk; + u8 value; + unsigned long flags; + + if (state > 0x1) + return; + + /* Disable IRQs */ + local_irq_save(flags); + + /* read state */ + value = inb(pmbase + 0x50); + + dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); + + /* write new state */ + value &= 0xFE; + value |= state; + + dprintk("writing 0x%x to pmbase 0x%x + 0x50\n", value, pmbase); + + /* Disable bus master arbitration */ + pm2_blk = inb(pmbase + 0x20); + pm2_blk |= 0x01; + outb(pm2_blk, (pmbase + 0x20)); + + /* Actual transition */ + outb(value, (pmbase + 0x50)); + + /* Restore bus master arbitration */ + pm2_blk &= 0xfe; + outb(pm2_blk, (pmbase + 0x20)); + + /* check if transition was successful */ + value = inb(pmbase + 0x50); + + /* Enable IRQs */ + local_irq_restore(flags); + + dprintk("read at pmbase 0x%x + 0x50 returned 0x%x\n", pmbase, value); + + if (state == (value & 0x1)) { + dprintk("change to %u MHz succeeded\n", (speedstep_get_processor_frequency(speedstep_processor) / 1000)); + } else { + printk (KERN_ERR "cpufreq: change failed - I/O error\n"); + } + + return; +} + + +/** + * speedstep_activate - activate SpeedStep control in the chipset + * + * Tries to activate the SpeedStep status and control registers. + * Returns -EINVAL on an unsupported chipset, and zero on success. + */ +static int speedstep_activate (void) +{ + u16 value = 0; + + if (!speedstep_chipset_dev) + return -EINVAL; + + pci_read_config_word(speedstep_chipset_dev, 0x00A0, &value); + if (!(value & 0x08)) { + value |= 0x08; + dprintk("activating SpeedStep (TM) registers\n"); + pci_write_config_word(speedstep_chipset_dev, 0x00A0, value); + } + + return 0; +} + + +/** + * speedstep_detect_chipset - detect the Southbridge which contains SpeedStep logic + * + * Detects ICH2-M, ICH3-M and ICH4-M so far. The pci_dev points to + * the LPC bridge / PM module which contains all power-management + * functions. Returns the SPEEDSTEP_CHIPSET_-number for the detected + * chipset, or zero on failure. + */ +static unsigned int speedstep_detect_chipset (void) +{ + speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82801DB_12, + PCI_ANY_ID, + PCI_ANY_ID, + NULL); + if (speedstep_chipset_dev) + return 4; /* 4-M */ + + speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82801CA_12, + PCI_ANY_ID, + PCI_ANY_ID, + NULL); + if (speedstep_chipset_dev) + return 3; /* 3-M */ + + + speedstep_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82801BA_10, + PCI_ANY_ID, + PCI_ANY_ID, + NULL); + if (speedstep_chipset_dev) { + /* speedstep.c causes lockups on Dell Inspirons 8000 and + * 8100 which use a pretty old revision of the 82815 + * host brige. Abort on these systems. + */ + static struct pci_dev *hostbridge; + + hostbridge = pci_get_subsys(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82815_MC, + PCI_ANY_ID, + PCI_ANY_ID, + NULL); + + if (!hostbridge) + return 2; /* 2-M */ + + if (hostbridge->revision < 5) { + dprintk("hostbridge does not support speedstep\n"); + speedstep_chipset_dev = NULL; + pci_dev_put(hostbridge); + return 0; + } + + pci_dev_put(hostbridge); + return 2; /* 2-M */ + } + + return 0; +} + +static unsigned int _speedstep_get(cpumask_t cpus) +{ + unsigned int speed; + cpumask_t cpus_allowed; + + cpus_allowed = current->cpus_allowed; + set_cpus_allowed(current, cpus); + speed = speedstep_get_processor_frequency(speedstep_processor); + set_cpus_allowed(current, cpus_allowed); + dprintk("detected %u kHz as current frequency\n", speed); + return speed; +} + +static unsigned int speedstep_get(unsigned int cpu) +{ + return _speedstep_get(cpumask_of_cpu(cpu)); +} + +/** + * speedstep_target - set a new CPUFreq policy + * @policy: new policy + * @target_freq: the target frequency + * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) + * + * Sets a new CPUFreq policy. + */ +static int speedstep_target (struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + struct cpufreq_freqs freqs; + cpumask_t cpus_allowed; + int i; + + if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) + return -EINVAL; + + freqs.old = _speedstep_get(policy->cpus); + freqs.new = speedstep_freqs[newstate].frequency; + freqs.cpu = policy->cpu; + + dprintk("transiting from %u to %u kHz\n", freqs.old, freqs.new); + + /* no transition necessary */ + if (freqs.old == freqs.new) + return 0; + + cpus_allowed = current->cpus_allowed; + + for_each_cpu_mask(i, policy->cpus) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } + + /* switch to physical CPU where state is to be changed */ + set_cpus_allowed(current, policy->cpus); + + speedstep_set_state(newstate); + + /* allow to be run on all CPUs */ + set_cpus_allowed(current, cpus_allowed); + + for_each_cpu_mask(i, policy->cpus) { + freqs.cpu = i; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } + + return 0; +} + + +/** + * speedstep_verify - verifies a new CPUFreq policy + * @policy: new policy + * + * Limit must be within speedstep_low_freq and speedstep_high_freq, with + * at least one border included. + */ +static int speedstep_verify (struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); +} + + +static int speedstep_cpu_init(struct cpufreq_policy *policy) +{ + int result = 0; + unsigned int speed; + cpumask_t cpus_allowed; + + /* only run on CPU to be set, or on its sibling */ +#ifdef CONFIG_SMP + policy->cpus = cpu_sibling_map[policy->cpu]; +#endif + + cpus_allowed = current->cpus_allowed; + set_cpus_allowed(current, policy->cpus); + + /* detect low and high frequency and transition latency */ + result = speedstep_get_freqs(speedstep_processor, + &speedstep_freqs[SPEEDSTEP_LOW].frequency, + &speedstep_freqs[SPEEDSTEP_HIGH].frequency, + &policy->cpuinfo.transition_latency, + &speedstep_set_state); + set_cpus_allowed(current, cpus_allowed); + if (result) + return result; + + /* get current speed setting */ + speed = _speedstep_get(policy->cpus); + if (!speed) + return -EIO; + + dprintk("currently at %s speed setting - %i MHz\n", + (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", + (speed / 1000)); + + /* cpuinfo and default policy values */ + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cur = speed; + + result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); + if (result) + return (result); + + cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); + + return 0; +} + + +static int speedstep_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static struct freq_attr* speedstep_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + + +static struct cpufreq_driver speedstep_driver = { + .name = "speedstep-ich", + .verify = speedstep_verify, + .target = speedstep_target, + .init = speedstep_cpu_init, + .exit = speedstep_cpu_exit, + .get = speedstep_get, + .owner = THIS_MODULE, + .attr = speedstep_attr, +}; + + +/** + * speedstep_init - initializes the SpeedStep CPUFreq driver + * + * Initializes the SpeedStep support. Returns -ENODEV on unsupported + * devices, -EINVAL on problems during initiatization, and zero on + * success. + */ +static int __init speedstep_init(void) +{ + /* detect processor */ + speedstep_processor = speedstep_detect_processor(); + if (!speedstep_processor) { + dprintk("Intel(R) SpeedStep(TM) capable processor not found\n"); + return -ENODEV; + } + + /* detect chipset */ + if (!speedstep_detect_chipset()) { + dprintk("Intel(R) SpeedStep(TM) for this chipset not (yet) available.\n"); + return -ENODEV; + } + + /* activate speedstep support */ + if (speedstep_activate()) { + pci_dev_put(speedstep_chipset_dev); + return -EINVAL; + } + + if (speedstep_find_register()) + return -ENODEV; + + return cpufreq_register_driver(&speedstep_driver); +} + + +/** + * speedstep_exit - unregisters SpeedStep support + * + * Unregisters SpeedStep support. + */ +static void __exit speedstep_exit(void) +{ + pci_dev_put(speedstep_chipset_dev); + cpufreq_unregister_driver(&speedstep_driver); +} + + +MODULE_AUTHOR ("Dave Jones , Dominik Brodowski "); +MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges."); +MODULE_LICENSE ("GPL"); + +module_init(speedstep_init); +module_exit(speedstep_exit); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c new file mode 100644 index 00000000000..b1acc8ce316 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -0,0 +1,444 @@ +/* + * (C) 2002 - 2003 Dominik Brodowski + * + * Licensed under the terms of the GNU GPL License version 2. + * + * Library for common functions for Intel SpeedStep v.1 and v.2 support + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + +#include +#include +#include +#include +#include +#include + +#include +#include "speedstep-lib.h" + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-lib", msg) + +#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK +static int relaxed_check = 0; +#else +#define relaxed_check 0 +#endif + +/********************************************************************* + * GET PROCESSOR CORE SPEED IN KHZ * + *********************************************************************/ + +static unsigned int pentium3_get_frequency (unsigned int processor) +{ + /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ + struct { + unsigned int ratio; /* Frequency Multiplier (x10) */ + u8 bitmap; /* power on configuration bits + [27, 25:22] (in MSR 0x2a) */ + } msr_decode_mult [] = { + { 30, 0x01 }, + { 35, 0x05 }, + { 40, 0x02 }, + { 45, 0x06 }, + { 50, 0x00 }, + { 55, 0x04 }, + { 60, 0x0b }, + { 65, 0x0f }, + { 70, 0x09 }, + { 75, 0x0d }, + { 80, 0x0a }, + { 85, 0x26 }, + { 90, 0x20 }, + { 100, 0x2b }, + { 0, 0xff } /* error or unknown value */ + }; + + /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */ + struct { + unsigned int value; /* Front Side Bus speed in MHz */ + u8 bitmap; /* power on configuration bits [18: 19] + (in MSR 0x2a) */ + } msr_decode_fsb [] = { + { 66, 0x0 }, + { 100, 0x2 }, + { 133, 0x1 }, + { 0, 0xff} + }; + + u32 msr_lo, msr_tmp; + int i = 0, j = 0; + + /* read MSR 0x2a - we only need the low 32 bits */ + rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); + dprintk("P3 - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); + msr_tmp = msr_lo; + + /* decode the FSB */ + msr_tmp &= 0x00c0000; + msr_tmp >>= 18; + while (msr_tmp != msr_decode_fsb[i].bitmap) { + if (msr_decode_fsb[i].bitmap == 0xff) + return 0; + i++; + } + + /* decode the multiplier */ + if (processor == SPEEDSTEP_PROCESSOR_PIII_C_EARLY) { + dprintk("workaround for early PIIIs\n"); + msr_lo &= 0x03c00000; + } else + msr_lo &= 0x0bc00000; + msr_lo >>= 22; + while (msr_lo != msr_decode_mult[j].bitmap) { + if (msr_decode_mult[j].bitmap == 0xff) + return 0; + j++; + } + + dprintk("speed is %u\n", (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100)); + + return (msr_decode_mult[j].ratio * msr_decode_fsb[i].value * 100); +} + + +static unsigned int pentiumM_get_frequency(void) +{ + u32 msr_lo, msr_tmp; + + rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); + dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); + + /* see table B-2 of 24547212.pdf */ + if (msr_lo & 0x00040000) { + printk(KERN_DEBUG "speedstep-lib: PM - invalid FSB: 0x%x 0x%x\n", msr_lo, msr_tmp); + return 0; + } + + msr_tmp = (msr_lo >> 22) & 0x1f; + dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * 100 * 1000)); + + return (msr_tmp * 100 * 1000); +} + +static unsigned int pentium_core_get_frequency(void) +{ + u32 fsb = 0; + u32 msr_lo, msr_tmp; + + rdmsr(MSR_FSB_FREQ, msr_lo, msr_tmp); + /* see table B-2 of 25366920.pdf */ + switch (msr_lo & 0x07) { + case 5: + fsb = 100000; + break; + case 1: + fsb = 133333; + break; + case 3: + fsb = 166667; + break; + default: + printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value"); + } + + rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); + dprintk("PCORE - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); + + msr_tmp = (msr_lo >> 22) & 0x1f; + dprintk("bits 22-26 are 0x%x, speed is %u\n", msr_tmp, (msr_tmp * fsb)); + + return (msr_tmp * fsb); +} + + +static unsigned int pentium4_get_frequency(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + u32 msr_lo, msr_hi, mult; + unsigned int fsb = 0; + + rdmsr(0x2c, msr_lo, msr_hi); + + dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi); + + /* decode the FSB: see IA-32 Intel (C) Architecture Software + * Developer's Manual, Volume 3: System Prgramming Guide, + * revision #12 in Table B-1: MSRs in the Pentium 4 and + * Intel Xeon Processors, on page B-4 and B-5. + */ + if (c->x86_model < 2) + fsb = 100 * 1000; + else { + u8 fsb_code = (msr_lo >> 16) & 0x7; + switch (fsb_code) { + case 0: + fsb = 100 * 1000; + break; + case 1: + fsb = 13333 * 10; + break; + case 2: + fsb = 200 * 1000; + break; + } + } + + if (!fsb) + printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to \n"); + + /* Multiplier. */ + if (c->x86_model < 2) + mult = msr_lo >> 27; + else + mult = msr_lo >> 24; + + dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult)); + + return (fsb * mult); +} + + +unsigned int speedstep_get_processor_frequency(unsigned int processor) +{ + switch (processor) { + case SPEEDSTEP_PROCESSOR_PCORE: + return pentium_core_get_frequency(); + case SPEEDSTEP_PROCESSOR_PM: + return pentiumM_get_frequency(); + case SPEEDSTEP_PROCESSOR_P4D: + case SPEEDSTEP_PROCESSOR_P4M: + return pentium4_get_frequency(); + case SPEEDSTEP_PROCESSOR_PIII_T: + case SPEEDSTEP_PROCESSOR_PIII_C: + case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: + return pentium3_get_frequency(processor); + default: + return 0; + }; + return 0; +} +EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency); + + +/********************************************************************* + * DETECT SPEEDSTEP-CAPABLE PROCESSOR * + *********************************************************************/ + +unsigned int speedstep_detect_processor (void) +{ + struct cpuinfo_x86 *c = cpu_data; + u32 ebx, msr_lo, msr_hi; + + dprintk("x86: %x, model: %x\n", c->x86, c->x86_model); + + if ((c->x86_vendor != X86_VENDOR_INTEL) || + ((c->x86 != 6) && (c->x86 != 0xF))) + return 0; + + if (c->x86 == 0xF) { + /* Intel Mobile Pentium 4-M + * or Intel Mobile Pentium 4 with 533 MHz FSB */ + if (c->x86_model != 2) + return 0; + + ebx = cpuid_ebx(0x00000001); + ebx &= 0x000000FF; + + dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); + + switch (c->x86_mask) { + case 4: + /* + * B-stepping [M-P4-M] + * sample has ebx = 0x0f, production has 0x0e. + */ + if ((ebx == 0x0e) || (ebx == 0x0f)) + return SPEEDSTEP_PROCESSOR_P4M; + break; + case 7: + /* + * C-stepping [M-P4-M] + * needs to have ebx=0x0e, else it's a celeron: + * cf. 25130917.pdf / page 7, footnote 5 even + * though 25072120.pdf / page 7 doesn't say + * samples are only of B-stepping... + */ + if (ebx == 0x0e) + return SPEEDSTEP_PROCESSOR_P4M; + break; + case 9: + /* + * D-stepping [M-P4-M or M-P4/533] + * + * this is totally strange: CPUID 0x0F29 is + * used by M-P4-M, M-P4/533 and(!) Celeron CPUs. + * The latter need to be sorted out as they don't + * support speedstep. + * Celerons with CPUID 0x0F29 may have either + * ebx=0x8 or 0xf -- 25130917.pdf doesn't say anything + * specific. + * M-P4-Ms may have either ebx=0xe or 0xf [see above] + * M-P4/533 have either ebx=0xe or 0xf. [25317607.pdf] + * also, M-P4M HTs have ebx=0x8, too + * For now, they are distinguished by the model_id string + */ + if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL)) + return SPEEDSTEP_PROCESSOR_P4M; + break; + default: + break; + } + return 0; + } + + switch (c->x86_model) { + case 0x0B: /* Intel PIII [Tualatin] */ + /* cpuid_ebx(1) is 0x04 for desktop PIII, 0x06 for mobile PIII-M */ + ebx = cpuid_ebx(0x00000001); + dprintk("ebx is %x\n", ebx); + + ebx &= 0x000000FF; + + if (ebx != 0x06) + return 0; + + /* So far all PIII-M processors support SpeedStep. See + * Intel's 24540640.pdf of June 2003 + */ + return SPEEDSTEP_PROCESSOR_PIII_T; + + case 0x08: /* Intel PIII [Coppermine] */ + + /* all mobile PIII Coppermines have FSB 100 MHz + * ==> sort out a few desktop PIIIs. */ + rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_hi); + dprintk("Coppermine: MSR_IA32_EBL_CR_POWERON is 0x%x, 0x%x\n", msr_lo, msr_hi); + msr_lo &= 0x00c0000; + if (msr_lo != 0x0080000) + return 0; + + /* + * If the processor is a mobile version, + * platform ID has bit 50 set + * it has SpeedStep technology if either + * bit 56 or 57 is set + */ + rdmsr(MSR_IA32_PLATFORM_ID, msr_lo, msr_hi); + dprintk("Coppermine: MSR_IA32_PLATFORM ID is 0x%x, 0x%x\n", msr_lo, msr_hi); + if ((msr_hi & (1<<18)) && (relaxed_check ? 1 : (msr_hi & (3<<24)))) { + if (c->x86_mask == 0x01) { + dprintk("early PIII version\n"); + return SPEEDSTEP_PROCESSOR_PIII_C_EARLY; + } else + return SPEEDSTEP_PROCESSOR_PIII_C; + } + + default: + return 0; + } +} +EXPORT_SYMBOL_GPL(speedstep_detect_processor); + + +/********************************************************************* + * DETECT SPEEDSTEP SPEEDS * + *********************************************************************/ + +unsigned int speedstep_get_freqs(unsigned int processor, + unsigned int *low_speed, + unsigned int *high_speed, + unsigned int *transition_latency, + void (*set_state) (unsigned int state)) +{ + unsigned int prev_speed; + unsigned int ret = 0; + unsigned long flags; + struct timeval tv1, tv2; + + if ((!processor) || (!low_speed) || (!high_speed) || (!set_state)) + return -EINVAL; + + dprintk("trying to determine both speeds\n"); + + /* get current speed */ + prev_speed = speedstep_get_processor_frequency(processor); + if (!prev_speed) + return -EIO; + + dprintk("previous speed is %u\n", prev_speed); + + local_irq_save(flags); + + /* switch to low state */ + set_state(SPEEDSTEP_LOW); + *low_speed = speedstep_get_processor_frequency(processor); + if (!*low_speed) { + ret = -EIO; + goto out; + } + + dprintk("low speed is %u\n", *low_speed); + + /* start latency measurement */ + if (transition_latency) + do_gettimeofday(&tv1); + + /* switch to high state */ + set_state(SPEEDSTEP_HIGH); + + /* end latency measurement */ + if (transition_latency) + do_gettimeofday(&tv2); + + *high_speed = speedstep_get_processor_frequency(processor); + if (!*high_speed) { + ret = -EIO; + goto out; + } + + dprintk("high speed is %u\n", *high_speed); + + if (*low_speed == *high_speed) { + ret = -ENODEV; + goto out; + } + + /* switch to previous state, if necessary */ + if (*high_speed != prev_speed) + set_state(SPEEDSTEP_LOW); + + if (transition_latency) { + *transition_latency = (tv2.tv_sec - tv1.tv_sec) * USEC_PER_SEC + + tv2.tv_usec - tv1.tv_usec; + dprintk("transition latency is %u uSec\n", *transition_latency); + + /* convert uSec to nSec and add 20% for safety reasons */ + *transition_latency *= 1200; + + /* check if the latency measurement is too high or too low + * and set it to a safe value (500uSec) in that case + */ + if (*transition_latency > 10000000 || *transition_latency < 50000) { + printk (KERN_WARNING "speedstep: frequency transition measured seems out of " + "range (%u nSec), falling back to a safe one of %u nSec.\n", + *transition_latency, 500000); + *transition_latency = 500000; + } + } + +out: + local_irq_restore(flags); + return (ret); +} +EXPORT_SYMBOL_GPL(speedstep_get_freqs); + +#ifdef CONFIG_X86_SPEEDSTEP_RELAXED_CAP_CHECK +module_param(relaxed_check, int, 0444); +MODULE_PARM_DESC(relaxed_check, "Don't do all checks for speedstep capability."); +#endif + +MODULE_AUTHOR ("Dominik Brodowski "); +MODULE_DESCRIPTION ("Library for Intel SpeedStep 1 or 2 cpufreq drivers."); +MODULE_LICENSE ("GPL"); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h new file mode 100644 index 00000000000..b11bcc608ca --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h @@ -0,0 +1,49 @@ +/* + * (C) 2002 - 2003 Dominik Brodowski + * + * Licensed under the terms of the GNU GPL License version 2. + * + * Library for common functions for Intel SpeedStep v.1 and v.2 support + * + * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous* + */ + + + +/* processors */ + +#define SPEEDSTEP_PROCESSOR_PIII_C_EARLY 0x00000001 /* Coppermine core */ +#define SPEEDSTEP_PROCESSOR_PIII_C 0x00000002 /* Coppermine core */ +#define SPEEDSTEP_PROCESSOR_PIII_T 0x00000003 /* Tualatin core */ +#define SPEEDSTEP_PROCESSOR_P4M 0x00000004 /* P4-M */ + +/* the following processors are not speedstep-capable and are not auto-detected + * in speedstep_detect_processor(). However, their speed can be detected using + * the speedstep_get_processor_frequency() call. */ +#define SPEEDSTEP_PROCESSOR_PM 0xFFFFFF03 /* Pentium M */ +#define SPEEDSTEP_PROCESSOR_P4D 0xFFFFFF04 /* desktop P4 */ +#define SPEEDSTEP_PROCESSOR_PCORE 0xFFFFFF05 /* Core */ + +/* speedstep states -- only two of them */ + +#define SPEEDSTEP_HIGH 0x00000000 +#define SPEEDSTEP_LOW 0x00000001 + + +/* detect a speedstep-capable processor */ +extern unsigned int speedstep_detect_processor (void); + +/* detect the current speed (in khz) of the processor */ +extern unsigned int speedstep_get_processor_frequency(unsigned int processor); + + +/* detect the low and high speeds of the processor. The callback + * set_state"'s first argument is either SPEEDSTEP_HIGH or + * SPEEDSTEP_LOW; the second argument is zero so that no + * cpufreq_notify_transition calls are initiated. + */ +extern unsigned int speedstep_get_freqs(unsigned int processor, + unsigned int *low_speed, + unsigned int *high_speed, + unsigned int *transition_latency, + void (*set_state) (unsigned int state)); diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c new file mode 100644 index 00000000000..e1c509aa305 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c @@ -0,0 +1,424 @@ +/* + * Intel SpeedStep SMI driver. + * + * (C) 2003 Hiroshi Miura + * + * Licensed under the terms of the GNU GPL License version 2. + * + */ + + +/********************************************************************* + * SPEEDSTEP - DEFINITIONS * + *********************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "speedstep-lib.h" + +/* speedstep system management interface port/command. + * + * These parameters are got from IST-SMI BIOS call. + * If user gives it, these are used. + * + */ +static int smi_port = 0; +static int smi_cmd = 0; +static unsigned int smi_sig = 0; + +/* info about the processor */ +static unsigned int speedstep_processor = 0; + +/* + * There are only two frequency states for each processor. Values + * are in kHz for the time being. + */ +static struct cpufreq_frequency_table speedstep_freqs[] = { + {SPEEDSTEP_HIGH, 0}, + {SPEEDSTEP_LOW, 0}, + {0, CPUFREQ_TABLE_END}, +}; + +#define GET_SPEEDSTEP_OWNER 0 +#define GET_SPEEDSTEP_STATE 1 +#define SET_SPEEDSTEP_STATE 2 +#define GET_SPEEDSTEP_FREQS 4 + +/* how often shall the SMI call be tried if it failed, e.g. because + * of DMA activity going on? */ +#define SMI_TRIES 5 + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-smi", msg) + +/** + * speedstep_smi_ownership + */ +static int speedstep_smi_ownership (void) +{ + u32 command, result, magic; + u32 function = GET_SPEEDSTEP_OWNER; + unsigned char magic_data[] = "Copyright (c) 1999 Intel Corporation"; + + command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); + magic = virt_to_phys(magic_data); + + dprintk("trying to obtain ownership with command %x at port %x\n", command, smi_port); + + __asm__ __volatile__( + "out %%al, (%%dx)\n" + : "=D" (result) + : "a" (command), "b" (function), "c" (0), "d" (smi_port), + "D" (0), "S" (magic) + : "memory" + ); + + dprintk("result is %x\n", result); + + return result; +} + +/** + * speedstep_smi_get_freqs - get SpeedStep preferred & current freq. + * @low: the low frequency value is placed here + * @high: the high frequency value is placed here + * + * Only available on later SpeedStep-enabled systems, returns false results or + * even hangs [cf. bugme.osdl.org # 1422] on earlier systems. Empirical testing + * shows that the latter occurs if !(ist_info.event & 0xFFFF). + */ +static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) +{ + u32 command, result = 0, edi, high_mhz, low_mhz; + u32 state=0; + u32 function = GET_SPEEDSTEP_FREQS; + + if (!(ist_info.event & 0xFFFF)) { + dprintk("bug #1422 -- can't read freqs from BIOS\n"); + return -ENODEV; + } + + command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); + + dprintk("trying to determine frequencies with command %x at port %x\n", command, smi_port); + + __asm__ __volatile__("movl $0, %%edi\n" + "out %%al, (%%dx)\n" + : "=a" (result), "=b" (high_mhz), "=c" (low_mhz), "=d" (state), "=D" (edi) + : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0) + ); + + dprintk("result %x, low_freq %u, high_freq %u\n", result, low_mhz, high_mhz); + + /* abort if results are obviously incorrect... */ + if ((high_mhz + low_mhz) < 600) + return -EINVAL; + + *high = high_mhz * 1000; + *low = low_mhz * 1000; + + return result; +} + +/** + * speedstep_get_state - set the SpeedStep state + * @state: processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) + * + */ +static int speedstep_get_state (void) +{ + u32 function=GET_SPEEDSTEP_STATE; + u32 result, state, edi, command; + + command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); + + dprintk("trying to determine current setting with command %x at port %x\n", command, smi_port); + + __asm__ __volatile__("movl $0, %%edi\n" + "out %%al, (%%dx)\n" + : "=a" (result), "=b" (state), "=D" (edi) + : "a" (command), "b" (function), "c" (0), "d" (smi_port), "S" (0) + ); + + dprintk("state is %x, result is %x\n", state, result); + + return (state & 1); +} + + +/** + * speedstep_set_state - set the SpeedStep state + * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) + * + */ +static void speedstep_set_state (unsigned int state) +{ + unsigned int result = 0, command, new_state; + unsigned long flags; + unsigned int function=SET_SPEEDSTEP_STATE; + unsigned int retry = 0; + + if (state > 0x1) + return; + + /* Disable IRQs */ + local_irq_save(flags); + + command = (smi_sig & 0xffffff00) | (smi_cmd & 0xff); + + dprintk("trying to set frequency to state %u with command %x at port %x\n", state, command, smi_port); + + do { + if (retry) { + dprintk("retry %u, previous result %u, waiting...\n", retry, result); + mdelay(retry * 50); + } + retry++; + __asm__ __volatile__( + "movl $0, %%edi\n" + "out %%al, (%%dx)\n" + : "=b" (new_state), "=D" (result) + : "a" (command), "b" (function), "c" (state), "d" (smi_port), "S" (0) + ); + } while ((new_state != state) && (retry <= SMI_TRIES)); + + /* enable IRQs */ + local_irq_restore(flags); + + if (new_state == state) { + dprintk("change to %u MHz succeeded after %u tries with result %u\n", (speedstep_freqs[new_state].frequency / 1000), retry, result); + } else { + printk(KERN_ERR "cpufreq: change failed with new_state %u and result %u\n", new_state, result); + } + + return; +} + + +/** + * speedstep_target - set a new CPUFreq policy + * @policy: new policy + * @target_freq: new freq + * @relation: + * + * Sets a new CPUFreq policy/freq. + */ +static int speedstep_target (struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ + unsigned int newstate = 0; + struct cpufreq_freqs freqs; + + if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], target_freq, relation, &newstate)) + return -EINVAL; + + freqs.old = speedstep_freqs[speedstep_get_state()].frequency; + freqs.new = speedstep_freqs[newstate].frequency; + freqs.cpu = 0; /* speedstep.c is UP only driver */ + + if (freqs.old == freqs.new) + return 0; + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + speedstep_set_state(newstate); + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + return 0; +} + + +/** + * speedstep_verify - verifies a new CPUFreq policy + * @policy: new policy + * + * Limit must be within speedstep_low_freq and speedstep_high_freq, with + * at least one border included. + */ +static int speedstep_verify (struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); +} + + +static int speedstep_cpu_init(struct cpufreq_policy *policy) +{ + int result; + unsigned int speed,state; + + /* capability check */ + if (policy->cpu != 0) + return -ENODEV; + + result = speedstep_smi_ownership(); + if (result) { + dprintk("fails in aquiring ownership of a SMI interface.\n"); + return -EINVAL; + } + + /* detect low and high frequency */ + result = speedstep_smi_get_freqs(&speedstep_freqs[SPEEDSTEP_LOW].frequency, + &speedstep_freqs[SPEEDSTEP_HIGH].frequency); + if (result) { + /* fall back to speedstep_lib.c dection mechanism: try both states out */ + dprintk("could not detect low and high frequencies by SMI call.\n"); + result = speedstep_get_freqs(speedstep_processor, + &speedstep_freqs[SPEEDSTEP_LOW].frequency, + &speedstep_freqs[SPEEDSTEP_HIGH].frequency, + NULL, + &speedstep_set_state); + + if (result) { + dprintk("could not detect two different speeds -- aborting.\n"); + return result; + } else + dprintk("workaround worked.\n"); + } + + /* get current speed setting */ + state = speedstep_get_state(); + speed = speedstep_freqs[state].frequency; + + dprintk("currently at %s speed setting - %i MHz\n", + (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", + (speed / 1000)); + + /* cpuinfo and default policy values */ + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cur = speed; + + result = cpufreq_frequency_table_cpuinfo(policy, speedstep_freqs); + if (result) + return (result); + + cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); + + return 0; +} + +static int speedstep_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static unsigned int speedstep_get(unsigned int cpu) +{ + if (cpu) + return -ENODEV; + return speedstep_get_processor_frequency(speedstep_processor); +} + + +static int speedstep_resume(struct cpufreq_policy *policy) +{ + int result = speedstep_smi_ownership(); + + if (result) + dprintk("fails in re-aquiring ownership of a SMI interface.\n"); + + return result; +} + +static struct freq_attr* speedstep_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver speedstep_driver = { + .name = "speedstep-smi", + .verify = speedstep_verify, + .target = speedstep_target, + .init = speedstep_cpu_init, + .exit = speedstep_cpu_exit, + .get = speedstep_get, + .resume = speedstep_resume, + .owner = THIS_MODULE, + .attr = speedstep_attr, +}; + +/** + * speedstep_init - initializes the SpeedStep CPUFreq driver + * + * Initializes the SpeedStep support. Returns -ENODEV on unsupported + * BIOS, -EINVAL on problems during initiatization, and zero on + * success. + */ +static int __init speedstep_init(void) +{ + speedstep_processor = speedstep_detect_processor(); + + switch (speedstep_processor) { + case SPEEDSTEP_PROCESSOR_PIII_T: + case SPEEDSTEP_PROCESSOR_PIII_C: + case SPEEDSTEP_PROCESSOR_PIII_C_EARLY: + break; + default: + speedstep_processor = 0; + } + + if (!speedstep_processor) { + dprintk ("No supported Intel CPU detected.\n"); + return -ENODEV; + } + + dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n", + ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level); + + /* Error if no IST-SMI BIOS or no PARM + sig= 'ISGE' aka 'Intel Speedstep Gate E' */ + if ((ist_info.signature != 0x47534943) && ( + (smi_port == 0) || (smi_cmd == 0))) + return -ENODEV; + + if (smi_sig == 1) + smi_sig = 0x47534943; + else + smi_sig = ist_info.signature; + + /* setup smi_port from MODLULE_PARM or BIOS */ + if ((smi_port > 0xff) || (smi_port < 0)) + return -EINVAL; + else if (smi_port == 0) + smi_port = ist_info.command & 0xff; + + if ((smi_cmd > 0xff) || (smi_cmd < 0)) + return -EINVAL; + else if (smi_cmd == 0) + smi_cmd = (ist_info.command >> 16) & 0xff; + + return cpufreq_register_driver(&speedstep_driver); +} + + +/** + * speedstep_exit - unregisters SpeedStep support + * + * Unregisters SpeedStep support. + */ +static void __exit speedstep_exit(void) +{ + cpufreq_unregister_driver(&speedstep_driver); +} + +module_param(smi_port, int, 0444); +module_param(smi_cmd, int, 0444); +module_param(smi_sig, uint, 0444); + +MODULE_PARM_DESC(smi_port, "Override the BIOS-given IST port with this value -- Intel's default setting is 0xb2"); +MODULE_PARM_DESC(smi_cmd, "Override the BIOS-given IST command with this value -- Intel's default setting is 0x82"); +MODULE_PARM_DESC(smi_sig, "Set to 1 to fake the IST signature when using the SMI interface."); + +MODULE_AUTHOR ("Hiroshi Miura"); +MODULE_DESCRIPTION ("Speedstep driver for IST applet SMI interface."); +MODULE_LICENSE ("GPL"); + +module_init(speedstep_init); +module_exit(speedstep_exit); diff --git a/arch/x86_64/kernel/Makefile_64 b/arch/x86_64/kernel/Makefile_64 index 1c9de796fa1..a6d8216084a 100644 --- a/arch/x86_64/kernel/Makefile_64 +++ b/arch/x86_64/kernel/Makefile_64 @@ -27,7 +27,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_64.o relocate_kernel_64.o crash_64.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_64.o obj-$(CONFIG_PM) += suspend_64.o obj-$(CONFIG_HIBERNATION) += suspend_asm_64.o -obj-$(CONFIG_CPU_FREQ) += ../../i386/kernel/cpu/cpufreq/ +obj-$(CONFIG_CPU_FREQ) += ../../x86/kernel/cpu/cpufreq/ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_IOMMU) += pci-gart_64.o aperture_64.o obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o -- cgit v1.2.3-70-g09d2 From 2ec1df4130c60d1eb49dc0fa0ed15858fede6b05 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:28 +0200 Subject: i386: move kernel/cpu/mtrr Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/kernel/cpu/Makefile | 2 +- arch/i386/kernel/cpu/mtrr/Makefile | 3 - arch/i386/kernel/cpu/mtrr/amd.c | 121 ------ arch/i386/kernel/cpu/mtrr/centaur.c | 224 ----------- arch/i386/kernel/cpu/mtrr/cyrix.c | 380 ------------------ arch/i386/kernel/cpu/mtrr/generic.c | 509 ------------------------ arch/i386/kernel/cpu/mtrr/if.c | 439 --------------------- arch/i386/kernel/cpu/mtrr/main.c | 768 ------------------------------------ arch/i386/kernel/cpu/mtrr/mtrr.h | 98 ----- arch/i386/kernel/cpu/mtrr/state.c | 79 ---- arch/x86/kernel/cpu/mtrr/Makefile | 3 + arch/x86/kernel/cpu/mtrr/amd.c | 121 ++++++ arch/x86/kernel/cpu/mtrr/centaur.c | 224 +++++++++++ arch/x86/kernel/cpu/mtrr/cyrix.c | 380 ++++++++++++++++++ arch/x86/kernel/cpu/mtrr/generic.c | 509 ++++++++++++++++++++++++ arch/x86/kernel/cpu/mtrr/if.c | 439 +++++++++++++++++++++ arch/x86/kernel/cpu/mtrr/main.c | 768 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/mtrr/mtrr.h | 98 +++++ arch/x86/kernel/cpu/mtrr/state.c | 79 ++++ arch/x86_64/kernel/Makefile_64 | 2 +- 20 files changed, 2623 insertions(+), 2623 deletions(-) delete mode 100644 arch/i386/kernel/cpu/mtrr/Makefile delete mode 100644 arch/i386/kernel/cpu/mtrr/amd.c delete mode 100644 arch/i386/kernel/cpu/mtrr/centaur.c delete mode 100644 arch/i386/kernel/cpu/mtrr/cyrix.c delete mode 100644 arch/i386/kernel/cpu/mtrr/generic.c delete mode 100644 arch/i386/kernel/cpu/mtrr/if.c delete mode 100644 arch/i386/kernel/cpu/mtrr/main.c delete mode 100644 arch/i386/kernel/cpu/mtrr/mtrr.h delete mode 100644 arch/i386/kernel/cpu/mtrr/state.c create mode 100644 arch/x86/kernel/cpu/mtrr/Makefile create mode 100644 arch/x86/kernel/cpu/mtrr/amd.c create mode 100644 arch/x86/kernel/cpu/mtrr/centaur.c create mode 100644 arch/x86/kernel/cpu/mtrr/cyrix.c create mode 100644 arch/x86/kernel/cpu/mtrr/generic.c create mode 100644 arch/x86/kernel/cpu/mtrr/if.c create mode 100644 arch/x86/kernel/cpu/mtrr/main.c create mode 100644 arch/x86/kernel/cpu/mtrr/mtrr.h create mode 100644 arch/x86/kernel/cpu/mtrr/state.c (limited to 'arch/x86') diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile index 8d9ce0232ad..6687f6d5ad2 100644 --- a/arch/i386/kernel/cpu/Makefile +++ b/arch/i386/kernel/cpu/Makefile @@ -14,7 +14,7 @@ obj-y += umc.o obj-$(CONFIG_X86_MCE) += ../../../x86/kernel/cpu/mcheck/ -obj-$(CONFIG_MTRR) += mtrr/ +obj-$(CONFIG_MTRR) += ../../../x86/kernel/cpu/mtrr/ obj-$(CONFIG_CPU_FREQ) += ../../../x86/kernel/cpu/cpufreq/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o diff --git a/arch/i386/kernel/cpu/mtrr/Makefile b/arch/i386/kernel/cpu/mtrr/Makefile deleted file mode 100644 index 191fc053364..00000000000 --- a/arch/i386/kernel/cpu/mtrr/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -obj-y := main.o if.o generic.o state.o -obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o - diff --git a/arch/i386/kernel/cpu/mtrr/amd.c b/arch/i386/kernel/cpu/mtrr/amd.c deleted file mode 100644 index 0949cdbf848..00000000000 --- a/arch/i386/kernel/cpu/mtrr/amd.c +++ /dev/null @@ -1,121 +0,0 @@ -#include -#include -#include -#include - -#include "mtrr.h" - -static void -amd_get_mtrr(unsigned int reg, unsigned long *base, - unsigned long *size, mtrr_type * type) -{ - unsigned long low, high; - - rdmsr(MSR_K6_UWCCR, low, high); - /* Upper dword is region 1, lower is region 0 */ - if (reg == 1) - low = high; - /* The base masks off on the right alignment */ - *base = (low & 0xFFFE0000) >> PAGE_SHIFT; - *type = 0; - if (low & 1) - *type = MTRR_TYPE_UNCACHABLE; - if (low & 2) - *type = MTRR_TYPE_WRCOMB; - if (!(low & 3)) { - *size = 0; - return; - } - /* - * This needs a little explaining. The size is stored as an - * inverted mask of bits of 128K granularity 15 bits long offset - * 2 bits - * - * So to get a size we do invert the mask and add 1 to the lowest - * mask bit (4 as its 2 bits in). This gives us a size we then shift - * to turn into 128K blocks - * - * eg 111 1111 1111 1100 is 512K - * - * invert 000 0000 0000 0011 - * +1 000 0000 0000 0100 - * *128K ... - */ - low = (~low) & 0x1FFFC; - *size = (low + 4) << (15 - PAGE_SHIFT); - return; -} - -static void amd_set_mtrr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) -/* [SUMMARY] Set variable MTRR register on the local CPU. - The register to set. - The base address of the region. - The size of the region. If this is 0 the region is disabled. - The type of the region. - If TRUE, do the change safely. If FALSE, safety measures should - be done externally. - [RETURNS] Nothing. -*/ -{ - u32 regs[2]; - - /* - * Low is MTRR0 , High MTRR 1 - */ - rdmsr(MSR_K6_UWCCR, regs[0], regs[1]); - /* - * Blank to disable - */ - if (size == 0) - regs[reg] = 0; - else - /* Set the register to the base, the type (off by one) and an - inverted bitmask of the size The size is the only odd - bit. We are fed say 512K We invert this and we get 111 1111 - 1111 1011 but if you subtract one and invert you get the - desired 111 1111 1111 1100 mask - - But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! */ - regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC) - | (base << PAGE_SHIFT) | (type + 1); - - /* - * The writeback rule is quite specific. See the manual. Its - * disable local interrupts, write back the cache, set the mtrr - */ - wbinvd(); - wrmsr(MSR_K6_UWCCR, regs[0], regs[1]); -} - -static int amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) -{ - /* Apply the K6 block alignment and size rules - In order - o Uncached or gathering only - o 128K or bigger block - o Power of 2 block - o base suitably aligned to the power - */ - if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT)) - || (size & ~(size - 1)) - size || (base & (size - 1))) - return -EINVAL; - return 0; -} - -static struct mtrr_ops amd_mtrr_ops = { - .vendor = X86_VENDOR_AMD, - .set = amd_set_mtrr, - .get = amd_get_mtrr, - .get_free_region = generic_get_free_region, - .validate_add_page = amd_validate_add_page, - .have_wrcomb = positive_have_wrcomb, -}; - -int __init amd_init_mtrr(void) -{ - set_mtrr_ops(&amd_mtrr_ops); - return 0; -} - -//arch_initcall(amd_mtrr_init); diff --git a/arch/i386/kernel/cpu/mtrr/centaur.c b/arch/i386/kernel/cpu/mtrr/centaur.c deleted file mode 100644 index cb9aa3a7a7a..00000000000 --- a/arch/i386/kernel/cpu/mtrr/centaur.c +++ /dev/null @@ -1,224 +0,0 @@ -#include -#include -#include -#include -#include "mtrr.h" - -static struct { - unsigned long high; - unsigned long low; -} centaur_mcr[8]; - -static u8 centaur_mcr_reserved; -static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */ - -/* - * Report boot time MCR setups - */ - -static int -centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) -/* [SUMMARY] Get a free MTRR. - The starting (base) address of the region. - The size (in bytes) of the region. - [RETURNS] The index of the region on success, else -1 on error. -*/ -{ - int i, max; - mtrr_type ltype; - unsigned long lbase, lsize; - - max = num_var_ranges; - if (replace_reg >= 0 && replace_reg < max) - return replace_reg; - for (i = 0; i < max; ++i) { - if (centaur_mcr_reserved & (1 << i)) - continue; - mtrr_if->get(i, &lbase, &lsize, <ype); - if (lsize == 0) - return i; - } - return -ENOSPC; -} - -void -mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) -{ - centaur_mcr[mcr].low = lo; - centaur_mcr[mcr].high = hi; -} - -static void -centaur_get_mcr(unsigned int reg, unsigned long *base, - unsigned long *size, mtrr_type * type) -{ - *base = centaur_mcr[reg].high >> PAGE_SHIFT; - *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT; - *type = MTRR_TYPE_WRCOMB; /* If it is there, it is write-combining */ - if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2)) - *type = MTRR_TYPE_UNCACHABLE; - if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25) - *type = MTRR_TYPE_WRBACK; - if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31) - *type = MTRR_TYPE_WRBACK; - -} - -static void centaur_set_mcr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) -{ - unsigned long low, high; - - if (size == 0) { - /* Disable */ - high = low = 0; - } else { - high = base << PAGE_SHIFT; - if (centaur_mcr_type == 0) - low = -size << PAGE_SHIFT | 0x1f; /* only support write-combining... */ - else { - if (type == MTRR_TYPE_UNCACHABLE) - low = -size << PAGE_SHIFT | 0x02; /* NC */ - else - low = -size << PAGE_SHIFT | 0x09; /* WWO,WC */ - } - } - centaur_mcr[reg].high = high; - centaur_mcr[reg].low = low; - wrmsr(MSR_IDT_MCR0 + reg, low, high); -} - -#if 0 -/* - * Initialise the later (saner) Winchip MCR variant. In this version - * the BIOS can pass us the registers it has used (but not their values) - * and the control register is read/write - */ - -static void __init -centaur_mcr1_init(void) -{ - unsigned i; - u32 lo, hi; - - /* Unfortunately, MCR's are read-only, so there is no way to - * find out what the bios might have done. - */ - - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - if (((lo >> 17) & 7) == 1) { /* Type 1 Winchip2 MCR */ - lo &= ~0x1C0; /* clear key */ - lo |= 0x040; /* set key to 1 */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); /* unlock MCR */ - } - - centaur_mcr_type = 1; - - /* - * Clear any unconfigured MCR's. - */ - - for (i = 0; i < 8; ++i) { - if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) { - if (!(lo & (1 << (9 + i)))) - wrmsr(MSR_IDT_MCR0 + i, 0, 0); - else - /* - * If the BIOS set up an MCR we cannot see it - * but we don't wish to obliterate it - */ - centaur_mcr_reserved |= (1 << i); - } - } - /* - * Throw the main write-combining switch... - * However if OOSTORE is enabled then people have already done far - * cleverer things and we should behave. - */ - - lo |= 15; /* Write combine enables */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); -} - -/* - * Initialise the original winchip with read only MCR registers - * no used bitmask for the BIOS to pass on and write only control - */ - -static void __init -centaur_mcr0_init(void) -{ - unsigned i; - - /* Unfortunately, MCR's are read-only, so there is no way to - * find out what the bios might have done. - */ - - /* Clear any unconfigured MCR's. - * This way we are sure that the centaur_mcr array contains the actual - * values. The disadvantage is that any BIOS tweaks are thus undone. - * - */ - for (i = 0; i < 8; ++i) { - if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) - wrmsr(MSR_IDT_MCR0 + i, 0, 0); - } - - wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); /* Write only */ -} - -/* - * Initialise Winchip series MCR registers - */ - -static void __init -centaur_mcr_init(void) -{ - struct set_mtrr_context ctxt; - - set_mtrr_prepare_save(&ctxt); - set_mtrr_cache_disable(&ctxt); - - if (boot_cpu_data.x86_model == 4) - centaur_mcr0_init(); - else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9) - centaur_mcr1_init(); - - set_mtrr_done(&ctxt); -} -#endif - -static int centaur_validate_add_page(unsigned long base, - unsigned long size, unsigned int type) -{ - /* - * FIXME: Winchip2 supports uncached - */ - if (type != MTRR_TYPE_WRCOMB && - (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) { - printk(KERN_WARNING - "mtrr: only write-combining%s supported\n", - centaur_mcr_type ? " and uncacheable are" - : " is"); - return -EINVAL; - } - return 0; -} - -static struct mtrr_ops centaur_mtrr_ops = { - .vendor = X86_VENDOR_CENTAUR, -// .init = centaur_mcr_init, - .set = centaur_set_mcr, - .get = centaur_get_mcr, - .get_free_region = centaur_get_free_region, - .validate_add_page = centaur_validate_add_page, - .have_wrcomb = positive_have_wrcomb, -}; - -int __init centaur_init_mtrr(void) -{ - set_mtrr_ops(¢aur_mtrr_ops); - return 0; -} - -//arch_initcall(centaur_init_mtrr); diff --git a/arch/i386/kernel/cpu/mtrr/cyrix.c b/arch/i386/kernel/cpu/mtrr/cyrix.c deleted file mode 100644 index 2287d4863a8..00000000000 --- a/arch/i386/kernel/cpu/mtrr/cyrix.c +++ /dev/null @@ -1,380 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "mtrr.h" - -int arr3_protected; - -static void -cyrix_get_arr(unsigned int reg, unsigned long *base, - unsigned long *size, mtrr_type * type) -{ - unsigned long flags; - unsigned char arr, ccr3, rcr, shift; - - arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ - - /* Save flags and disable interrupts */ - local_irq_save(flags); - - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - ((unsigned char *) base)[3] = getCx86(arr); - ((unsigned char *) base)[2] = getCx86(arr + 1); - ((unsigned char *) base)[1] = getCx86(arr + 2); - rcr = getCx86(CX86_RCR_BASE + reg); - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ - - /* Enable interrupts if it was enabled previously */ - local_irq_restore(flags); - shift = ((unsigned char *) base)[1] & 0x0f; - *base >>= PAGE_SHIFT; - - /* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7 - * Note: shift==0xf means 4G, this is unsupported. - */ - if (shift) - *size = (reg < 7 ? 0x1UL : 0x40UL) << (shift - 1); - else - *size = 0; - - /* Bit 0 is Cache Enable on ARR7, Cache Disable on ARR0-ARR6 */ - if (reg < 7) { - switch (rcr) { - case 1: - *type = MTRR_TYPE_UNCACHABLE; - break; - case 8: - *type = MTRR_TYPE_WRBACK; - break; - case 9: - *type = MTRR_TYPE_WRCOMB; - break; - case 24: - default: - *type = MTRR_TYPE_WRTHROUGH; - break; - } - } else { - switch (rcr) { - case 0: - *type = MTRR_TYPE_UNCACHABLE; - break; - case 8: - *type = MTRR_TYPE_WRCOMB; - break; - case 9: - *type = MTRR_TYPE_WRBACK; - break; - case 25: - default: - *type = MTRR_TYPE_WRTHROUGH; - break; - } - } -} - -static int -cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) -/* [SUMMARY] Get a free ARR. - The starting (base) address of the region. - The size (in bytes) of the region. - [RETURNS] The index of the region on success, else -1 on error. -*/ -{ - int i; - mtrr_type ltype; - unsigned long lbase, lsize; - - switch (replace_reg) { - case 7: - if (size < 0x40) - break; - case 6: - case 5: - case 4: - return replace_reg; - case 3: - if (arr3_protected) - break; - case 2: - case 1: - case 0: - return replace_reg; - } - /* If we are to set up a region >32M then look at ARR7 immediately */ - if (size > 0x2000) { - cyrix_get_arr(7, &lbase, &lsize, <ype); - if (lsize == 0) - return 7; - /* Else try ARR0-ARR6 first */ - } else { - for (i = 0; i < 7; i++) { - cyrix_get_arr(i, &lbase, &lsize, <ype); - if ((i == 3) && arr3_protected) - continue; - if (lsize == 0) - return i; - } - /* ARR0-ARR6 isn't free, try ARR7 but its size must be at least 256K */ - cyrix_get_arr(i, &lbase, &lsize, <ype); - if ((lsize == 0) && (size >= 0x40)) - return i; - } - return -ENOSPC; -} - -static u32 cr4 = 0; -static u32 ccr3; - -static void prepare_set(void) -{ - u32 cr0; - - /* Save value of CR4 and clear Page Global Enable (bit 7) */ - if ( cpu_has_pge ) { - cr4 = read_cr4(); - write_cr4(cr4 & ~X86_CR4_PGE); - } - - /* Disable and flush caches. Note that wbinvd flushes the TLBs as - a side-effect */ - cr0 = read_cr0() | 0x40000000; - wbinvd(); - write_cr0(cr0); - wbinvd(); - - /* Cyrix ARRs - everything else were excluded at the top */ - ccr3 = getCx86(CX86_CCR3); - - /* Cyrix ARRs - everything else were excluded at the top */ - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); - -} - -static void post_set(void) -{ - /* Flush caches and TLBs */ - wbinvd(); - - /* Cyrix ARRs - everything else was excluded at the top */ - setCx86(CX86_CCR3, ccr3); - - /* Enable caches */ - write_cr0(read_cr0() & 0xbfffffff); - - /* Restore value of CR4 */ - if ( cpu_has_pge ) - write_cr4(cr4); -} - -static void cyrix_set_arr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) -{ - unsigned char arr, arr_type, arr_size; - - arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ - - /* count down from 32M (ARR0-ARR6) or from 2G (ARR7) */ - if (reg >= 7) - size >>= 6; - - size &= 0x7fff; /* make sure arr_size <= 14 */ - for (arr_size = 0; size; arr_size++, size >>= 1) ; - - if (reg < 7) { - switch (type) { - case MTRR_TYPE_UNCACHABLE: - arr_type = 1; - break; - case MTRR_TYPE_WRCOMB: - arr_type = 9; - break; - case MTRR_TYPE_WRTHROUGH: - arr_type = 24; - break; - default: - arr_type = 8; - break; - } - } else { - switch (type) { - case MTRR_TYPE_UNCACHABLE: - arr_type = 0; - break; - case MTRR_TYPE_WRCOMB: - arr_type = 8; - break; - case MTRR_TYPE_WRTHROUGH: - arr_type = 25; - break; - default: - arr_type = 9; - break; - } - } - - prepare_set(); - - base <<= PAGE_SHIFT; - setCx86(arr, ((unsigned char *) &base)[3]); - setCx86(arr + 1, ((unsigned char *) &base)[2]); - setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size); - setCx86(CX86_RCR_BASE + reg, arr_type); - - post_set(); -} - -typedef struct { - unsigned long base; - unsigned long size; - mtrr_type type; -} arr_state_t; - -static arr_state_t arr_state[8] = { - {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, - {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL} -}; - -static unsigned char ccr_state[7] = { 0, 0, 0, 0, 0, 0, 0 }; - -static void cyrix_set_all(void) -{ - int i; - - prepare_set(); - - /* the CCRs are not contiguous */ - for (i = 0; i < 4; i++) - setCx86(CX86_CCR0 + i, ccr_state[i]); - for (; i < 7; i++) - setCx86(CX86_CCR4 + i, ccr_state[i]); - for (i = 0; i < 8; i++) - cyrix_set_arr(i, arr_state[i].base, - arr_state[i].size, arr_state[i].type); - - post_set(); -} - -#if 0 -/* - * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection - * with the SMM (System Management Mode) mode. So we need the following: - * Check whether SMI_LOCK (CCR3 bit 0) is set - * if it is set, write a warning message: ARR3 cannot be changed! - * (it cannot be changed until the next processor reset) - * if it is reset, then we can change it, set all the needed bits: - * - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset) - * - disable access to SMM memory (CCR1 bit 2 reset) - * - disable SMM mode (CCR1 bit 1 reset) - * - disable write protection of ARR3 (CCR6 bit 1 reset) - * - (maybe) disable ARR3 - * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set) - */ -static void __init -cyrix_arr_init(void) -{ - struct set_mtrr_context ctxt; - unsigned char ccr[7]; - int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 }; -#ifdef CONFIG_SMP - int i; -#endif - - /* flush cache and enable MAPEN */ - set_mtrr_prepare_save(&ctxt); - set_mtrr_cache_disable(&ctxt); - - /* Save all CCRs locally */ - ccr[0] = getCx86(CX86_CCR0); - ccr[1] = getCx86(CX86_CCR1); - ccr[2] = getCx86(CX86_CCR2); - ccr[3] = ctxt.ccr3; - ccr[4] = getCx86(CX86_CCR4); - ccr[5] = getCx86(CX86_CCR5); - ccr[6] = getCx86(CX86_CCR6); - - if (ccr[3] & 1) { - ccrc[3] = 1; - arr3_protected = 1; - } else { - /* Disable SMM mode (bit 1), access to SMM memory (bit 2) and - * access to SMM memory through ARR3 (bit 7). - */ - if (ccr[1] & 0x80) { - ccr[1] &= 0x7f; - ccrc[1] |= 0x80; - } - if (ccr[1] & 0x04) { - ccr[1] &= 0xfb; - ccrc[1] |= 0x04; - } - if (ccr[1] & 0x02) { - ccr[1] &= 0xfd; - ccrc[1] |= 0x02; - } - arr3_protected = 0; - if (ccr[6] & 0x02) { - ccr[6] &= 0xfd; - ccrc[6] = 1; /* Disable write protection of ARR3 */ - setCx86(CX86_CCR6, ccr[6]); - } - /* Disable ARR3. This is safe now that we disabled SMM. */ - /* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */ - } - /* If we changed CCR1 in memory, change it in the processor, too. */ - if (ccrc[1]) - setCx86(CX86_CCR1, ccr[1]); - - /* Enable ARR usage by the processor */ - if (!(ccr[5] & 0x20)) { - ccr[5] |= 0x20; - ccrc[5] = 1; - setCx86(CX86_CCR5, ccr[5]); - } -#ifdef CONFIG_SMP - for (i = 0; i < 7; i++) - ccr_state[i] = ccr[i]; - for (i = 0; i < 8; i++) - cyrix_get_arr(i, - &arr_state[i].base, &arr_state[i].size, - &arr_state[i].type); -#endif - - set_mtrr_done(&ctxt); /* flush cache and disable MAPEN */ - - if (ccrc[5]) - printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled manually\n"); - if (ccrc[3]) - printk(KERN_INFO "mtrr: ARR3 cannot be changed\n"); -/* - if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3 disabled\n"); - if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n"); - if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n"); -*/ - if (ccrc[6]) - printk(KERN_INFO "mtrr: ARR3 was write protected, unprotected\n"); -} -#endif - -static struct mtrr_ops cyrix_mtrr_ops = { - .vendor = X86_VENDOR_CYRIX, -// .init = cyrix_arr_init, - .set_all = cyrix_set_all, - .set = cyrix_set_arr, - .get = cyrix_get_arr, - .get_free_region = cyrix_get_free_region, - .validate_add_page = generic_validate_add_page, - .have_wrcomb = positive_have_wrcomb, -}; - -int __init cyrix_init_mtrr(void) -{ - set_mtrr_ops(&cyrix_mtrr_ops); - return 0; -} - -//arch_initcall(cyrix_init_mtrr); diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c deleted file mode 100644 index 56f64e34829..00000000000 --- a/arch/i386/kernel/cpu/mtrr/generic.c +++ /dev/null @@ -1,509 +0,0 @@ -/* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong - because MTRRs can span upto 40 bits (36bits on most modern x86) */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "mtrr.h" - -struct mtrr_state { - struct mtrr_var_range *var_ranges; - mtrr_type fixed_ranges[NUM_FIXED_RANGES]; - unsigned char enabled; - unsigned char have_fixed; - mtrr_type def_type; -}; - -struct fixed_range_block { - int base_msr; /* start address of an MTRR block */ - int ranges; /* number of MTRRs in this block */ -}; - -static struct fixed_range_block fixed_range_blocks[] = { - { MTRRfix64K_00000_MSR, 1 }, /* one 64k MTRR */ - { MTRRfix16K_80000_MSR, 2 }, /* two 16k MTRRs */ - { MTRRfix4K_C0000_MSR, 8 }, /* eight 4k MTRRs */ - {} -}; - -static unsigned long smp_changes_mask; -static struct mtrr_state mtrr_state = {}; - -#undef MODULE_PARAM_PREFIX -#define MODULE_PARAM_PREFIX "mtrr." - -static int mtrr_show; -module_param_named(show, mtrr_show, bool, 0); - -/* Get the MSR pair relating to a var range */ -static void -get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) -{ - rdmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); - rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); -} - -static void -get_fixed_ranges(mtrr_type * frs) -{ - unsigned int *p = (unsigned int *) frs; - int i; - - rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]); - - for (i = 0; i < 2; i++) - rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]); - for (i = 0; i < 8; i++) - rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]); -} - -void mtrr_save_fixed_ranges(void *info) -{ - if (cpu_has_mtrr) - get_fixed_ranges(mtrr_state.fixed_ranges); -} - -static void print_fixed(unsigned base, unsigned step, const mtrr_type*types) -{ - unsigned i; - - for (i = 0; i < 8; ++i, ++types, base += step) - printk(KERN_INFO "MTRR %05X-%05X %s\n", - base, base + step - 1, mtrr_attrib_to_str(*types)); -} - -/* Grab all of the MTRR state for this CPU into *state */ -void __init get_mtrr_state(void) -{ - unsigned int i; - struct mtrr_var_range *vrs; - unsigned lo, dummy; - - if (!mtrr_state.var_ranges) { - mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range), - GFP_KERNEL); - if (!mtrr_state.var_ranges) - return; - } - vrs = mtrr_state.var_ranges; - - rdmsr(MTRRcap_MSR, lo, dummy); - mtrr_state.have_fixed = (lo >> 8) & 1; - - for (i = 0; i < num_var_ranges; i++) - get_mtrr_var_range(i, &vrs[i]); - if (mtrr_state.have_fixed) - get_fixed_ranges(mtrr_state.fixed_ranges); - - rdmsr(MTRRdefType_MSR, lo, dummy); - mtrr_state.def_type = (lo & 0xff); - mtrr_state.enabled = (lo & 0xc00) >> 10; - - if (mtrr_show) { - int high_width; - - printk(KERN_INFO "MTRR default type: %s\n", mtrr_attrib_to_str(mtrr_state.def_type)); - if (mtrr_state.have_fixed) { - printk(KERN_INFO "MTRR fixed ranges %sabled:\n", - mtrr_state.enabled & 1 ? "en" : "dis"); - print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); - for (i = 0; i < 2; ++i) - print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8); - for (i = 0; i < 8; ++i) - print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8); - } - printk(KERN_INFO "MTRR variable ranges %sabled:\n", - mtrr_state.enabled & 2 ? "en" : "dis"); - high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4; - for (i = 0; i < num_var_ranges; ++i) { - if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) - printk(KERN_INFO "MTRR %u base %0*X%05X000 mask %0*X%05X000 %s\n", - i, - high_width, - mtrr_state.var_ranges[i].base_hi, - mtrr_state.var_ranges[i].base_lo >> 12, - high_width, - mtrr_state.var_ranges[i].mask_hi, - mtrr_state.var_ranges[i].mask_lo >> 12, - mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); - else - printk(KERN_INFO "MTRR %u disabled\n", i); - } - } -} - -/* Some BIOS's are fucked and don't set all MTRRs the same! */ -void __init mtrr_state_warn(void) -{ - unsigned long mask = smp_changes_mask; - - if (!mask) - return; - if (mask & MTRR_CHANGE_MASK_FIXED) - printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed MTRR settings\n"); - if (mask & MTRR_CHANGE_MASK_VARIABLE) - printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable MTRR settings\n"); - if (mask & MTRR_CHANGE_MASK_DEFTYPE) - printk(KERN_WARNING "mtrr: your CPUs had inconsistent MTRRdefType settings\n"); - printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n"); - printk(KERN_INFO "mtrr: corrected configuration.\n"); -} - -/* Doesn't attempt to pass an error out to MTRR users - because it's quite complicated in some cases and probably not - worth it because the best error handling is to ignore it. */ -void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) -{ - if (wrmsr_safe(msr, a, b) < 0) - printk(KERN_ERR - "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", - smp_processor_id(), msr, a, b); -} - -/** - * Enable and allow read/write of extended fixed-range MTRR bits on K8 CPUs - * see AMD publication no. 24593, chapter 3.2.1 for more information - */ -static inline void k8_enable_fixed_iorrs(void) -{ - unsigned lo, hi; - - rdmsr(MSR_K8_SYSCFG, lo, hi); - mtrr_wrmsr(MSR_K8_SYSCFG, lo - | K8_MTRRFIXRANGE_DRAM_ENABLE - | K8_MTRRFIXRANGE_DRAM_MODIFY, hi); -} - -/** - * Checks and updates an fixed-range MTRR if it differs from the value it - * should have. If K8 extenstions are wanted, update the K8 SYSCFG MSR also. - * see AMD publication no. 24593, chapter 7.8.1, page 233 for more information - * \param msr MSR address of the MTTR which should be checked and updated - * \param changed pointer which indicates whether the MTRR needed to be changed - * \param msrwords pointer to the MSR values which the MSR should have - */ -static void set_fixed_range(int msr, int * changed, unsigned int * msrwords) -{ - unsigned lo, hi; - - rdmsr(msr, lo, hi); - - if (lo != msrwords[0] || hi != msrwords[1]) { - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 == 15 && - ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) - k8_enable_fixed_iorrs(); - mtrr_wrmsr(msr, msrwords[0], msrwords[1]); - *changed = TRUE; - } -} - -int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) -/* [SUMMARY] Get a free MTRR. - The starting (base) address of the region. - The size (in bytes) of the region. - [RETURNS] The index of the region on success, else -1 on error. -*/ -{ - int i, max; - mtrr_type ltype; - unsigned long lbase, lsize; - - max = num_var_ranges; - if (replace_reg >= 0 && replace_reg < max) - return replace_reg; - for (i = 0; i < max; ++i) { - mtrr_if->get(i, &lbase, &lsize, <ype); - if (lsize == 0) - return i; - } - return -ENOSPC; -} - -static void generic_get_mtrr(unsigned int reg, unsigned long *base, - unsigned long *size, mtrr_type *type) -{ - unsigned int mask_lo, mask_hi, base_lo, base_hi; - - rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); - if ((mask_lo & 0x800) == 0) { - /* Invalid (i.e. free) range */ - *base = 0; - *size = 0; - *type = 0; - return; - } - - rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); - - /* Work out the shifted address mask. */ - mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT) - | mask_lo >> PAGE_SHIFT; - - /* This works correctly if size is a power of two, i.e. a - contiguous range. */ - *size = -mask_lo; - *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; - *type = base_lo & 0xff; -} - -/** - * Checks and updates the fixed-range MTRRs if they differ from the saved set - * \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges() - */ -static int set_fixed_ranges(mtrr_type * frs) -{ - unsigned long long *saved = (unsigned long long *) frs; - int changed = FALSE; - int block=-1, range; - - while (fixed_range_blocks[++block].ranges) - for (range=0; range < fixed_range_blocks[block].ranges; range++) - set_fixed_range(fixed_range_blocks[block].base_msr + range, - &changed, (unsigned int *) saved++); - - return changed; -} - -/* Set the MSR pair relating to a var range. Returns TRUE if - changes are made */ -static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) -{ - unsigned int lo, hi; - int changed = FALSE; - - rdmsr(MTRRphysBase_MSR(index), lo, hi); - if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) - || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != - (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { - mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); - changed = TRUE; - } - - rdmsr(MTRRphysMask_MSR(index), lo, hi); - - if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL) - || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != - (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { - mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); - changed = TRUE; - } - return changed; -} - -static u32 deftype_lo, deftype_hi; - -static unsigned long set_mtrr_state(void) -/* [SUMMARY] Set the MTRR state for this CPU. - The MTRR state information to read. - Some relevant CPU context. - [NOTE] The CPU must already be in a safe state for MTRR changes. - [RETURNS] 0 if no changes made, else a mask indication what was changed. -*/ -{ - unsigned int i; - unsigned long change_mask = 0; - - for (i = 0; i < num_var_ranges; i++) - if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i])) - change_mask |= MTRR_CHANGE_MASK_VARIABLE; - - if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges)) - change_mask |= MTRR_CHANGE_MASK_FIXED; - - /* Set_mtrr_restore restores the old value of MTRRdefType, - so to set it we fiddle with the saved value */ - if ((deftype_lo & 0xff) != mtrr_state.def_type - || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) { - deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | (mtrr_state.enabled << 10); - change_mask |= MTRR_CHANGE_MASK_DEFTYPE; - } - - return change_mask; -} - - -static unsigned long cr4 = 0; -static DEFINE_SPINLOCK(set_atomicity_lock); - -/* - * Since we are disabling the cache don't allow any interrupts - they - * would run extremely slow and would only increase the pain. The caller must - * ensure that local interrupts are disabled and are reenabled after post_set() - * has been called. - */ - -static void prepare_set(void) __acquires(set_atomicity_lock) -{ - unsigned long cr0; - - /* Note that this is not ideal, since the cache is only flushed/disabled - for this CPU while the MTRRs are changed, but changing this requires - more invasive changes to the way the kernel boots */ - - spin_lock(&set_atomicity_lock); - - /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ - cr0 = read_cr0() | 0x40000000; /* set CD flag */ - write_cr0(cr0); - wbinvd(); - - /* Save value of CR4 and clear Page Global Enable (bit 7) */ - if ( cpu_has_pge ) { - cr4 = read_cr4(); - write_cr4(cr4 & ~X86_CR4_PGE); - } - - /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ - __flush_tlb(); - - /* Save MTRR state */ - rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); - - /* Disable MTRRs, and set the default type to uncached */ - mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & ~0xcff, deftype_hi); -} - -static void post_set(void) __releases(set_atomicity_lock) -{ - /* Flush TLBs (no need to flush caches - they are disabled) */ - __flush_tlb(); - - /* Intel (P6) standard MTRRs */ - mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); - - /* Enable caches */ - write_cr0(read_cr0() & 0xbfffffff); - - /* Restore value of CR4 */ - if ( cpu_has_pge ) - write_cr4(cr4); - spin_unlock(&set_atomicity_lock); -} - -static void generic_set_all(void) -{ - unsigned long mask, count; - unsigned long flags; - - local_irq_save(flags); - prepare_set(); - - /* Actually set the state */ - mask = set_mtrr_state(); - - post_set(); - local_irq_restore(flags); - - /* Use the atomic bitops to update the global mask */ - for (count = 0; count < sizeof mask * 8; ++count) { - if (mask & 0x01) - set_bit(count, &smp_changes_mask); - mask >>= 1; - } - -} - -static void generic_set_mtrr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) -/* [SUMMARY] Set variable MTRR register on the local CPU. - The register to set. - The base address of the region. - The size of the region. If this is 0 the region is disabled. - The type of the region. - If TRUE, do the change safely. If FALSE, safety measures should - be done externally. - [RETURNS] Nothing. -*/ -{ - unsigned long flags; - struct mtrr_var_range *vr; - - vr = &mtrr_state.var_ranges[reg]; - - local_irq_save(flags); - prepare_set(); - - if (size == 0) { - /* The invalid bit is kept in the mask, so we simply clear the - relevant mask register to disable a range. */ - mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); - memset(vr, 0, sizeof(struct mtrr_var_range)); - } else { - vr->base_lo = base << PAGE_SHIFT | type; - vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT); - vr->mask_lo = -size << PAGE_SHIFT | 0x800; - vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT); - - mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi); - mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi); - } - - post_set(); - local_irq_restore(flags); -} - -int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type) -{ - unsigned long lbase, last; - - /* For Intel PPro stepping <= 7, must be 4 MiB aligned - and not touch 0x70000000->0x7003FFFF */ - if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask <= 7) { - if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { - printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); - return -EINVAL; - } - if (!(base + size < 0x70000 || base > 0x7003F) && - (type == MTRR_TYPE_WRCOMB - || type == MTRR_TYPE_WRBACK)) { - printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); - return -EINVAL; - } - } - - /* Check upper bits of base and last are equal and lower bits are 0 - for base and 1 for last */ - last = base + size - 1; - for (lbase = base; !(lbase & 1) && (last & 1); - lbase = lbase >> 1, last = last >> 1) ; - if (lbase != last) { - printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", - base, size); - return -EINVAL; - } - return 0; -} - - -static int generic_have_wrcomb(void) -{ - unsigned long config, dummy; - rdmsr(MTRRcap_MSR, config, dummy); - return (config & (1 << 10)); -} - -int positive_have_wrcomb(void) -{ - return 1; -} - -/* generic structure... - */ -struct mtrr_ops generic_mtrr_ops = { - .use_intel_if = 1, - .set_all = generic_set_all, - .get = generic_get_mtrr, - .get_free_region = generic_get_free_region, - .set = generic_set_mtrr, - .validate_add_page = generic_validate_add_page, - .have_wrcomb = generic_have_wrcomb, -}; diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c deleted file mode 100644 index c7d8f175674..00000000000 --- a/arch/i386/kernel/cpu/mtrr/if.c +++ /dev/null @@ -1,439 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#define LINE_SIZE 80 - -#include -#include "mtrr.h" - -/* RED-PEN: this is accessed without any locking */ -extern unsigned int *usage_table; - - -#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) - -static const char *const mtrr_strings[MTRR_NUM_TYPES] = -{ - "uncachable", /* 0 */ - "write-combining", /* 1 */ - "?", /* 2 */ - "?", /* 3 */ - "write-through", /* 4 */ - "write-protect", /* 5 */ - "write-back", /* 6 */ -}; - -const char *mtrr_attrib_to_str(int x) -{ - return (x <= 6) ? mtrr_strings[x] : "?"; -} - -#ifdef CONFIG_PROC_FS - -static int -mtrr_file_add(unsigned long base, unsigned long size, - unsigned int type, char increment, struct file *file, int page) -{ - int reg, max; - unsigned int *fcount = FILE_FCOUNT(file); - - max = num_var_ranges; - if (fcount == NULL) { - fcount = kzalloc(max * sizeof *fcount, GFP_KERNEL); - if (!fcount) - return -ENOMEM; - FILE_FCOUNT(file) = fcount; - } - if (!page) { - if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) - return -EINVAL; - base >>= PAGE_SHIFT; - size >>= PAGE_SHIFT; - } - reg = mtrr_add_page(base, size, type, 1); - if (reg >= 0) - ++fcount[reg]; - return reg; -} - -static int -mtrr_file_del(unsigned long base, unsigned long size, - struct file *file, int page) -{ - int reg; - unsigned int *fcount = FILE_FCOUNT(file); - - if (!page) { - if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) - return -EINVAL; - base >>= PAGE_SHIFT; - size >>= PAGE_SHIFT; - } - reg = mtrr_del_page(-1, base, size); - if (reg < 0) - return reg; - if (fcount == NULL) - return reg; - if (fcount[reg] < 1) - return -EINVAL; - --fcount[reg]; - return reg; -} - -/* RED-PEN: seq_file can seek now. this is ignored. */ -static ssize_t -mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) -/* Format of control line: - "base=%Lx size=%Lx type=%s" OR: - "disable=%d" -*/ -{ - int i, err; - unsigned long reg; - unsigned long long base, size; - char *ptr; - char line[LINE_SIZE]; - size_t linelen; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (!len) - return -EINVAL; - memset(line, 0, LINE_SIZE); - if (len > LINE_SIZE) - len = LINE_SIZE; - if (copy_from_user(line, buf, len - 1)) - return -EFAULT; - linelen = strlen(line); - ptr = line + linelen - 1; - if (linelen && *ptr == '\n') - *ptr = '\0'; - if (!strncmp(line, "disable=", 8)) { - reg = simple_strtoul(line + 8, &ptr, 0); - err = mtrr_del_page(reg, 0, 0); - if (err < 0) - return err; - return len; - } - if (strncmp(line, "base=", 5)) - return -EINVAL; - base = simple_strtoull(line + 5, &ptr, 0); - for (; isspace(*ptr); ++ptr) ; - if (strncmp(ptr, "size=", 5)) - return -EINVAL; - size = simple_strtoull(ptr + 5, &ptr, 0); - if ((base & 0xfff) || (size & 0xfff)) - return -EINVAL; - for (; isspace(*ptr); ++ptr) ; - if (strncmp(ptr, "type=", 5)) - return -EINVAL; - ptr += 5; - for (; isspace(*ptr); ++ptr) ; - for (i = 0; i < MTRR_NUM_TYPES; ++i) { - if (strcmp(ptr, mtrr_strings[i])) - continue; - base >>= PAGE_SHIFT; - size >>= PAGE_SHIFT; - err = - mtrr_add_page((unsigned long) base, (unsigned long) size, i, - 1); - if (err < 0) - return err; - return len; - } - return -EINVAL; -} - -static long -mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) -{ - int err = 0; - mtrr_type type; - unsigned long size; - struct mtrr_sentry sentry; - struct mtrr_gentry gentry; - void __user *arg = (void __user *) __arg; - - switch (cmd) { - case MTRRIOC_ADD_ENTRY: - case MTRRIOC_SET_ENTRY: - case MTRRIOC_DEL_ENTRY: - case MTRRIOC_KILL_ENTRY: - case MTRRIOC_ADD_PAGE_ENTRY: - case MTRRIOC_SET_PAGE_ENTRY: - case MTRRIOC_DEL_PAGE_ENTRY: - case MTRRIOC_KILL_PAGE_ENTRY: - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; - break; - case MTRRIOC_GET_ENTRY: - case MTRRIOC_GET_PAGE_ENTRY: - if (copy_from_user(&gentry, arg, sizeof gentry)) - return -EFAULT; - break; -#ifdef CONFIG_COMPAT - case MTRRIOC32_ADD_ENTRY: - case MTRRIOC32_SET_ENTRY: - case MTRRIOC32_DEL_ENTRY: - case MTRRIOC32_KILL_ENTRY: - case MTRRIOC32_ADD_PAGE_ENTRY: - case MTRRIOC32_SET_PAGE_ENTRY: - case MTRRIOC32_DEL_PAGE_ENTRY: - case MTRRIOC32_KILL_PAGE_ENTRY: { - struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)__arg; - err = get_user(sentry.base, &s32->base); - err |= get_user(sentry.size, &s32->size); - err |= get_user(sentry.type, &s32->type); - if (err) - return err; - break; - } - case MTRRIOC32_GET_ENTRY: - case MTRRIOC32_GET_PAGE_ENTRY: { - struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; - err = get_user(gentry.regnum, &g32->regnum); - err |= get_user(gentry.base, &g32->base); - err |= get_user(gentry.size, &g32->size); - err |= get_user(gentry.type, &g32->type); - if (err) - return err; - break; - } -#endif - } - - switch (cmd) { - default: - return -ENOTTY; - case MTRRIOC_ADD_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_ADD_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = - mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, - file, 0); - break; - case MTRRIOC_SET_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_SET_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); - break; - case MTRRIOC_DEL_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_DEL_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = mtrr_file_del(sentry.base, sentry.size, file, 0); - break; - case MTRRIOC_KILL_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_KILL_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = mtrr_del(-1, sentry.base, sentry.size); - break; - case MTRRIOC_GET_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_GET_ENTRY: -#endif - if (gentry.regnum >= num_var_ranges) - return -EINVAL; - mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); - - /* Hide entries that go above 4GB */ - if (gentry.base + size - 1 >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT)) - || size >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT))) - gentry.base = gentry.size = gentry.type = 0; - else { - gentry.base <<= PAGE_SHIFT; - gentry.size = size << PAGE_SHIFT; - gentry.type = type; - } - - break; - case MTRRIOC_ADD_PAGE_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_ADD_PAGE_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = - mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, - file, 1); - break; - case MTRRIOC_SET_PAGE_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_SET_PAGE_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); - break; - case MTRRIOC_DEL_PAGE_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_DEL_PAGE_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = mtrr_file_del(sentry.base, sentry.size, file, 1); - break; - case MTRRIOC_KILL_PAGE_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_KILL_PAGE_ENTRY: -#endif - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - err = mtrr_del_page(-1, sentry.base, sentry.size); - break; - case MTRRIOC_GET_PAGE_ENTRY: -#ifdef CONFIG_COMPAT - case MTRRIOC32_GET_PAGE_ENTRY: -#endif - if (gentry.regnum >= num_var_ranges) - return -EINVAL; - mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); - /* Hide entries that would overflow */ - if (size != (__typeof__(gentry.size))size) - gentry.base = gentry.size = gentry.type = 0; - else { - gentry.size = size; - gentry.type = type; - } - break; - } - - if (err) - return err; - - switch(cmd) { - case MTRRIOC_GET_ENTRY: - case MTRRIOC_GET_PAGE_ENTRY: - if (copy_to_user(arg, &gentry, sizeof gentry)) - err = -EFAULT; - break; -#ifdef CONFIG_COMPAT - case MTRRIOC32_GET_ENTRY: - case MTRRIOC32_GET_PAGE_ENTRY: { - struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; - err = put_user(gentry.base, &g32->base); - err |= put_user(gentry.size, &g32->size); - err |= put_user(gentry.regnum, &g32->regnum); - err |= put_user(gentry.type, &g32->type); - break; - } -#endif - } - return err; -} - -static int -mtrr_close(struct inode *ino, struct file *file) -{ - int i, max; - unsigned int *fcount = FILE_FCOUNT(file); - - if (fcount != NULL) { - max = num_var_ranges; - for (i = 0; i < max; ++i) { - while (fcount[i] > 0) { - mtrr_del(i, 0, 0); - --fcount[i]; - } - } - kfree(fcount); - FILE_FCOUNT(file) = NULL; - } - return single_release(ino, file); -} - -static int mtrr_seq_show(struct seq_file *seq, void *offset); - -static int mtrr_open(struct inode *inode, struct file *file) -{ - if (!mtrr_if) - return -EIO; - if (!mtrr_if->get) - return -ENXIO; - return single_open(file, mtrr_seq_show, NULL); -} - -static const struct file_operations mtrr_fops = { - .owner = THIS_MODULE, - .open = mtrr_open, - .read = seq_read, - .llseek = seq_lseek, - .write = mtrr_write, - .unlocked_ioctl = mtrr_ioctl, - .compat_ioctl = mtrr_ioctl, - .release = mtrr_close, -}; - - -static struct proc_dir_entry *proc_root_mtrr; - - -static int mtrr_seq_show(struct seq_file *seq, void *offset) -{ - char factor; - int i, max, len; - mtrr_type type; - unsigned long base, size; - - len = 0; - max = num_var_ranges; - for (i = 0; i < max; i++) { - mtrr_if->get(i, &base, &size, &type); - if (size == 0) - usage_table[i] = 0; - else { - if (size < (0x100000 >> PAGE_SHIFT)) { - /* less than 1MB */ - factor = 'K'; - size <<= PAGE_SHIFT - 10; - } else { - factor = 'M'; - size >>= 20 - PAGE_SHIFT; - } - /* RED-PEN: base can be > 32bit */ - len += seq_printf(seq, - "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n", - i, base, base >> (20 - PAGE_SHIFT), size, factor, - mtrr_attrib_to_str(type), usage_table[i]); - } - } - return 0; -} - -static int __init mtrr_if_init(void) -{ - struct cpuinfo_x86 *c = &boot_cpu_data; - - if ((!cpu_has(c, X86_FEATURE_MTRR)) && - (!cpu_has(c, X86_FEATURE_K6_MTRR)) && - (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) && - (!cpu_has(c, X86_FEATURE_CENTAUR_MCR))) - return -ENODEV; - - proc_root_mtrr = - create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root); - if (proc_root_mtrr) { - proc_root_mtrr->owner = THIS_MODULE; - proc_root_mtrr->proc_fops = &mtrr_fops; - } - return 0; -} - -arch_initcall(mtrr_if_init); -#endif /* CONFIG_PROC_FS */ diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c deleted file mode 100644 index c48b6fea5ab..00000000000 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ /dev/null @@ -1,768 +0,0 @@ -/* Generic MTRR (Memory Type Range Register) driver. - - Copyright (C) 1997-2000 Richard Gooch - Copyright (c) 2002 Patrick Mochel - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with this library; if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - Richard Gooch may be reached by email at rgooch@atnf.csiro.au - The postal address is: - Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. - - Source: "Pentium Pro Family Developer's Manual, Volume 3: - Operating System Writer's Guide" (Intel document number 242692), - section 11.11.7 - - This was cleaned and made readable by Patrick Mochel - on 6-7 March 2002. - Source: Intel Architecture Software Developers Manual, Volume 3: - System Programming Guide; Section 9.11. (1997 edition - PPro). -*/ - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include "mtrr.h" - -u32 num_var_ranges = 0; - -unsigned int *usage_table; -static DEFINE_MUTEX(mtrr_mutex); - -u64 size_or_mask, size_and_mask; - -static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; - -struct mtrr_ops * mtrr_if = NULL; - -static void set_mtrr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type); - -#ifndef CONFIG_X86_64 -extern int arr3_protected; -#else -#define arr3_protected 0 -#endif - -void set_mtrr_ops(struct mtrr_ops * ops) -{ - if (ops->vendor && ops->vendor < X86_VENDOR_NUM) - mtrr_ops[ops->vendor] = ops; -} - -/* Returns non-zero if we have the write-combining memory type */ -static int have_wrcomb(void) -{ - struct pci_dev *dev; - u8 rev; - - if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) { - /* ServerWorks LE chipsets < rev 6 have problems with write-combining - Don't allow it and leave room for other chipsets to be tagged */ - if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && - dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { - pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); - if (rev <= 5) { - printk(KERN_INFO "mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); - pci_dev_put(dev); - return 0; - } - } - /* Intel 450NX errata # 23. Non ascending cacheline evictions to - write combining memory may resulting in data corruption */ - if (dev->vendor == PCI_VENDOR_ID_INTEL && - dev->device == PCI_DEVICE_ID_INTEL_82451NX) { - printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n"); - pci_dev_put(dev); - return 0; - } - pci_dev_put(dev); - } - return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0); -} - -/* This function returns the number of variable MTRRs */ -static void __init set_num_var_ranges(void) -{ - unsigned long config = 0, dummy; - - if (use_intel()) { - rdmsr(MTRRcap_MSR, config, dummy); - } else if (is_cpu(AMD)) - config = 2; - else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) - config = 8; - num_var_ranges = config & 0xff; -} - -static void __init init_table(void) -{ - int i, max; - - max = num_var_ranges; - if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL)) - == NULL) { - printk(KERN_ERR "mtrr: could not allocate\n"); - return; - } - for (i = 0; i < max; i++) - usage_table[i] = 1; -} - -struct set_mtrr_data { - atomic_t count; - atomic_t gate; - unsigned long smp_base; - unsigned long smp_size; - unsigned int smp_reg; - mtrr_type smp_type; -}; - -#ifdef CONFIG_SMP - -static void ipi_handler(void *info) -/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs. - [RETURNS] Nothing. -*/ -{ - struct set_mtrr_data *data = info; - unsigned long flags; - - local_irq_save(flags); - - atomic_dec(&data->count); - while(!atomic_read(&data->gate)) - cpu_relax(); - - /* The master has cleared me to execute */ - if (data->smp_reg != ~0U) - mtrr_if->set(data->smp_reg, data->smp_base, - data->smp_size, data->smp_type); - else - mtrr_if->set_all(); - - atomic_dec(&data->count); - while(atomic_read(&data->gate)) - cpu_relax(); - - atomic_dec(&data->count); - local_irq_restore(flags); -} - -#endif - -static inline int types_compatible(mtrr_type type1, mtrr_type type2) { - return type1 == MTRR_TYPE_UNCACHABLE || - type2 == MTRR_TYPE_UNCACHABLE || - (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) || - (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH); -} - -/** - * set_mtrr - update mtrrs on all processors - * @reg: mtrr in question - * @base: mtrr base - * @size: mtrr size - * @type: mtrr type - * - * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: - * - * 1. Send IPI to do the following: - * 2. Disable Interrupts - * 3. Wait for all procs to do so - * 4. Enter no-fill cache mode - * 5. Flush caches - * 6. Clear PGE bit - * 7. Flush all TLBs - * 8. Disable all range registers - * 9. Update the MTRRs - * 10. Enable all range registers - * 11. Flush all TLBs and caches again - * 12. Enter normal cache mode and reenable caching - * 13. Set PGE - * 14. Wait for buddies to catch up - * 15. Enable interrupts. - * - * What does that mean for us? Well, first we set data.count to the number - * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait - * until it hits 0 and proceed. We set the data.gate flag and reset data.count. - * Meanwhile, they are waiting for that flag to be set. Once it's set, each - * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it - * differently, so we call mtrr_if->set() callback and let them take care of it. - * When they're done, they again decrement data->count and wait for data.gate to - * be reset. - * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag. - * Everyone then enables interrupts and we all continue on. - * - * Note that the mechanism is the same for UP systems, too; all the SMP stuff - * becomes nops. - */ -static void set_mtrr(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type) -{ - struct set_mtrr_data data; - unsigned long flags; - - data.smp_reg = reg; - data.smp_base = base; - data.smp_size = size; - data.smp_type = type; - atomic_set(&data.count, num_booting_cpus() - 1); - /* make sure data.count is visible before unleashing other CPUs */ - smp_wmb(); - atomic_set(&data.gate,0); - - /* Start the ball rolling on other CPUs */ - if (smp_call_function(ipi_handler, &data, 1, 0) != 0) - panic("mtrr: timed out waiting for other CPUs\n"); - - local_irq_save(flags); - - while(atomic_read(&data.count)) - cpu_relax(); - - /* ok, reset count and toggle gate */ - atomic_set(&data.count, num_booting_cpus() - 1); - smp_wmb(); - atomic_set(&data.gate,1); - - /* do our MTRR business */ - - /* HACK! - * We use this same function to initialize the mtrrs on boot. - * The state of the boot cpu's mtrrs has been saved, and we want - * to replicate across all the APs. - * If we're doing that @reg is set to something special... - */ - if (reg != ~0U) - mtrr_if->set(reg,base,size,type); - - /* wait for the others */ - while(atomic_read(&data.count)) - cpu_relax(); - - atomic_set(&data.count, num_booting_cpus() - 1); - smp_wmb(); - atomic_set(&data.gate,0); - - /* - * Wait here for everyone to have seen the gate change - * So we're the last ones to touch 'data' - */ - while(atomic_read(&data.count)) - cpu_relax(); - - local_irq_restore(flags); -} - -/** - * mtrr_add_page - Add a memory type region - * @base: Physical base address of region in pages (in units of 4 kB!) - * @size: Physical size of region in pages (4 kB) - * @type: Type of MTRR desired - * @increment: If this is true do usage counting on the region - * - * Memory type region registers control the caching on newer Intel and - * non Intel processors. This function allows drivers to request an - * MTRR is added. The details and hardware specifics of each processor's - * implementation are hidden from the caller, but nevertheless the - * caller should expect to need to provide a power of two size on an - * equivalent power of two boundary. - * - * If the region cannot be added either because all regions are in use - * or the CPU cannot support it a negative value is returned. On success - * the register number for this entry is returned, but should be treated - * as a cookie only. - * - * On a multiprocessor machine the changes are made to all processors. - * This is required on x86 by the Intel processors. - * - * The available types are - * - * %MTRR_TYPE_UNCACHABLE - No caching - * - * %MTRR_TYPE_WRBACK - Write data back in bursts whenever - * - * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts - * - * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes - * - * BUGS: Needs a quiet flag for the cases where drivers do not mind - * failures and do not wish system log messages to be sent. - */ - -int mtrr_add_page(unsigned long base, unsigned long size, - unsigned int type, char increment) -{ - int i, replace, error; - mtrr_type ltype; - unsigned long lbase, lsize; - - if (!mtrr_if) - return -ENXIO; - - if ((error = mtrr_if->validate_add_page(base,size,type))) - return error; - - if (type >= MTRR_NUM_TYPES) { - printk(KERN_WARNING "mtrr: type: %u invalid\n", type); - return -EINVAL; - } - - /* If the type is WC, check that this processor supports it */ - if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { - printk(KERN_WARNING - "mtrr: your processor doesn't support write-combining\n"); - return -ENOSYS; - } - - if (!size) { - printk(KERN_WARNING "mtrr: zero sized request\n"); - return -EINVAL; - } - - if (base & size_or_mask || size & size_or_mask) { - printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n"); - return -EINVAL; - } - - error = -EINVAL; - replace = -1; - - /* No CPU hotplug when we change MTRR entries */ - lock_cpu_hotplug(); - /* Search for existing MTRR */ - mutex_lock(&mtrr_mutex); - for (i = 0; i < num_var_ranges; ++i) { - mtrr_if->get(i, &lbase, &lsize, <ype); - if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase) - continue; - /* At this point we know there is some kind of overlap/enclosure */ - if (base < lbase || base + size - 1 > lbase + lsize - 1) { - if (base <= lbase && base + size - 1 >= lbase + lsize - 1) { - /* New region encloses an existing region */ - if (type == ltype) { - replace = replace == -1 ? i : -2; - continue; - } - else if (types_compatible(type, ltype)) - continue; - } - printk(KERN_WARNING - "mtrr: 0x%lx000,0x%lx000 overlaps existing" - " 0x%lx000,0x%lx000\n", base, size, lbase, - lsize); - goto out; - } - /* New region is enclosed by an existing region */ - if (ltype != type) { - if (types_compatible(type, ltype)) - continue; - printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", - base, size, mtrr_attrib_to_str(ltype), - mtrr_attrib_to_str(type)); - goto out; - } - if (increment) - ++usage_table[i]; - error = i; - goto out; - } - /* Search for an empty MTRR */ - i = mtrr_if->get_free_region(base, size, replace); - if (i >= 0) { - set_mtrr(i, base, size, type); - if (likely(replace < 0)) - usage_table[i] = 1; - else { - usage_table[i] = usage_table[replace] + !!increment; - if (unlikely(replace != i)) { - set_mtrr(replace, 0, 0, 0); - usage_table[replace] = 0; - } - } - } else - printk(KERN_INFO "mtrr: no more MTRRs available\n"); - error = i; - out: - mutex_unlock(&mtrr_mutex); - unlock_cpu_hotplug(); - return error; -} - -static int mtrr_check(unsigned long base, unsigned long size) -{ - if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { - printk(KERN_WARNING - "mtrr: size and base must be multiples of 4 kiB\n"); - printk(KERN_DEBUG - "mtrr: size: 0x%lx base: 0x%lx\n", size, base); - dump_stack(); - return -1; - } - return 0; -} - -/** - * mtrr_add - Add a memory type region - * @base: Physical base address of region - * @size: Physical size of region - * @type: Type of MTRR desired - * @increment: If this is true do usage counting on the region - * - * Memory type region registers control the caching on newer Intel and - * non Intel processors. This function allows drivers to request an - * MTRR is added. The details and hardware specifics of each processor's - * implementation are hidden from the caller, but nevertheless the - * caller should expect to need to provide a power of two size on an - * equivalent power of two boundary. - * - * If the region cannot be added either because all regions are in use - * or the CPU cannot support it a negative value is returned. On success - * the register number for this entry is returned, but should be treated - * as a cookie only. - * - * On a multiprocessor machine the changes are made to all processors. - * This is required on x86 by the Intel processors. - * - * The available types are - * - * %MTRR_TYPE_UNCACHABLE - No caching - * - * %MTRR_TYPE_WRBACK - Write data back in bursts whenever - * - * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts - * - * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes - * - * BUGS: Needs a quiet flag for the cases where drivers do not mind - * failures and do not wish system log messages to be sent. - */ - -int -mtrr_add(unsigned long base, unsigned long size, unsigned int type, - char increment) -{ - if (mtrr_check(base, size)) - return -EINVAL; - return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, - increment); -} - -/** - * mtrr_del_page - delete a memory type region - * @reg: Register returned by mtrr_add - * @base: Physical base address - * @size: Size of region - * - * If register is supplied then base and size are ignored. This is - * how drivers should call it. - * - * Releases an MTRR region. If the usage count drops to zero the - * register is freed and the region returns to default state. - * On success the register is returned, on failure a negative error - * code. - */ - -int mtrr_del_page(int reg, unsigned long base, unsigned long size) -{ - int i, max; - mtrr_type ltype; - unsigned long lbase, lsize; - int error = -EINVAL; - - if (!mtrr_if) - return -ENXIO; - - max = num_var_ranges; - /* No CPU hotplug when we change MTRR entries */ - lock_cpu_hotplug(); - mutex_lock(&mtrr_mutex); - if (reg < 0) { - /* Search for existing MTRR */ - for (i = 0; i < max; ++i) { - mtrr_if->get(i, &lbase, &lsize, <ype); - if (lbase == base && lsize == size) { - reg = i; - break; - } - } - if (reg < 0) { - printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base, - size); - goto out; - } - } - if (reg >= max) { - printk(KERN_WARNING "mtrr: register: %d too big\n", reg); - goto out; - } - if (is_cpu(CYRIX) && !use_intel()) { - if ((reg == 3) && arr3_protected) { - printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n"); - goto out; - } - } - mtrr_if->get(reg, &lbase, &lsize, <ype); - if (lsize < 1) { - printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); - goto out; - } - if (usage_table[reg] < 1) { - printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); - goto out; - } - if (--usage_table[reg] < 1) - set_mtrr(reg, 0, 0, 0); - error = reg; - out: - mutex_unlock(&mtrr_mutex); - unlock_cpu_hotplug(); - return error; -} -/** - * mtrr_del - delete a memory type region - * @reg: Register returned by mtrr_add - * @base: Physical base address - * @size: Size of region - * - * If register is supplied then base and size are ignored. This is - * how drivers should call it. - * - * Releases an MTRR region. If the usage count drops to zero the - * register is freed and the region returns to default state. - * On success the register is returned, on failure a negative error - * code. - */ - -int -mtrr_del(int reg, unsigned long base, unsigned long size) -{ - if (mtrr_check(base, size)) - return -EINVAL; - return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); -} - -EXPORT_SYMBOL(mtrr_add); -EXPORT_SYMBOL(mtrr_del); - -/* HACK ALERT! - * These should be called implicitly, but we can't yet until all the initcall - * stuff is done... - */ -extern void amd_init_mtrr(void); -extern void cyrix_init_mtrr(void); -extern void centaur_init_mtrr(void); - -static void __init init_ifs(void) -{ -#ifndef CONFIG_X86_64 - amd_init_mtrr(); - cyrix_init_mtrr(); - centaur_init_mtrr(); -#endif -} - -/* The suspend/resume methods are only for CPU without MTRR. CPU using generic - * MTRR driver doesn't require this - */ -struct mtrr_value { - mtrr_type ltype; - unsigned long lbase; - unsigned long lsize; -}; - -static struct mtrr_value * mtrr_state; - -static int mtrr_save(struct sys_device * sysdev, pm_message_t state) -{ - int i; - int size = num_var_ranges * sizeof(struct mtrr_value); - - mtrr_state = kzalloc(size,GFP_ATOMIC); - if (!mtrr_state) - return -ENOMEM; - - for (i = 0; i < num_var_ranges; i++) { - mtrr_if->get(i, - &mtrr_state[i].lbase, - &mtrr_state[i].lsize, - &mtrr_state[i].ltype); - } - return 0; -} - -static int mtrr_restore(struct sys_device * sysdev) -{ - int i; - - for (i = 0; i < num_var_ranges; i++) { - if (mtrr_state[i].lsize) - set_mtrr(i, - mtrr_state[i].lbase, - mtrr_state[i].lsize, - mtrr_state[i].ltype); - } - kfree(mtrr_state); - return 0; -} - - - -static struct sysdev_driver mtrr_sysdev_driver = { - .suspend = mtrr_save, - .resume = mtrr_restore, -}; - - -/** - * mtrr_bp_init - initialize mtrrs on the boot CPU - * - * This needs to be called early; before any of the other CPUs are - * initialized (i.e. before smp_init()). - * - */ -void __init mtrr_bp_init(void) -{ - init_ifs(); - - if (cpu_has_mtrr) { - mtrr_if = &generic_mtrr_ops; - size_or_mask = 0xff000000; /* 36 bits */ - size_and_mask = 0x00f00000; - - /* This is an AMD specific MSR, but we assume(hope?) that - Intel will implement it to when they extend the address - bus of the Xeon. */ - if (cpuid_eax(0x80000000) >= 0x80000008) { - u32 phys_addr; - phys_addr = cpuid_eax(0x80000008) & 0xff; - /* CPUID workaround for Intel 0F33/0F34 CPU */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 0xF && - boot_cpu_data.x86_model == 0x3 && - (boot_cpu_data.x86_mask == 0x3 || - boot_cpu_data.x86_mask == 0x4)) - phys_addr = 36; - - size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1); - size_and_mask = ~size_or_mask & 0xfffff00000ULL; - } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && - boot_cpu_data.x86 == 6) { - /* VIA C* family have Intel style MTRRs, but - don't support PAE */ - size_or_mask = 0xfff00000; /* 32 bits */ - size_and_mask = 0; - } - } else { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - if (cpu_has_k6_mtrr) { - /* Pre-Athlon (K6) AMD CPU MTRRs */ - mtrr_if = mtrr_ops[X86_VENDOR_AMD]; - size_or_mask = 0xfff00000; /* 32 bits */ - size_and_mask = 0; - } - break; - case X86_VENDOR_CENTAUR: - if (cpu_has_centaur_mcr) { - mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR]; - size_or_mask = 0xfff00000; /* 32 bits */ - size_and_mask = 0; - } - break; - case X86_VENDOR_CYRIX: - if (cpu_has_cyrix_arr) { - mtrr_if = mtrr_ops[X86_VENDOR_CYRIX]; - size_or_mask = 0xfff00000; /* 32 bits */ - size_and_mask = 0; - } - break; - default: - break; - } - } - - if (mtrr_if) { - set_num_var_ranges(); - init_table(); - if (use_intel()) - get_mtrr_state(); - } -} - -void mtrr_ap_init(void) -{ - unsigned long flags; - - if (!mtrr_if || !use_intel()) - return; - /* - * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed, - * but this routine will be called in cpu boot time, holding the lock - * breaks it. This routine is called in two cases: 1.very earily time - * of software resume, when there absolutely isn't mtrr entry changes; - * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to - * prevent mtrr entry changes - */ - local_irq_save(flags); - - mtrr_if->set_all(); - - local_irq_restore(flags); -} - -/** - * Save current fixed-range MTRR state of the BSP - */ -void mtrr_save_state(void) -{ - int cpu = get_cpu(); - - if (cpu == 0) - mtrr_save_fixed_ranges(NULL); - else - smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1); - put_cpu(); -} - -static int __init mtrr_init_finialize(void) -{ - if (!mtrr_if) - return 0; - if (use_intel()) - mtrr_state_warn(); - else { - /* The CPUs haven't MTRR and seemes not support SMP. They have - * specific drivers, we use a tricky method to support - * suspend/resume for them. - * TBD: is there any system with such CPU which supports - * suspend/resume? if no, we should remove the code. - */ - sysdev_driver_register(&cpu_sysdev_class, - &mtrr_sysdev_driver); - } - return 0; -} -subsys_initcall(mtrr_init_finialize); diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h deleted file mode 100644 index 289dfe6030e..00000000000 --- a/arch/i386/kernel/cpu/mtrr/mtrr.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * local mtrr defines. - */ - -#ifndef TRUE -#define TRUE 1 -#define FALSE 0 -#endif - -#define MTRRcap_MSR 0x0fe -#define MTRRdefType_MSR 0x2ff - -#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) -#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) - -#define NUM_FIXED_RANGES 88 -#define MTRRfix64K_00000_MSR 0x250 -#define MTRRfix16K_80000_MSR 0x258 -#define MTRRfix16K_A0000_MSR 0x259 -#define MTRRfix4K_C0000_MSR 0x268 -#define MTRRfix4K_C8000_MSR 0x269 -#define MTRRfix4K_D0000_MSR 0x26a -#define MTRRfix4K_D8000_MSR 0x26b -#define MTRRfix4K_E0000_MSR 0x26c -#define MTRRfix4K_E8000_MSR 0x26d -#define MTRRfix4K_F0000_MSR 0x26e -#define MTRRfix4K_F8000_MSR 0x26f - -#define MTRR_CHANGE_MASK_FIXED 0x01 -#define MTRR_CHANGE_MASK_VARIABLE 0x02 -#define MTRR_CHANGE_MASK_DEFTYPE 0x04 - -/* In the Intel processor's MTRR interface, the MTRR type is always held in - an 8 bit field: */ -typedef u8 mtrr_type; - -struct mtrr_ops { - u32 vendor; - u32 use_intel_if; -// void (*init)(void); - void (*set)(unsigned int reg, unsigned long base, - unsigned long size, mtrr_type type); - void (*set_all)(void); - - void (*get)(unsigned int reg, unsigned long *base, - unsigned long *size, mtrr_type * type); - int (*get_free_region)(unsigned long base, unsigned long size, - int replace_reg); - int (*validate_add_page)(unsigned long base, unsigned long size, - unsigned int type); - int (*have_wrcomb)(void); -}; - -extern int generic_get_free_region(unsigned long base, unsigned long size, - int replace_reg); -extern int generic_validate_add_page(unsigned long base, unsigned long size, - unsigned int type); - -extern struct mtrr_ops generic_mtrr_ops; - -extern int positive_have_wrcomb(void); - -/* library functions for processor-specific routines */ -struct set_mtrr_context { - unsigned long flags; - unsigned long cr4val; - u32 deftype_lo; - u32 deftype_hi; - u32 ccr3; -}; - -struct mtrr_var_range { - u32 base_lo; - u32 base_hi; - u32 mask_lo; - u32 mask_hi; -}; - -void set_mtrr_done(struct set_mtrr_context *ctxt); -void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); -void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); - -void get_mtrr_state(void); - -extern void set_mtrr_ops(struct mtrr_ops * ops); - -extern u64 size_or_mask, size_and_mask; -extern struct mtrr_ops * mtrr_if; - -#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) -#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) - -extern unsigned int num_var_ranges; - -void mtrr_state_warn(void); -const char *mtrr_attrib_to_str(int x); -void mtrr_wrmsr(unsigned, unsigned, unsigned); - diff --git a/arch/i386/kernel/cpu/mtrr/state.c b/arch/i386/kernel/cpu/mtrr/state.c deleted file mode 100644 index c9014ca4a57..00000000000 --- a/arch/i386/kernel/cpu/mtrr/state.c +++ /dev/null @@ -1,79 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "mtrr.h" - - -/* Put the processor into a state where MTRRs can be safely set */ -void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) -{ - unsigned int cr0; - - /* Disable interrupts locally */ - local_irq_save(ctxt->flags); - - if (use_intel() || is_cpu(CYRIX)) { - - /* Save value of CR4 and clear Page Global Enable (bit 7) */ - if ( cpu_has_pge ) { - ctxt->cr4val = read_cr4(); - write_cr4(ctxt->cr4val & ~X86_CR4_PGE); - } - - /* Disable and flush caches. Note that wbinvd flushes the TLBs as - a side-effect */ - cr0 = read_cr0() | 0x40000000; - wbinvd(); - write_cr0(cr0); - wbinvd(); - - if (use_intel()) - /* Save MTRR state */ - rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); - else - /* Cyrix ARRs - everything else were excluded at the top */ - ctxt->ccr3 = getCx86(CX86_CCR3); - } -} - -void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) -{ - if (use_intel()) - /* Disable MTRRs, and set the default type to uncached */ - mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL, - ctxt->deftype_hi); - else if (is_cpu(CYRIX)) - /* Cyrix ARRs - everything else were excluded at the top */ - setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10); -} - -/* Restore the processor after a set_mtrr_prepare */ -void set_mtrr_done(struct set_mtrr_context *ctxt) -{ - if (use_intel() || is_cpu(CYRIX)) { - - /* Flush caches and TLBs */ - wbinvd(); - - /* Restore MTRRdefType */ - if (use_intel()) - /* Intel (P6) standard MTRRs */ - mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); - else - /* Cyrix ARRs - everything else was excluded at the top */ - setCx86(CX86_CCR3, ctxt->ccr3); - - /* Enable caches */ - write_cr0(read_cr0() & 0xbfffffff); - - /* Restore value of CR4 */ - if ( cpu_has_pge ) - write_cr4(ctxt->cr4val); - } - /* Re-enable interrupts locally (if enabled previously) */ - local_irq_restore(ctxt->flags); -} - diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile new file mode 100644 index 00000000000..191fc053364 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/Makefile @@ -0,0 +1,3 @@ +obj-y := main.o if.o generic.o state.o +obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o + diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c new file mode 100644 index 00000000000..0949cdbf848 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/amd.c @@ -0,0 +1,121 @@ +#include +#include +#include +#include + +#include "mtrr.h" + +static void +amd_get_mtrr(unsigned int reg, unsigned long *base, + unsigned long *size, mtrr_type * type) +{ + unsigned long low, high; + + rdmsr(MSR_K6_UWCCR, low, high); + /* Upper dword is region 1, lower is region 0 */ + if (reg == 1) + low = high; + /* The base masks off on the right alignment */ + *base = (low & 0xFFFE0000) >> PAGE_SHIFT; + *type = 0; + if (low & 1) + *type = MTRR_TYPE_UNCACHABLE; + if (low & 2) + *type = MTRR_TYPE_WRCOMB; + if (!(low & 3)) { + *size = 0; + return; + } + /* + * This needs a little explaining. The size is stored as an + * inverted mask of bits of 128K granularity 15 bits long offset + * 2 bits + * + * So to get a size we do invert the mask and add 1 to the lowest + * mask bit (4 as its 2 bits in). This gives us a size we then shift + * to turn into 128K blocks + * + * eg 111 1111 1111 1100 is 512K + * + * invert 000 0000 0000 0011 + * +1 000 0000 0000 0100 + * *128K ... + */ + low = (~low) & 0x1FFFC; + *size = (low + 4) << (15 - PAGE_SHIFT); + return; +} + +static void amd_set_mtrr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +/* [SUMMARY] Set variable MTRR register on the local CPU. + The register to set. + The base address of the region. + The size of the region. If this is 0 the region is disabled. + The type of the region. + If TRUE, do the change safely. If FALSE, safety measures should + be done externally. + [RETURNS] Nothing. +*/ +{ + u32 regs[2]; + + /* + * Low is MTRR0 , High MTRR 1 + */ + rdmsr(MSR_K6_UWCCR, regs[0], regs[1]); + /* + * Blank to disable + */ + if (size == 0) + regs[reg] = 0; + else + /* Set the register to the base, the type (off by one) and an + inverted bitmask of the size The size is the only odd + bit. We are fed say 512K We invert this and we get 111 1111 + 1111 1011 but if you subtract one and invert you get the + desired 111 1111 1111 1100 mask + + But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! */ + regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC) + | (base << PAGE_SHIFT) | (type + 1); + + /* + * The writeback rule is quite specific. See the manual. Its + * disable local interrupts, write back the cache, set the mtrr + */ + wbinvd(); + wrmsr(MSR_K6_UWCCR, regs[0], regs[1]); +} + +static int amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) +{ + /* Apply the K6 block alignment and size rules + In order + o Uncached or gathering only + o 128K or bigger block + o Power of 2 block + o base suitably aligned to the power + */ + if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT)) + || (size & ~(size - 1)) - size || (base & (size - 1))) + return -EINVAL; + return 0; +} + +static struct mtrr_ops amd_mtrr_ops = { + .vendor = X86_VENDOR_AMD, + .set = amd_set_mtrr, + .get = amd_get_mtrr, + .get_free_region = generic_get_free_region, + .validate_add_page = amd_validate_add_page, + .have_wrcomb = positive_have_wrcomb, +}; + +int __init amd_init_mtrr(void) +{ + set_mtrr_ops(&amd_mtrr_ops); + return 0; +} + +//arch_initcall(amd_mtrr_init); diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c new file mode 100644 index 00000000000..cb9aa3a7a7a --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/centaur.c @@ -0,0 +1,224 @@ +#include +#include +#include +#include +#include "mtrr.h" + +static struct { + unsigned long high; + unsigned long low; +} centaur_mcr[8]; + +static u8 centaur_mcr_reserved; +static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */ + +/* + * Report boot time MCR setups + */ + +static int +centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) +/* [SUMMARY] Get a free MTRR. + The starting (base) address of the region. + The size (in bytes) of the region. + [RETURNS] The index of the region on success, else -1 on error. +*/ +{ + int i, max; + mtrr_type ltype; + unsigned long lbase, lsize; + + max = num_var_ranges; + if (replace_reg >= 0 && replace_reg < max) + return replace_reg; + for (i = 0; i < max; ++i) { + if (centaur_mcr_reserved & (1 << i)) + continue; + mtrr_if->get(i, &lbase, &lsize, <ype); + if (lsize == 0) + return i; + } + return -ENOSPC; +} + +void +mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) +{ + centaur_mcr[mcr].low = lo; + centaur_mcr[mcr].high = hi; +} + +static void +centaur_get_mcr(unsigned int reg, unsigned long *base, + unsigned long *size, mtrr_type * type) +{ + *base = centaur_mcr[reg].high >> PAGE_SHIFT; + *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT; + *type = MTRR_TYPE_WRCOMB; /* If it is there, it is write-combining */ + if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2)) + *type = MTRR_TYPE_UNCACHABLE; + if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25) + *type = MTRR_TYPE_WRBACK; + if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31) + *type = MTRR_TYPE_WRBACK; + +} + +static void centaur_set_mcr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +{ + unsigned long low, high; + + if (size == 0) { + /* Disable */ + high = low = 0; + } else { + high = base << PAGE_SHIFT; + if (centaur_mcr_type == 0) + low = -size << PAGE_SHIFT | 0x1f; /* only support write-combining... */ + else { + if (type == MTRR_TYPE_UNCACHABLE) + low = -size << PAGE_SHIFT | 0x02; /* NC */ + else + low = -size << PAGE_SHIFT | 0x09; /* WWO,WC */ + } + } + centaur_mcr[reg].high = high; + centaur_mcr[reg].low = low; + wrmsr(MSR_IDT_MCR0 + reg, low, high); +} + +#if 0 +/* + * Initialise the later (saner) Winchip MCR variant. In this version + * the BIOS can pass us the registers it has used (but not their values) + * and the control register is read/write + */ + +static void __init +centaur_mcr1_init(void) +{ + unsigned i; + u32 lo, hi; + + /* Unfortunately, MCR's are read-only, so there is no way to + * find out what the bios might have done. + */ + + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + if (((lo >> 17) & 7) == 1) { /* Type 1 Winchip2 MCR */ + lo &= ~0x1C0; /* clear key */ + lo |= 0x040; /* set key to 1 */ + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); /* unlock MCR */ + } + + centaur_mcr_type = 1; + + /* + * Clear any unconfigured MCR's. + */ + + for (i = 0; i < 8; ++i) { + if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) { + if (!(lo & (1 << (9 + i)))) + wrmsr(MSR_IDT_MCR0 + i, 0, 0); + else + /* + * If the BIOS set up an MCR we cannot see it + * but we don't wish to obliterate it + */ + centaur_mcr_reserved |= (1 << i); + } + } + /* + * Throw the main write-combining switch... + * However if OOSTORE is enabled then people have already done far + * cleverer things and we should behave. + */ + + lo |= 15; /* Write combine enables */ + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); +} + +/* + * Initialise the original winchip with read only MCR registers + * no used bitmask for the BIOS to pass on and write only control + */ + +static void __init +centaur_mcr0_init(void) +{ + unsigned i; + + /* Unfortunately, MCR's are read-only, so there is no way to + * find out what the bios might have done. + */ + + /* Clear any unconfigured MCR's. + * This way we are sure that the centaur_mcr array contains the actual + * values. The disadvantage is that any BIOS tweaks are thus undone. + * + */ + for (i = 0; i < 8; ++i) { + if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) + wrmsr(MSR_IDT_MCR0 + i, 0, 0); + } + + wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); /* Write only */ +} + +/* + * Initialise Winchip series MCR registers + */ + +static void __init +centaur_mcr_init(void) +{ + struct set_mtrr_context ctxt; + + set_mtrr_prepare_save(&ctxt); + set_mtrr_cache_disable(&ctxt); + + if (boot_cpu_data.x86_model == 4) + centaur_mcr0_init(); + else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9) + centaur_mcr1_init(); + + set_mtrr_done(&ctxt); +} +#endif + +static int centaur_validate_add_page(unsigned long base, + unsigned long size, unsigned int type) +{ + /* + * FIXME: Winchip2 supports uncached + */ + if (type != MTRR_TYPE_WRCOMB && + (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) { + printk(KERN_WARNING + "mtrr: only write-combining%s supported\n", + centaur_mcr_type ? " and uncacheable are" + : " is"); + return -EINVAL; + } + return 0; +} + +static struct mtrr_ops centaur_mtrr_ops = { + .vendor = X86_VENDOR_CENTAUR, +// .init = centaur_mcr_init, + .set = centaur_set_mcr, + .get = centaur_get_mcr, + .get_free_region = centaur_get_free_region, + .validate_add_page = centaur_validate_add_page, + .have_wrcomb = positive_have_wrcomb, +}; + +int __init centaur_init_mtrr(void) +{ + set_mtrr_ops(¢aur_mtrr_ops); + return 0; +} + +//arch_initcall(centaur_init_mtrr); diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c new file mode 100644 index 00000000000..2287d4863a8 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -0,0 +1,380 @@ +#include +#include +#include +#include +#include +#include +#include "mtrr.h" + +int arr3_protected; + +static void +cyrix_get_arr(unsigned int reg, unsigned long *base, + unsigned long *size, mtrr_type * type) +{ + unsigned long flags; + unsigned char arr, ccr3, rcr, shift; + + arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ + + /* Save flags and disable interrupts */ + local_irq_save(flags); + + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + ((unsigned char *) base)[3] = getCx86(arr); + ((unsigned char *) base)[2] = getCx86(arr + 1); + ((unsigned char *) base)[1] = getCx86(arr + 2); + rcr = getCx86(CX86_RCR_BASE + reg); + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + + /* Enable interrupts if it was enabled previously */ + local_irq_restore(flags); + shift = ((unsigned char *) base)[1] & 0x0f; + *base >>= PAGE_SHIFT; + + /* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7 + * Note: shift==0xf means 4G, this is unsupported. + */ + if (shift) + *size = (reg < 7 ? 0x1UL : 0x40UL) << (shift - 1); + else + *size = 0; + + /* Bit 0 is Cache Enable on ARR7, Cache Disable on ARR0-ARR6 */ + if (reg < 7) { + switch (rcr) { + case 1: + *type = MTRR_TYPE_UNCACHABLE; + break; + case 8: + *type = MTRR_TYPE_WRBACK; + break; + case 9: + *type = MTRR_TYPE_WRCOMB; + break; + case 24: + default: + *type = MTRR_TYPE_WRTHROUGH; + break; + } + } else { + switch (rcr) { + case 0: + *type = MTRR_TYPE_UNCACHABLE; + break; + case 8: + *type = MTRR_TYPE_WRCOMB; + break; + case 9: + *type = MTRR_TYPE_WRBACK; + break; + case 25: + default: + *type = MTRR_TYPE_WRTHROUGH; + break; + } + } +} + +static int +cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) +/* [SUMMARY] Get a free ARR. + The starting (base) address of the region. + The size (in bytes) of the region. + [RETURNS] The index of the region on success, else -1 on error. +*/ +{ + int i; + mtrr_type ltype; + unsigned long lbase, lsize; + + switch (replace_reg) { + case 7: + if (size < 0x40) + break; + case 6: + case 5: + case 4: + return replace_reg; + case 3: + if (arr3_protected) + break; + case 2: + case 1: + case 0: + return replace_reg; + } + /* If we are to set up a region >32M then look at ARR7 immediately */ + if (size > 0x2000) { + cyrix_get_arr(7, &lbase, &lsize, <ype); + if (lsize == 0) + return 7; + /* Else try ARR0-ARR6 first */ + } else { + for (i = 0; i < 7; i++) { + cyrix_get_arr(i, &lbase, &lsize, <ype); + if ((i == 3) && arr3_protected) + continue; + if (lsize == 0) + return i; + } + /* ARR0-ARR6 isn't free, try ARR7 but its size must be at least 256K */ + cyrix_get_arr(i, &lbase, &lsize, <ype); + if ((lsize == 0) && (size >= 0x40)) + return i; + } + return -ENOSPC; +} + +static u32 cr4 = 0; +static u32 ccr3; + +static void prepare_set(void) +{ + u32 cr0; + + /* Save value of CR4 and clear Page Global Enable (bit 7) */ + if ( cpu_has_pge ) { + cr4 = read_cr4(); + write_cr4(cr4 & ~X86_CR4_PGE); + } + + /* Disable and flush caches. Note that wbinvd flushes the TLBs as + a side-effect */ + cr0 = read_cr0() | 0x40000000; + wbinvd(); + write_cr0(cr0); + wbinvd(); + + /* Cyrix ARRs - everything else were excluded at the top */ + ccr3 = getCx86(CX86_CCR3); + + /* Cyrix ARRs - everything else were excluded at the top */ + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); + +} + +static void post_set(void) +{ + /* Flush caches and TLBs */ + wbinvd(); + + /* Cyrix ARRs - everything else was excluded at the top */ + setCx86(CX86_CCR3, ccr3); + + /* Enable caches */ + write_cr0(read_cr0() & 0xbfffffff); + + /* Restore value of CR4 */ + if ( cpu_has_pge ) + write_cr4(cr4); +} + +static void cyrix_set_arr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +{ + unsigned char arr, arr_type, arr_size; + + arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ + + /* count down from 32M (ARR0-ARR6) or from 2G (ARR7) */ + if (reg >= 7) + size >>= 6; + + size &= 0x7fff; /* make sure arr_size <= 14 */ + for (arr_size = 0; size; arr_size++, size >>= 1) ; + + if (reg < 7) { + switch (type) { + case MTRR_TYPE_UNCACHABLE: + arr_type = 1; + break; + case MTRR_TYPE_WRCOMB: + arr_type = 9; + break; + case MTRR_TYPE_WRTHROUGH: + arr_type = 24; + break; + default: + arr_type = 8; + break; + } + } else { + switch (type) { + case MTRR_TYPE_UNCACHABLE: + arr_type = 0; + break; + case MTRR_TYPE_WRCOMB: + arr_type = 8; + break; + case MTRR_TYPE_WRTHROUGH: + arr_type = 25; + break; + default: + arr_type = 9; + break; + } + } + + prepare_set(); + + base <<= PAGE_SHIFT; + setCx86(arr, ((unsigned char *) &base)[3]); + setCx86(arr + 1, ((unsigned char *) &base)[2]); + setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size); + setCx86(CX86_RCR_BASE + reg, arr_type); + + post_set(); +} + +typedef struct { + unsigned long base; + unsigned long size; + mtrr_type type; +} arr_state_t; + +static arr_state_t arr_state[8] = { + {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, + {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL} +}; + +static unsigned char ccr_state[7] = { 0, 0, 0, 0, 0, 0, 0 }; + +static void cyrix_set_all(void) +{ + int i; + + prepare_set(); + + /* the CCRs are not contiguous */ + for (i = 0; i < 4; i++) + setCx86(CX86_CCR0 + i, ccr_state[i]); + for (; i < 7; i++) + setCx86(CX86_CCR4 + i, ccr_state[i]); + for (i = 0; i < 8; i++) + cyrix_set_arr(i, arr_state[i].base, + arr_state[i].size, arr_state[i].type); + + post_set(); +} + +#if 0 +/* + * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection + * with the SMM (System Management Mode) mode. So we need the following: + * Check whether SMI_LOCK (CCR3 bit 0) is set + * if it is set, write a warning message: ARR3 cannot be changed! + * (it cannot be changed until the next processor reset) + * if it is reset, then we can change it, set all the needed bits: + * - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset) + * - disable access to SMM memory (CCR1 bit 2 reset) + * - disable SMM mode (CCR1 bit 1 reset) + * - disable write protection of ARR3 (CCR6 bit 1 reset) + * - (maybe) disable ARR3 + * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set) + */ +static void __init +cyrix_arr_init(void) +{ + struct set_mtrr_context ctxt; + unsigned char ccr[7]; + int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 }; +#ifdef CONFIG_SMP + int i; +#endif + + /* flush cache and enable MAPEN */ + set_mtrr_prepare_save(&ctxt); + set_mtrr_cache_disable(&ctxt); + + /* Save all CCRs locally */ + ccr[0] = getCx86(CX86_CCR0); + ccr[1] = getCx86(CX86_CCR1); + ccr[2] = getCx86(CX86_CCR2); + ccr[3] = ctxt.ccr3; + ccr[4] = getCx86(CX86_CCR4); + ccr[5] = getCx86(CX86_CCR5); + ccr[6] = getCx86(CX86_CCR6); + + if (ccr[3] & 1) { + ccrc[3] = 1; + arr3_protected = 1; + } else { + /* Disable SMM mode (bit 1), access to SMM memory (bit 2) and + * access to SMM memory through ARR3 (bit 7). + */ + if (ccr[1] & 0x80) { + ccr[1] &= 0x7f; + ccrc[1] |= 0x80; + } + if (ccr[1] & 0x04) { + ccr[1] &= 0xfb; + ccrc[1] |= 0x04; + } + if (ccr[1] & 0x02) { + ccr[1] &= 0xfd; + ccrc[1] |= 0x02; + } + arr3_protected = 0; + if (ccr[6] & 0x02) { + ccr[6] &= 0xfd; + ccrc[6] = 1; /* Disable write protection of ARR3 */ + setCx86(CX86_CCR6, ccr[6]); + } + /* Disable ARR3. This is safe now that we disabled SMM. */ + /* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */ + } + /* If we changed CCR1 in memory, change it in the processor, too. */ + if (ccrc[1]) + setCx86(CX86_CCR1, ccr[1]); + + /* Enable ARR usage by the processor */ + if (!(ccr[5] & 0x20)) { + ccr[5] |= 0x20; + ccrc[5] = 1; + setCx86(CX86_CCR5, ccr[5]); + } +#ifdef CONFIG_SMP + for (i = 0; i < 7; i++) + ccr_state[i] = ccr[i]; + for (i = 0; i < 8; i++) + cyrix_get_arr(i, + &arr_state[i].base, &arr_state[i].size, + &arr_state[i].type); +#endif + + set_mtrr_done(&ctxt); /* flush cache and disable MAPEN */ + + if (ccrc[5]) + printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled manually\n"); + if (ccrc[3]) + printk(KERN_INFO "mtrr: ARR3 cannot be changed\n"); +/* + if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3 disabled\n"); + if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n"); + if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n"); +*/ + if (ccrc[6]) + printk(KERN_INFO "mtrr: ARR3 was write protected, unprotected\n"); +} +#endif + +static struct mtrr_ops cyrix_mtrr_ops = { + .vendor = X86_VENDOR_CYRIX, +// .init = cyrix_arr_init, + .set_all = cyrix_set_all, + .set = cyrix_set_arr, + .get = cyrix_get_arr, + .get_free_region = cyrix_get_free_region, + .validate_add_page = generic_validate_add_page, + .have_wrcomb = positive_have_wrcomb, +}; + +int __init cyrix_init_mtrr(void) +{ + set_mtrr_ops(&cyrix_mtrr_ops); + return 0; +} + +//arch_initcall(cyrix_init_mtrr); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c new file mode 100644 index 00000000000..56f64e34829 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -0,0 +1,509 @@ +/* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong + because MTRRs can span upto 40 bits (36bits on most modern x86) */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mtrr.h" + +struct mtrr_state { + struct mtrr_var_range *var_ranges; + mtrr_type fixed_ranges[NUM_FIXED_RANGES]; + unsigned char enabled; + unsigned char have_fixed; + mtrr_type def_type; +}; + +struct fixed_range_block { + int base_msr; /* start address of an MTRR block */ + int ranges; /* number of MTRRs in this block */ +}; + +static struct fixed_range_block fixed_range_blocks[] = { + { MTRRfix64K_00000_MSR, 1 }, /* one 64k MTRR */ + { MTRRfix16K_80000_MSR, 2 }, /* two 16k MTRRs */ + { MTRRfix4K_C0000_MSR, 8 }, /* eight 4k MTRRs */ + {} +}; + +static unsigned long smp_changes_mask; +static struct mtrr_state mtrr_state = {}; + +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "mtrr." + +static int mtrr_show; +module_param_named(show, mtrr_show, bool, 0); + +/* Get the MSR pair relating to a var range */ +static void +get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) +{ + rdmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); + rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); +} + +static void +get_fixed_ranges(mtrr_type * frs) +{ + unsigned int *p = (unsigned int *) frs; + int i; + + rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]); + + for (i = 0; i < 2; i++) + rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]); + for (i = 0; i < 8; i++) + rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]); +} + +void mtrr_save_fixed_ranges(void *info) +{ + if (cpu_has_mtrr) + get_fixed_ranges(mtrr_state.fixed_ranges); +} + +static void print_fixed(unsigned base, unsigned step, const mtrr_type*types) +{ + unsigned i; + + for (i = 0; i < 8; ++i, ++types, base += step) + printk(KERN_INFO "MTRR %05X-%05X %s\n", + base, base + step - 1, mtrr_attrib_to_str(*types)); +} + +/* Grab all of the MTRR state for this CPU into *state */ +void __init get_mtrr_state(void) +{ + unsigned int i; + struct mtrr_var_range *vrs; + unsigned lo, dummy; + + if (!mtrr_state.var_ranges) { + mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range), + GFP_KERNEL); + if (!mtrr_state.var_ranges) + return; + } + vrs = mtrr_state.var_ranges; + + rdmsr(MTRRcap_MSR, lo, dummy); + mtrr_state.have_fixed = (lo >> 8) & 1; + + for (i = 0; i < num_var_ranges; i++) + get_mtrr_var_range(i, &vrs[i]); + if (mtrr_state.have_fixed) + get_fixed_ranges(mtrr_state.fixed_ranges); + + rdmsr(MTRRdefType_MSR, lo, dummy); + mtrr_state.def_type = (lo & 0xff); + mtrr_state.enabled = (lo & 0xc00) >> 10; + + if (mtrr_show) { + int high_width; + + printk(KERN_INFO "MTRR default type: %s\n", mtrr_attrib_to_str(mtrr_state.def_type)); + if (mtrr_state.have_fixed) { + printk(KERN_INFO "MTRR fixed ranges %sabled:\n", + mtrr_state.enabled & 1 ? "en" : "dis"); + print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); + for (i = 0; i < 2; ++i) + print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8); + for (i = 0; i < 8; ++i) + print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8); + } + printk(KERN_INFO "MTRR variable ranges %sabled:\n", + mtrr_state.enabled & 2 ? "en" : "dis"); + high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4; + for (i = 0; i < num_var_ranges; ++i) { + if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) + printk(KERN_INFO "MTRR %u base %0*X%05X000 mask %0*X%05X000 %s\n", + i, + high_width, + mtrr_state.var_ranges[i].base_hi, + mtrr_state.var_ranges[i].base_lo >> 12, + high_width, + mtrr_state.var_ranges[i].mask_hi, + mtrr_state.var_ranges[i].mask_lo >> 12, + mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); + else + printk(KERN_INFO "MTRR %u disabled\n", i); + } + } +} + +/* Some BIOS's are fucked and don't set all MTRRs the same! */ +void __init mtrr_state_warn(void) +{ + unsigned long mask = smp_changes_mask; + + if (!mask) + return; + if (mask & MTRR_CHANGE_MASK_FIXED) + printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed MTRR settings\n"); + if (mask & MTRR_CHANGE_MASK_VARIABLE) + printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable MTRR settings\n"); + if (mask & MTRR_CHANGE_MASK_DEFTYPE) + printk(KERN_WARNING "mtrr: your CPUs had inconsistent MTRRdefType settings\n"); + printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n"); + printk(KERN_INFO "mtrr: corrected configuration.\n"); +} + +/* Doesn't attempt to pass an error out to MTRR users + because it's quite complicated in some cases and probably not + worth it because the best error handling is to ignore it. */ +void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) +{ + if (wrmsr_safe(msr, a, b) < 0) + printk(KERN_ERR + "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", + smp_processor_id(), msr, a, b); +} + +/** + * Enable and allow read/write of extended fixed-range MTRR bits on K8 CPUs + * see AMD publication no. 24593, chapter 3.2.1 for more information + */ +static inline void k8_enable_fixed_iorrs(void) +{ + unsigned lo, hi; + + rdmsr(MSR_K8_SYSCFG, lo, hi); + mtrr_wrmsr(MSR_K8_SYSCFG, lo + | K8_MTRRFIXRANGE_DRAM_ENABLE + | K8_MTRRFIXRANGE_DRAM_MODIFY, hi); +} + +/** + * Checks and updates an fixed-range MTRR if it differs from the value it + * should have. If K8 extenstions are wanted, update the K8 SYSCFG MSR also. + * see AMD publication no. 24593, chapter 7.8.1, page 233 for more information + * \param msr MSR address of the MTTR which should be checked and updated + * \param changed pointer which indicates whether the MTRR needed to be changed + * \param msrwords pointer to the MSR values which the MSR should have + */ +static void set_fixed_range(int msr, int * changed, unsigned int * msrwords) +{ + unsigned lo, hi; + + rdmsr(msr, lo, hi); + + if (lo != msrwords[0] || hi != msrwords[1]) { + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86 == 15 && + ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) + k8_enable_fixed_iorrs(); + mtrr_wrmsr(msr, msrwords[0], msrwords[1]); + *changed = TRUE; + } +} + +int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) +/* [SUMMARY] Get a free MTRR. + The starting (base) address of the region. + The size (in bytes) of the region. + [RETURNS] The index of the region on success, else -1 on error. +*/ +{ + int i, max; + mtrr_type ltype; + unsigned long lbase, lsize; + + max = num_var_ranges; + if (replace_reg >= 0 && replace_reg < max) + return replace_reg; + for (i = 0; i < max; ++i) { + mtrr_if->get(i, &lbase, &lsize, <ype); + if (lsize == 0) + return i; + } + return -ENOSPC; +} + +static void generic_get_mtrr(unsigned int reg, unsigned long *base, + unsigned long *size, mtrr_type *type) +{ + unsigned int mask_lo, mask_hi, base_lo, base_hi; + + rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); + if ((mask_lo & 0x800) == 0) { + /* Invalid (i.e. free) range */ + *base = 0; + *size = 0; + *type = 0; + return; + } + + rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); + + /* Work out the shifted address mask. */ + mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT) + | mask_lo >> PAGE_SHIFT; + + /* This works correctly if size is a power of two, i.e. a + contiguous range. */ + *size = -mask_lo; + *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; + *type = base_lo & 0xff; +} + +/** + * Checks and updates the fixed-range MTRRs if they differ from the saved set + * \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges() + */ +static int set_fixed_ranges(mtrr_type * frs) +{ + unsigned long long *saved = (unsigned long long *) frs; + int changed = FALSE; + int block=-1, range; + + while (fixed_range_blocks[++block].ranges) + for (range=0; range < fixed_range_blocks[block].ranges; range++) + set_fixed_range(fixed_range_blocks[block].base_msr + range, + &changed, (unsigned int *) saved++); + + return changed; +} + +/* Set the MSR pair relating to a var range. Returns TRUE if + changes are made */ +static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) +{ + unsigned int lo, hi; + int changed = FALSE; + + rdmsr(MTRRphysBase_MSR(index), lo, hi); + if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) + || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != + (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { + mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); + changed = TRUE; + } + + rdmsr(MTRRphysMask_MSR(index), lo, hi); + + if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL) + || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != + (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { + mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); + changed = TRUE; + } + return changed; +} + +static u32 deftype_lo, deftype_hi; + +static unsigned long set_mtrr_state(void) +/* [SUMMARY] Set the MTRR state for this CPU. + The MTRR state information to read. + Some relevant CPU context. + [NOTE] The CPU must already be in a safe state for MTRR changes. + [RETURNS] 0 if no changes made, else a mask indication what was changed. +*/ +{ + unsigned int i; + unsigned long change_mask = 0; + + for (i = 0; i < num_var_ranges; i++) + if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i])) + change_mask |= MTRR_CHANGE_MASK_VARIABLE; + + if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges)) + change_mask |= MTRR_CHANGE_MASK_FIXED; + + /* Set_mtrr_restore restores the old value of MTRRdefType, + so to set it we fiddle with the saved value */ + if ((deftype_lo & 0xff) != mtrr_state.def_type + || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) { + deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | (mtrr_state.enabled << 10); + change_mask |= MTRR_CHANGE_MASK_DEFTYPE; + } + + return change_mask; +} + + +static unsigned long cr4 = 0; +static DEFINE_SPINLOCK(set_atomicity_lock); + +/* + * Since we are disabling the cache don't allow any interrupts - they + * would run extremely slow and would only increase the pain. The caller must + * ensure that local interrupts are disabled and are reenabled after post_set() + * has been called. + */ + +static void prepare_set(void) __acquires(set_atomicity_lock) +{ + unsigned long cr0; + + /* Note that this is not ideal, since the cache is only flushed/disabled + for this CPU while the MTRRs are changed, but changing this requires + more invasive changes to the way the kernel boots */ + + spin_lock(&set_atomicity_lock); + + /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ + cr0 = read_cr0() | 0x40000000; /* set CD flag */ + write_cr0(cr0); + wbinvd(); + + /* Save value of CR4 and clear Page Global Enable (bit 7) */ + if ( cpu_has_pge ) { + cr4 = read_cr4(); + write_cr4(cr4 & ~X86_CR4_PGE); + } + + /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ + __flush_tlb(); + + /* Save MTRR state */ + rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); + + /* Disable MTRRs, and set the default type to uncached */ + mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & ~0xcff, deftype_hi); +} + +static void post_set(void) __releases(set_atomicity_lock) +{ + /* Flush TLBs (no need to flush caches - they are disabled) */ + __flush_tlb(); + + /* Intel (P6) standard MTRRs */ + mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); + + /* Enable caches */ + write_cr0(read_cr0() & 0xbfffffff); + + /* Restore value of CR4 */ + if ( cpu_has_pge ) + write_cr4(cr4); + spin_unlock(&set_atomicity_lock); +} + +static void generic_set_all(void) +{ + unsigned long mask, count; + unsigned long flags; + + local_irq_save(flags); + prepare_set(); + + /* Actually set the state */ + mask = set_mtrr_state(); + + post_set(); + local_irq_restore(flags); + + /* Use the atomic bitops to update the global mask */ + for (count = 0; count < sizeof mask * 8; ++count) { + if (mask & 0x01) + set_bit(count, &smp_changes_mask); + mask >>= 1; + } + +} + +static void generic_set_mtrr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +/* [SUMMARY] Set variable MTRR register on the local CPU. + The register to set. + The base address of the region. + The size of the region. If this is 0 the region is disabled. + The type of the region. + If TRUE, do the change safely. If FALSE, safety measures should + be done externally. + [RETURNS] Nothing. +*/ +{ + unsigned long flags; + struct mtrr_var_range *vr; + + vr = &mtrr_state.var_ranges[reg]; + + local_irq_save(flags); + prepare_set(); + + if (size == 0) { + /* The invalid bit is kept in the mask, so we simply clear the + relevant mask register to disable a range. */ + mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); + memset(vr, 0, sizeof(struct mtrr_var_range)); + } else { + vr->base_lo = base << PAGE_SHIFT | type; + vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT); + vr->mask_lo = -size << PAGE_SHIFT | 0x800; + vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT); + + mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi); + mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi); + } + + post_set(); + local_irq_restore(flags); +} + +int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type) +{ + unsigned long lbase, last; + + /* For Intel PPro stepping <= 7, must be 4 MiB aligned + and not touch 0x70000000->0x7003FFFF */ + if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model == 1 && + boot_cpu_data.x86_mask <= 7) { + if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { + printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); + return -EINVAL; + } + if (!(base + size < 0x70000 || base > 0x7003F) && + (type == MTRR_TYPE_WRCOMB + || type == MTRR_TYPE_WRBACK)) { + printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); + return -EINVAL; + } + } + + /* Check upper bits of base and last are equal and lower bits are 0 + for base and 1 for last */ + last = base + size - 1; + for (lbase = base; !(lbase & 1) && (last & 1); + lbase = lbase >> 1, last = last >> 1) ; + if (lbase != last) { + printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", + base, size); + return -EINVAL; + } + return 0; +} + + +static int generic_have_wrcomb(void) +{ + unsigned long config, dummy; + rdmsr(MTRRcap_MSR, config, dummy); + return (config & (1 << 10)); +} + +int positive_have_wrcomb(void) +{ + return 1; +} + +/* generic structure... + */ +struct mtrr_ops generic_mtrr_ops = { + .use_intel_if = 1, + .set_all = generic_set_all, + .get = generic_get_mtrr, + .get_free_region = generic_get_free_region, + .set = generic_set_mtrr, + .validate_add_page = generic_validate_add_page, + .have_wrcomb = generic_have_wrcomb, +}; diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c new file mode 100644 index 00000000000..c7d8f175674 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -0,0 +1,439 @@ +#include +#include +#include +#include +#include +#include +#include + +#define LINE_SIZE 80 + +#include +#include "mtrr.h" + +/* RED-PEN: this is accessed without any locking */ +extern unsigned int *usage_table; + + +#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) + +static const char *const mtrr_strings[MTRR_NUM_TYPES] = +{ + "uncachable", /* 0 */ + "write-combining", /* 1 */ + "?", /* 2 */ + "?", /* 3 */ + "write-through", /* 4 */ + "write-protect", /* 5 */ + "write-back", /* 6 */ +}; + +const char *mtrr_attrib_to_str(int x) +{ + return (x <= 6) ? mtrr_strings[x] : "?"; +} + +#ifdef CONFIG_PROC_FS + +static int +mtrr_file_add(unsigned long base, unsigned long size, + unsigned int type, char increment, struct file *file, int page) +{ + int reg, max; + unsigned int *fcount = FILE_FCOUNT(file); + + max = num_var_ranges; + if (fcount == NULL) { + fcount = kzalloc(max * sizeof *fcount, GFP_KERNEL); + if (!fcount) + return -ENOMEM; + FILE_FCOUNT(file) = fcount; + } + if (!page) { + if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) + return -EINVAL; + base >>= PAGE_SHIFT; + size >>= PAGE_SHIFT; + } + reg = mtrr_add_page(base, size, type, 1); + if (reg >= 0) + ++fcount[reg]; + return reg; +} + +static int +mtrr_file_del(unsigned long base, unsigned long size, + struct file *file, int page) +{ + int reg; + unsigned int *fcount = FILE_FCOUNT(file); + + if (!page) { + if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) + return -EINVAL; + base >>= PAGE_SHIFT; + size >>= PAGE_SHIFT; + } + reg = mtrr_del_page(-1, base, size); + if (reg < 0) + return reg; + if (fcount == NULL) + return reg; + if (fcount[reg] < 1) + return -EINVAL; + --fcount[reg]; + return reg; +} + +/* RED-PEN: seq_file can seek now. this is ignored. */ +static ssize_t +mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) +/* Format of control line: + "base=%Lx size=%Lx type=%s" OR: + "disable=%d" +*/ +{ + int i, err; + unsigned long reg; + unsigned long long base, size; + char *ptr; + char line[LINE_SIZE]; + size_t linelen; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (!len) + return -EINVAL; + memset(line, 0, LINE_SIZE); + if (len > LINE_SIZE) + len = LINE_SIZE; + if (copy_from_user(line, buf, len - 1)) + return -EFAULT; + linelen = strlen(line); + ptr = line + linelen - 1; + if (linelen && *ptr == '\n') + *ptr = '\0'; + if (!strncmp(line, "disable=", 8)) { + reg = simple_strtoul(line + 8, &ptr, 0); + err = mtrr_del_page(reg, 0, 0); + if (err < 0) + return err; + return len; + } + if (strncmp(line, "base=", 5)) + return -EINVAL; + base = simple_strtoull(line + 5, &ptr, 0); + for (; isspace(*ptr); ++ptr) ; + if (strncmp(ptr, "size=", 5)) + return -EINVAL; + size = simple_strtoull(ptr + 5, &ptr, 0); + if ((base & 0xfff) || (size & 0xfff)) + return -EINVAL; + for (; isspace(*ptr); ++ptr) ; + if (strncmp(ptr, "type=", 5)) + return -EINVAL; + ptr += 5; + for (; isspace(*ptr); ++ptr) ; + for (i = 0; i < MTRR_NUM_TYPES; ++i) { + if (strcmp(ptr, mtrr_strings[i])) + continue; + base >>= PAGE_SHIFT; + size >>= PAGE_SHIFT; + err = + mtrr_add_page((unsigned long) base, (unsigned long) size, i, + 1); + if (err < 0) + return err; + return len; + } + return -EINVAL; +} + +static long +mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) +{ + int err = 0; + mtrr_type type; + unsigned long size; + struct mtrr_sentry sentry; + struct mtrr_gentry gentry; + void __user *arg = (void __user *) __arg; + + switch (cmd) { + case MTRRIOC_ADD_ENTRY: + case MTRRIOC_SET_ENTRY: + case MTRRIOC_DEL_ENTRY: + case MTRRIOC_KILL_ENTRY: + case MTRRIOC_ADD_PAGE_ENTRY: + case MTRRIOC_SET_PAGE_ENTRY: + case MTRRIOC_DEL_PAGE_ENTRY: + case MTRRIOC_KILL_PAGE_ENTRY: + if (copy_from_user(&sentry, arg, sizeof sentry)) + return -EFAULT; + break; + case MTRRIOC_GET_ENTRY: + case MTRRIOC_GET_PAGE_ENTRY: + if (copy_from_user(&gentry, arg, sizeof gentry)) + return -EFAULT; + break; +#ifdef CONFIG_COMPAT + case MTRRIOC32_ADD_ENTRY: + case MTRRIOC32_SET_ENTRY: + case MTRRIOC32_DEL_ENTRY: + case MTRRIOC32_KILL_ENTRY: + case MTRRIOC32_ADD_PAGE_ENTRY: + case MTRRIOC32_SET_PAGE_ENTRY: + case MTRRIOC32_DEL_PAGE_ENTRY: + case MTRRIOC32_KILL_PAGE_ENTRY: { + struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)__arg; + err = get_user(sentry.base, &s32->base); + err |= get_user(sentry.size, &s32->size); + err |= get_user(sentry.type, &s32->type); + if (err) + return err; + break; + } + case MTRRIOC32_GET_ENTRY: + case MTRRIOC32_GET_PAGE_ENTRY: { + struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; + err = get_user(gentry.regnum, &g32->regnum); + err |= get_user(gentry.base, &g32->base); + err |= get_user(gentry.size, &g32->size); + err |= get_user(gentry.type, &g32->type); + if (err) + return err; + break; + } +#endif + } + + switch (cmd) { + default: + return -ENOTTY; + case MTRRIOC_ADD_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_ADD_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = + mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, + file, 0); + break; + case MTRRIOC_SET_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_SET_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); + break; + case MTRRIOC_DEL_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_DEL_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = mtrr_file_del(sentry.base, sentry.size, file, 0); + break; + case MTRRIOC_KILL_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_KILL_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = mtrr_del(-1, sentry.base, sentry.size); + break; + case MTRRIOC_GET_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_GET_ENTRY: +#endif + if (gentry.regnum >= num_var_ranges) + return -EINVAL; + mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); + + /* Hide entries that go above 4GB */ + if (gentry.base + size - 1 >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT)) + || size >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT))) + gentry.base = gentry.size = gentry.type = 0; + else { + gentry.base <<= PAGE_SHIFT; + gentry.size = size << PAGE_SHIFT; + gentry.type = type; + } + + break; + case MTRRIOC_ADD_PAGE_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_ADD_PAGE_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = + mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, + file, 1); + break; + case MTRRIOC_SET_PAGE_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_SET_PAGE_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); + break; + case MTRRIOC_DEL_PAGE_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_DEL_PAGE_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = mtrr_file_del(sentry.base, sentry.size, file, 1); + break; + case MTRRIOC_KILL_PAGE_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_KILL_PAGE_ENTRY: +#endif + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = mtrr_del_page(-1, sentry.base, sentry.size); + break; + case MTRRIOC_GET_PAGE_ENTRY: +#ifdef CONFIG_COMPAT + case MTRRIOC32_GET_PAGE_ENTRY: +#endif + if (gentry.regnum >= num_var_ranges) + return -EINVAL; + mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); + /* Hide entries that would overflow */ + if (size != (__typeof__(gentry.size))size) + gentry.base = gentry.size = gentry.type = 0; + else { + gentry.size = size; + gentry.type = type; + } + break; + } + + if (err) + return err; + + switch(cmd) { + case MTRRIOC_GET_ENTRY: + case MTRRIOC_GET_PAGE_ENTRY: + if (copy_to_user(arg, &gentry, sizeof gentry)) + err = -EFAULT; + break; +#ifdef CONFIG_COMPAT + case MTRRIOC32_GET_ENTRY: + case MTRRIOC32_GET_PAGE_ENTRY: { + struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; + err = put_user(gentry.base, &g32->base); + err |= put_user(gentry.size, &g32->size); + err |= put_user(gentry.regnum, &g32->regnum); + err |= put_user(gentry.type, &g32->type); + break; + } +#endif + } + return err; +} + +static int +mtrr_close(struct inode *ino, struct file *file) +{ + int i, max; + unsigned int *fcount = FILE_FCOUNT(file); + + if (fcount != NULL) { + max = num_var_ranges; + for (i = 0; i < max; ++i) { + while (fcount[i] > 0) { + mtrr_del(i, 0, 0); + --fcount[i]; + } + } + kfree(fcount); + FILE_FCOUNT(file) = NULL; + } + return single_release(ino, file); +} + +static int mtrr_seq_show(struct seq_file *seq, void *offset); + +static int mtrr_open(struct inode *inode, struct file *file) +{ + if (!mtrr_if) + return -EIO; + if (!mtrr_if->get) + return -ENXIO; + return single_open(file, mtrr_seq_show, NULL); +} + +static const struct file_operations mtrr_fops = { + .owner = THIS_MODULE, + .open = mtrr_open, + .read = seq_read, + .llseek = seq_lseek, + .write = mtrr_write, + .unlocked_ioctl = mtrr_ioctl, + .compat_ioctl = mtrr_ioctl, + .release = mtrr_close, +}; + + +static struct proc_dir_entry *proc_root_mtrr; + + +static int mtrr_seq_show(struct seq_file *seq, void *offset) +{ + char factor; + int i, max, len; + mtrr_type type; + unsigned long base, size; + + len = 0; + max = num_var_ranges; + for (i = 0; i < max; i++) { + mtrr_if->get(i, &base, &size, &type); + if (size == 0) + usage_table[i] = 0; + else { + if (size < (0x100000 >> PAGE_SHIFT)) { + /* less than 1MB */ + factor = 'K'; + size <<= PAGE_SHIFT - 10; + } else { + factor = 'M'; + size >>= 20 - PAGE_SHIFT; + } + /* RED-PEN: base can be > 32bit */ + len += seq_printf(seq, + "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n", + i, base, base >> (20 - PAGE_SHIFT), size, factor, + mtrr_attrib_to_str(type), usage_table[i]); + } + } + return 0; +} + +static int __init mtrr_if_init(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + if ((!cpu_has(c, X86_FEATURE_MTRR)) && + (!cpu_has(c, X86_FEATURE_K6_MTRR)) && + (!cpu_has(c, X86_FEATURE_CYRIX_ARR)) && + (!cpu_has(c, X86_FEATURE_CENTAUR_MCR))) + return -ENODEV; + + proc_root_mtrr = + create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root); + if (proc_root_mtrr) { + proc_root_mtrr->owner = THIS_MODULE; + proc_root_mtrr->proc_fops = &mtrr_fops; + } + return 0; +} + +arch_initcall(mtrr_if_init); +#endif /* CONFIG_PROC_FS */ diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c new file mode 100644 index 00000000000..c48b6fea5ab --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -0,0 +1,768 @@ +/* Generic MTRR (Memory Type Range Register) driver. + + Copyright (C) 1997-2000 Richard Gooch + Copyright (c) 2002 Patrick Mochel + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not, write to the Free + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Richard Gooch may be reached by email at rgooch@atnf.csiro.au + The postal address is: + Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. + + Source: "Pentium Pro Family Developer's Manual, Volume 3: + Operating System Writer's Guide" (Intel document number 242692), + section 11.11.7 + + This was cleaned and made readable by Patrick Mochel + on 6-7 March 2002. + Source: Intel Architecture Software Developers Manual, Volume 3: + System Programming Guide; Section 9.11. (1997 edition - PPro). +*/ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include "mtrr.h" + +u32 num_var_ranges = 0; + +unsigned int *usage_table; +static DEFINE_MUTEX(mtrr_mutex); + +u64 size_or_mask, size_and_mask; + +static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; + +struct mtrr_ops * mtrr_if = NULL; + +static void set_mtrr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type); + +#ifndef CONFIG_X86_64 +extern int arr3_protected; +#else +#define arr3_protected 0 +#endif + +void set_mtrr_ops(struct mtrr_ops * ops) +{ + if (ops->vendor && ops->vendor < X86_VENDOR_NUM) + mtrr_ops[ops->vendor] = ops; +} + +/* Returns non-zero if we have the write-combining memory type */ +static int have_wrcomb(void) +{ + struct pci_dev *dev; + u8 rev; + + if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) { + /* ServerWorks LE chipsets < rev 6 have problems with write-combining + Don't allow it and leave room for other chipsets to be tagged */ + if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && + dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { + pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); + if (rev <= 5) { + printk(KERN_INFO "mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); + pci_dev_put(dev); + return 0; + } + } + /* Intel 450NX errata # 23. Non ascending cacheline evictions to + write combining memory may resulting in data corruption */ + if (dev->vendor == PCI_VENDOR_ID_INTEL && + dev->device == PCI_DEVICE_ID_INTEL_82451NX) { + printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n"); + pci_dev_put(dev); + return 0; + } + pci_dev_put(dev); + } + return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0); +} + +/* This function returns the number of variable MTRRs */ +static void __init set_num_var_ranges(void) +{ + unsigned long config = 0, dummy; + + if (use_intel()) { + rdmsr(MTRRcap_MSR, config, dummy); + } else if (is_cpu(AMD)) + config = 2; + else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) + config = 8; + num_var_ranges = config & 0xff; +} + +static void __init init_table(void) +{ + int i, max; + + max = num_var_ranges; + if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL)) + == NULL) { + printk(KERN_ERR "mtrr: could not allocate\n"); + return; + } + for (i = 0; i < max; i++) + usage_table[i] = 1; +} + +struct set_mtrr_data { + atomic_t count; + atomic_t gate; + unsigned long smp_base; + unsigned long smp_size; + unsigned int smp_reg; + mtrr_type smp_type; +}; + +#ifdef CONFIG_SMP + +static void ipi_handler(void *info) +/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs. + [RETURNS] Nothing. +*/ +{ + struct set_mtrr_data *data = info; + unsigned long flags; + + local_irq_save(flags); + + atomic_dec(&data->count); + while(!atomic_read(&data->gate)) + cpu_relax(); + + /* The master has cleared me to execute */ + if (data->smp_reg != ~0U) + mtrr_if->set(data->smp_reg, data->smp_base, + data->smp_size, data->smp_type); + else + mtrr_if->set_all(); + + atomic_dec(&data->count); + while(atomic_read(&data->gate)) + cpu_relax(); + + atomic_dec(&data->count); + local_irq_restore(flags); +} + +#endif + +static inline int types_compatible(mtrr_type type1, mtrr_type type2) { + return type1 == MTRR_TYPE_UNCACHABLE || + type2 == MTRR_TYPE_UNCACHABLE || + (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) || + (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH); +} + +/** + * set_mtrr - update mtrrs on all processors + * @reg: mtrr in question + * @base: mtrr base + * @size: mtrr size + * @type: mtrr type + * + * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: + * + * 1. Send IPI to do the following: + * 2. Disable Interrupts + * 3. Wait for all procs to do so + * 4. Enter no-fill cache mode + * 5. Flush caches + * 6. Clear PGE bit + * 7. Flush all TLBs + * 8. Disable all range registers + * 9. Update the MTRRs + * 10. Enable all range registers + * 11. Flush all TLBs and caches again + * 12. Enter normal cache mode and reenable caching + * 13. Set PGE + * 14. Wait for buddies to catch up + * 15. Enable interrupts. + * + * What does that mean for us? Well, first we set data.count to the number + * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait + * until it hits 0 and proceed. We set the data.gate flag and reset data.count. + * Meanwhile, they are waiting for that flag to be set. Once it's set, each + * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it + * differently, so we call mtrr_if->set() callback and let them take care of it. + * When they're done, they again decrement data->count and wait for data.gate to + * be reset. + * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag. + * Everyone then enables interrupts and we all continue on. + * + * Note that the mechanism is the same for UP systems, too; all the SMP stuff + * becomes nops. + */ +static void set_mtrr(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +{ + struct set_mtrr_data data; + unsigned long flags; + + data.smp_reg = reg; + data.smp_base = base; + data.smp_size = size; + data.smp_type = type; + atomic_set(&data.count, num_booting_cpus() - 1); + /* make sure data.count is visible before unleashing other CPUs */ + smp_wmb(); + atomic_set(&data.gate,0); + + /* Start the ball rolling on other CPUs */ + if (smp_call_function(ipi_handler, &data, 1, 0) != 0) + panic("mtrr: timed out waiting for other CPUs\n"); + + local_irq_save(flags); + + while(atomic_read(&data.count)) + cpu_relax(); + + /* ok, reset count and toggle gate */ + atomic_set(&data.count, num_booting_cpus() - 1); + smp_wmb(); + atomic_set(&data.gate,1); + + /* do our MTRR business */ + + /* HACK! + * We use this same function to initialize the mtrrs on boot. + * The state of the boot cpu's mtrrs has been saved, and we want + * to replicate across all the APs. + * If we're doing that @reg is set to something special... + */ + if (reg != ~0U) + mtrr_if->set(reg,base,size,type); + + /* wait for the others */ + while(atomic_read(&data.count)) + cpu_relax(); + + atomic_set(&data.count, num_booting_cpus() - 1); + smp_wmb(); + atomic_set(&data.gate,0); + + /* + * Wait here for everyone to have seen the gate change + * So we're the last ones to touch 'data' + */ + while(atomic_read(&data.count)) + cpu_relax(); + + local_irq_restore(flags); +} + +/** + * mtrr_add_page - Add a memory type region + * @base: Physical base address of region in pages (in units of 4 kB!) + * @size: Physical size of region in pages (4 kB) + * @type: Type of MTRR desired + * @increment: If this is true do usage counting on the region + * + * Memory type region registers control the caching on newer Intel and + * non Intel processors. This function allows drivers to request an + * MTRR is added. The details and hardware specifics of each processor's + * implementation are hidden from the caller, but nevertheless the + * caller should expect to need to provide a power of two size on an + * equivalent power of two boundary. + * + * If the region cannot be added either because all regions are in use + * or the CPU cannot support it a negative value is returned. On success + * the register number for this entry is returned, but should be treated + * as a cookie only. + * + * On a multiprocessor machine the changes are made to all processors. + * This is required on x86 by the Intel processors. + * + * The available types are + * + * %MTRR_TYPE_UNCACHABLE - No caching + * + * %MTRR_TYPE_WRBACK - Write data back in bursts whenever + * + * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts + * + * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes + * + * BUGS: Needs a quiet flag for the cases where drivers do not mind + * failures and do not wish system log messages to be sent. + */ + +int mtrr_add_page(unsigned long base, unsigned long size, + unsigned int type, char increment) +{ + int i, replace, error; + mtrr_type ltype; + unsigned long lbase, lsize; + + if (!mtrr_if) + return -ENXIO; + + if ((error = mtrr_if->validate_add_page(base,size,type))) + return error; + + if (type >= MTRR_NUM_TYPES) { + printk(KERN_WARNING "mtrr: type: %u invalid\n", type); + return -EINVAL; + } + + /* If the type is WC, check that this processor supports it */ + if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { + printk(KERN_WARNING + "mtrr: your processor doesn't support write-combining\n"); + return -ENOSYS; + } + + if (!size) { + printk(KERN_WARNING "mtrr: zero sized request\n"); + return -EINVAL; + } + + if (base & size_or_mask || size & size_or_mask) { + printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n"); + return -EINVAL; + } + + error = -EINVAL; + replace = -1; + + /* No CPU hotplug when we change MTRR entries */ + lock_cpu_hotplug(); + /* Search for existing MTRR */ + mutex_lock(&mtrr_mutex); + for (i = 0; i < num_var_ranges; ++i) { + mtrr_if->get(i, &lbase, &lsize, <ype); + if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase) + continue; + /* At this point we know there is some kind of overlap/enclosure */ + if (base < lbase || base + size - 1 > lbase + lsize - 1) { + if (base <= lbase && base + size - 1 >= lbase + lsize - 1) { + /* New region encloses an existing region */ + if (type == ltype) { + replace = replace == -1 ? i : -2; + continue; + } + else if (types_compatible(type, ltype)) + continue; + } + printk(KERN_WARNING + "mtrr: 0x%lx000,0x%lx000 overlaps existing" + " 0x%lx000,0x%lx000\n", base, size, lbase, + lsize); + goto out; + } + /* New region is enclosed by an existing region */ + if (ltype != type) { + if (types_compatible(type, ltype)) + continue; + printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", + base, size, mtrr_attrib_to_str(ltype), + mtrr_attrib_to_str(type)); + goto out; + } + if (increment) + ++usage_table[i]; + error = i; + goto out; + } + /* Search for an empty MTRR */ + i = mtrr_if->get_free_region(base, size, replace); + if (i >= 0) { + set_mtrr(i, base, size, type); + if (likely(replace < 0)) + usage_table[i] = 1; + else { + usage_table[i] = usage_table[replace] + !!increment; + if (unlikely(replace != i)) { + set_mtrr(replace, 0, 0, 0); + usage_table[replace] = 0; + } + } + } else + printk(KERN_INFO "mtrr: no more MTRRs available\n"); + error = i; + out: + mutex_unlock(&mtrr_mutex); + unlock_cpu_hotplug(); + return error; +} + +static int mtrr_check(unsigned long base, unsigned long size) +{ + if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { + printk(KERN_WARNING + "mtrr: size and base must be multiples of 4 kiB\n"); + printk(KERN_DEBUG + "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + dump_stack(); + return -1; + } + return 0; +} + +/** + * mtrr_add - Add a memory type region + * @base: Physical base address of region + * @size: Physical size of region + * @type: Type of MTRR desired + * @increment: If this is true do usage counting on the region + * + * Memory type region registers control the caching on newer Intel and + * non Intel processors. This function allows drivers to request an + * MTRR is added. The details and hardware specifics of each processor's + * implementation are hidden from the caller, but nevertheless the + * caller should expect to need to provide a power of two size on an + * equivalent power of two boundary. + * + * If the region cannot be added either because all regions are in use + * or the CPU cannot support it a negative value is returned. On success + * the register number for this entry is returned, but should be treated + * as a cookie only. + * + * On a multiprocessor machine the changes are made to all processors. + * This is required on x86 by the Intel processors. + * + * The available types are + * + * %MTRR_TYPE_UNCACHABLE - No caching + * + * %MTRR_TYPE_WRBACK - Write data back in bursts whenever + * + * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts + * + * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes + * + * BUGS: Needs a quiet flag for the cases where drivers do not mind + * failures and do not wish system log messages to be sent. + */ + +int +mtrr_add(unsigned long base, unsigned long size, unsigned int type, + char increment) +{ + if (mtrr_check(base, size)) + return -EINVAL; + return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, + increment); +} + +/** + * mtrr_del_page - delete a memory type region + * @reg: Register returned by mtrr_add + * @base: Physical base address + * @size: Size of region + * + * If register is supplied then base and size are ignored. This is + * how drivers should call it. + * + * Releases an MTRR region. If the usage count drops to zero the + * register is freed and the region returns to default state. + * On success the register is returned, on failure a negative error + * code. + */ + +int mtrr_del_page(int reg, unsigned long base, unsigned long size) +{ + int i, max; + mtrr_type ltype; + unsigned long lbase, lsize; + int error = -EINVAL; + + if (!mtrr_if) + return -ENXIO; + + max = num_var_ranges; + /* No CPU hotplug when we change MTRR entries */ + lock_cpu_hotplug(); + mutex_lock(&mtrr_mutex); + if (reg < 0) { + /* Search for existing MTRR */ + for (i = 0; i < max; ++i) { + mtrr_if->get(i, &lbase, &lsize, <ype); + if (lbase == base && lsize == size) { + reg = i; + break; + } + } + if (reg < 0) { + printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base, + size); + goto out; + } + } + if (reg >= max) { + printk(KERN_WARNING "mtrr: register: %d too big\n", reg); + goto out; + } + if (is_cpu(CYRIX) && !use_intel()) { + if ((reg == 3) && arr3_protected) { + printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n"); + goto out; + } + } + mtrr_if->get(reg, &lbase, &lsize, <ype); + if (lsize < 1) { + printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); + goto out; + } + if (usage_table[reg] < 1) { + printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); + goto out; + } + if (--usage_table[reg] < 1) + set_mtrr(reg, 0, 0, 0); + error = reg; + out: + mutex_unlock(&mtrr_mutex); + unlock_cpu_hotplug(); + return error; +} +/** + * mtrr_del - delete a memory type region + * @reg: Register returned by mtrr_add + * @base: Physical base address + * @size: Size of region + * + * If register is supplied then base and size are ignored. This is + * how drivers should call it. + * + * Releases an MTRR region. If the usage count drops to zero the + * register is freed and the region returns to default state. + * On success the register is returned, on failure a negative error + * code. + */ + +int +mtrr_del(int reg, unsigned long base, unsigned long size) +{ + if (mtrr_check(base, size)) + return -EINVAL; + return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); +} + +EXPORT_SYMBOL(mtrr_add); +EXPORT_SYMBOL(mtrr_del); + +/* HACK ALERT! + * These should be called implicitly, but we can't yet until all the initcall + * stuff is done... + */ +extern void amd_init_mtrr(void); +extern void cyrix_init_mtrr(void); +extern void centaur_init_mtrr(void); + +static void __init init_ifs(void) +{ +#ifndef CONFIG_X86_64 + amd_init_mtrr(); + cyrix_init_mtrr(); + centaur_init_mtrr(); +#endif +} + +/* The suspend/resume methods are only for CPU without MTRR. CPU using generic + * MTRR driver doesn't require this + */ +struct mtrr_value { + mtrr_type ltype; + unsigned long lbase; + unsigned long lsize; +}; + +static struct mtrr_value * mtrr_state; + +static int mtrr_save(struct sys_device * sysdev, pm_message_t state) +{ + int i; + int size = num_var_ranges * sizeof(struct mtrr_value); + + mtrr_state = kzalloc(size,GFP_ATOMIC); + if (!mtrr_state) + return -ENOMEM; + + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, + &mtrr_state[i].lbase, + &mtrr_state[i].lsize, + &mtrr_state[i].ltype); + } + return 0; +} + +static int mtrr_restore(struct sys_device * sysdev) +{ + int i; + + for (i = 0; i < num_var_ranges; i++) { + if (mtrr_state[i].lsize) + set_mtrr(i, + mtrr_state[i].lbase, + mtrr_state[i].lsize, + mtrr_state[i].ltype); + } + kfree(mtrr_state); + return 0; +} + + + +static struct sysdev_driver mtrr_sysdev_driver = { + .suspend = mtrr_save, + .resume = mtrr_restore, +}; + + +/** + * mtrr_bp_init - initialize mtrrs on the boot CPU + * + * This needs to be called early; before any of the other CPUs are + * initialized (i.e. before smp_init()). + * + */ +void __init mtrr_bp_init(void) +{ + init_ifs(); + + if (cpu_has_mtrr) { + mtrr_if = &generic_mtrr_ops; + size_or_mask = 0xff000000; /* 36 bits */ + size_and_mask = 0x00f00000; + + /* This is an AMD specific MSR, but we assume(hope?) that + Intel will implement it to when they extend the address + bus of the Xeon. */ + if (cpuid_eax(0x80000000) >= 0x80000008) { + u32 phys_addr; + phys_addr = cpuid_eax(0x80000008) & 0xff; + /* CPUID workaround for Intel 0F33/0F34 CPU */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 0xF && + boot_cpu_data.x86_model == 0x3 && + (boot_cpu_data.x86_mask == 0x3 || + boot_cpu_data.x86_mask == 0x4)) + phys_addr = 36; + + size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1); + size_and_mask = ~size_or_mask & 0xfffff00000ULL; + } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && + boot_cpu_data.x86 == 6) { + /* VIA C* family have Intel style MTRRs, but + don't support PAE */ + size_or_mask = 0xfff00000; /* 32 bits */ + size_and_mask = 0; + } + } else { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + if (cpu_has_k6_mtrr) { + /* Pre-Athlon (K6) AMD CPU MTRRs */ + mtrr_if = mtrr_ops[X86_VENDOR_AMD]; + size_or_mask = 0xfff00000; /* 32 bits */ + size_and_mask = 0; + } + break; + case X86_VENDOR_CENTAUR: + if (cpu_has_centaur_mcr) { + mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR]; + size_or_mask = 0xfff00000; /* 32 bits */ + size_and_mask = 0; + } + break; + case X86_VENDOR_CYRIX: + if (cpu_has_cyrix_arr) { + mtrr_if = mtrr_ops[X86_VENDOR_CYRIX]; + size_or_mask = 0xfff00000; /* 32 bits */ + size_and_mask = 0; + } + break; + default: + break; + } + } + + if (mtrr_if) { + set_num_var_ranges(); + init_table(); + if (use_intel()) + get_mtrr_state(); + } +} + +void mtrr_ap_init(void) +{ + unsigned long flags; + + if (!mtrr_if || !use_intel()) + return; + /* + * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed, + * but this routine will be called in cpu boot time, holding the lock + * breaks it. This routine is called in two cases: 1.very earily time + * of software resume, when there absolutely isn't mtrr entry changes; + * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to + * prevent mtrr entry changes + */ + local_irq_save(flags); + + mtrr_if->set_all(); + + local_irq_restore(flags); +} + +/** + * Save current fixed-range MTRR state of the BSP + */ +void mtrr_save_state(void) +{ + int cpu = get_cpu(); + + if (cpu == 0) + mtrr_save_fixed_ranges(NULL); + else + smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1); + put_cpu(); +} + +static int __init mtrr_init_finialize(void) +{ + if (!mtrr_if) + return 0; + if (use_intel()) + mtrr_state_warn(); + else { + /* The CPUs haven't MTRR and seemes not support SMP. They have + * specific drivers, we use a tricky method to support + * suspend/resume for them. + * TBD: is there any system with such CPU which supports + * suspend/resume? if no, we should remove the code. + */ + sysdev_driver_register(&cpu_sysdev_class, + &mtrr_sysdev_driver); + } + return 0; +} +subsys_initcall(mtrr_init_finialize); diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h new file mode 100644 index 00000000000..289dfe6030e --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -0,0 +1,98 @@ +/* + * local mtrr defines. + */ + +#ifndef TRUE +#define TRUE 1 +#define FALSE 0 +#endif + +#define MTRRcap_MSR 0x0fe +#define MTRRdefType_MSR 0x2ff + +#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) +#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) + +#define NUM_FIXED_RANGES 88 +#define MTRRfix64K_00000_MSR 0x250 +#define MTRRfix16K_80000_MSR 0x258 +#define MTRRfix16K_A0000_MSR 0x259 +#define MTRRfix4K_C0000_MSR 0x268 +#define MTRRfix4K_C8000_MSR 0x269 +#define MTRRfix4K_D0000_MSR 0x26a +#define MTRRfix4K_D8000_MSR 0x26b +#define MTRRfix4K_E0000_MSR 0x26c +#define MTRRfix4K_E8000_MSR 0x26d +#define MTRRfix4K_F0000_MSR 0x26e +#define MTRRfix4K_F8000_MSR 0x26f + +#define MTRR_CHANGE_MASK_FIXED 0x01 +#define MTRR_CHANGE_MASK_VARIABLE 0x02 +#define MTRR_CHANGE_MASK_DEFTYPE 0x04 + +/* In the Intel processor's MTRR interface, the MTRR type is always held in + an 8 bit field: */ +typedef u8 mtrr_type; + +struct mtrr_ops { + u32 vendor; + u32 use_intel_if; +// void (*init)(void); + void (*set)(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type); + void (*set_all)(void); + + void (*get)(unsigned int reg, unsigned long *base, + unsigned long *size, mtrr_type * type); + int (*get_free_region)(unsigned long base, unsigned long size, + int replace_reg); + int (*validate_add_page)(unsigned long base, unsigned long size, + unsigned int type); + int (*have_wrcomb)(void); +}; + +extern int generic_get_free_region(unsigned long base, unsigned long size, + int replace_reg); +extern int generic_validate_add_page(unsigned long base, unsigned long size, + unsigned int type); + +extern struct mtrr_ops generic_mtrr_ops; + +extern int positive_have_wrcomb(void); + +/* library functions for processor-specific routines */ +struct set_mtrr_context { + unsigned long flags; + unsigned long cr4val; + u32 deftype_lo; + u32 deftype_hi; + u32 ccr3; +}; + +struct mtrr_var_range { + u32 base_lo; + u32 base_hi; + u32 mask_lo; + u32 mask_hi; +}; + +void set_mtrr_done(struct set_mtrr_context *ctxt); +void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); +void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); + +void get_mtrr_state(void); + +extern void set_mtrr_ops(struct mtrr_ops * ops); + +extern u64 size_or_mask, size_and_mask; +extern struct mtrr_ops * mtrr_if; + +#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) +#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) + +extern unsigned int num_var_ranges; + +void mtrr_state_warn(void); +const char *mtrr_attrib_to_str(int x); +void mtrr_wrmsr(unsigned, unsigned, unsigned); + diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c new file mode 100644 index 00000000000..c9014ca4a57 --- /dev/null +++ b/arch/x86/kernel/cpu/mtrr/state.c @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include +#include +#include "mtrr.h" + + +/* Put the processor into a state where MTRRs can be safely set */ +void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) +{ + unsigned int cr0; + + /* Disable interrupts locally */ + local_irq_save(ctxt->flags); + + if (use_intel() || is_cpu(CYRIX)) { + + /* Save value of CR4 and clear Page Global Enable (bit 7) */ + if ( cpu_has_pge ) { + ctxt->cr4val = read_cr4(); + write_cr4(ctxt->cr4val & ~X86_CR4_PGE); + } + + /* Disable and flush caches. Note that wbinvd flushes the TLBs as + a side-effect */ + cr0 = read_cr0() | 0x40000000; + wbinvd(); + write_cr0(cr0); + wbinvd(); + + if (use_intel()) + /* Save MTRR state */ + rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); + else + /* Cyrix ARRs - everything else were excluded at the top */ + ctxt->ccr3 = getCx86(CX86_CCR3); + } +} + +void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) +{ + if (use_intel()) + /* Disable MTRRs, and set the default type to uncached */ + mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL, + ctxt->deftype_hi); + else if (is_cpu(CYRIX)) + /* Cyrix ARRs - everything else were excluded at the top */ + setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10); +} + +/* Restore the processor after a set_mtrr_prepare */ +void set_mtrr_done(struct set_mtrr_context *ctxt) +{ + if (use_intel() || is_cpu(CYRIX)) { + + /* Flush caches and TLBs */ + wbinvd(); + + /* Restore MTRRdefType */ + if (use_intel()) + /* Intel (P6) standard MTRRs */ + mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); + else + /* Cyrix ARRs - everything else was excluded at the top */ + setCx86(CX86_CCR3, ctxt->ccr3); + + /* Enable caches */ + write_cr0(read_cr0() & 0xbfffffff); + + /* Restore value of CR4 */ + if ( cpu_has_pge ) + write_cr4(ctxt->cr4val); + } + /* Re-enable interrupts locally (if enabled previously) */ + local_irq_restore(ctxt->flags); +} + diff --git a/arch/x86_64/kernel/Makefile_64 b/arch/x86_64/kernel/Makefile_64 index a6d8216084a..e7480509103 100644 --- a/arch/x86_64/kernel/Makefile_64 +++ b/arch/x86_64/kernel/Makefile_64 @@ -15,7 +15,7 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_X86_MCE) += mce_64.o therm_throt.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o -obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/ +obj-$(CONFIG_MTRR) += ../../x86/kernel/cpu/mtrr/ obj-$(CONFIG_ACPI) += acpi/ obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_MICROCODE) += microcode.o -- cgit v1.2.3-70-g09d2 From da957e111bb0c189a4a3bf8a00caaecb59ed94ca Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:31 +0200 Subject: i386: move math-emu Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Kconfig | 2 +- arch/i386/Makefile | 2 +- arch/i386/math-emu/Makefile | 30 - arch/i386/math-emu/README | 427 --------- arch/i386/math-emu/control_w.h | 45 - arch/i386/math-emu/div_Xsig.S | 365 -------- arch/i386/math-emu/div_small.S | 47 - arch/i386/math-emu/errors.c | 739 --------------- arch/i386/math-emu/exception.h | 53 -- arch/i386/math-emu/fpu_arith.c | 174 ---- arch/i386/math-emu/fpu_asm.h | 32 - arch/i386/math-emu/fpu_aux.c | 204 ---- arch/i386/math-emu/fpu_emu.h | 218 ----- arch/i386/math-emu/fpu_entry.c | 761 --------------- arch/i386/math-emu/fpu_etc.c | 143 --- arch/i386/math-emu/fpu_proto.h | 140 --- arch/i386/math-emu/fpu_system.h | 90 -- arch/i386/math-emu/fpu_tags.c | 127 --- arch/i386/math-emu/fpu_trig.c | 1845 ------------------------------------- arch/i386/math-emu/get_address.c | 438 --------- arch/i386/math-emu/load_store.c | 272 ------ arch/i386/math-emu/mul_Xsig.S | 176 ---- arch/i386/math-emu/poly.h | 121 --- arch/i386/math-emu/poly_2xm1.c | 156 ---- arch/i386/math-emu/poly_atan.c | 229 ----- arch/i386/math-emu/poly_l2.c | 272 ------ arch/i386/math-emu/poly_sin.c | 397 -------- arch/i386/math-emu/poly_tan.c | 222 ----- arch/i386/math-emu/polynom_Xsig.S | 135 --- arch/i386/math-emu/reg_add_sub.c | 374 -------- arch/i386/math-emu/reg_compare.c | 381 -------- arch/i386/math-emu/reg_constant.c | 120 --- arch/i386/math-emu/reg_constant.h | 25 - arch/i386/math-emu/reg_convert.c | 53 -- arch/i386/math-emu/reg_divide.c | 207 ----- arch/i386/math-emu/reg_ld_str.c | 1375 --------------------------- arch/i386/math-emu/reg_mul.c | 132 --- arch/i386/math-emu/reg_norm.S | 147 --- arch/i386/math-emu/reg_round.S | 708 -------------- arch/i386/math-emu/reg_u_add.S | 167 ---- arch/i386/math-emu/reg_u_div.S | 471 ---------- arch/i386/math-emu/reg_u_mul.S | 148 --- arch/i386/math-emu/reg_u_sub.S | 272 ------ arch/i386/math-emu/round_Xsig.S | 141 --- arch/i386/math-emu/shr_Xsig.S | 87 -- arch/i386/math-emu/status_w.h | 67 -- arch/i386/math-emu/version.h | 12 - arch/i386/math-emu/wm_shrx.S | 204 ---- arch/i386/math-emu/wm_sqrt.S | 470 ---------- arch/x86/math-emu/Makefile | 30 + arch/x86/math-emu/README | 427 +++++++++ arch/x86/math-emu/control_w.h | 45 + arch/x86/math-emu/div_Xsig.S | 365 ++++++++ arch/x86/math-emu/div_small.S | 47 + arch/x86/math-emu/errors.c | 739 +++++++++++++++ arch/x86/math-emu/exception.h | 53 ++ arch/x86/math-emu/fpu_arith.c | 174 ++++ arch/x86/math-emu/fpu_asm.h | 32 + arch/x86/math-emu/fpu_aux.c | 204 ++++ arch/x86/math-emu/fpu_emu.h | 218 +++++ arch/x86/math-emu/fpu_entry.c | 761 +++++++++++++++ arch/x86/math-emu/fpu_etc.c | 143 +++ arch/x86/math-emu/fpu_proto.h | 140 +++ arch/x86/math-emu/fpu_system.h | 90 ++ arch/x86/math-emu/fpu_tags.c | 127 +++ arch/x86/math-emu/fpu_trig.c | 1845 +++++++++++++++++++++++++++++++++++++ arch/x86/math-emu/get_address.c | 438 +++++++++ arch/x86/math-emu/load_store.c | 272 ++++++ arch/x86/math-emu/mul_Xsig.S | 176 ++++ arch/x86/math-emu/poly.h | 121 +++ arch/x86/math-emu/poly_2xm1.c | 156 ++++ arch/x86/math-emu/poly_atan.c | 229 +++++ arch/x86/math-emu/poly_l2.c | 272 ++++++ arch/x86/math-emu/poly_sin.c | 397 ++++++++ arch/x86/math-emu/poly_tan.c | 222 +++++ arch/x86/math-emu/polynom_Xsig.S | 135 +++ arch/x86/math-emu/reg_add_sub.c | 374 ++++++++ arch/x86/math-emu/reg_compare.c | 381 ++++++++ arch/x86/math-emu/reg_constant.c | 120 +++ arch/x86/math-emu/reg_constant.h | 25 + arch/x86/math-emu/reg_convert.c | 53 ++ arch/x86/math-emu/reg_divide.c | 207 +++++ arch/x86/math-emu/reg_ld_str.c | 1375 +++++++++++++++++++++++++++ arch/x86/math-emu/reg_mul.c | 132 +++ arch/x86/math-emu/reg_norm.S | 147 +++ arch/x86/math-emu/reg_round.S | 708 ++++++++++++++ arch/x86/math-emu/reg_u_add.S | 167 ++++ arch/x86/math-emu/reg_u_div.S | 471 ++++++++++ arch/x86/math-emu/reg_u_mul.S | 148 +++ arch/x86/math-emu/reg_u_sub.S | 272 ++++++ arch/x86/math-emu/round_Xsig.S | 141 +++ arch/x86/math-emu/shr_Xsig.S | 87 ++ arch/x86/math-emu/status_w.h | 67 ++ arch/x86/math-emu/version.h | 12 + arch/x86/math-emu/wm_shrx.S | 204 ++++ arch/x86/math-emu/wm_sqrt.S | 470 ++++++++++ 96 files changed, 13421 insertions(+), 13421 deletions(-) delete mode 100644 arch/i386/math-emu/Makefile delete mode 100644 arch/i386/math-emu/README delete mode 100644 arch/i386/math-emu/control_w.h delete mode 100644 arch/i386/math-emu/div_Xsig.S delete mode 100644 arch/i386/math-emu/div_small.S delete mode 100644 arch/i386/math-emu/errors.c delete mode 100644 arch/i386/math-emu/exception.h delete mode 100644 arch/i386/math-emu/fpu_arith.c delete mode 100644 arch/i386/math-emu/fpu_asm.h delete mode 100644 arch/i386/math-emu/fpu_aux.c delete mode 100644 arch/i386/math-emu/fpu_emu.h delete mode 100644 arch/i386/math-emu/fpu_entry.c delete mode 100644 arch/i386/math-emu/fpu_etc.c delete mode 100644 arch/i386/math-emu/fpu_proto.h delete mode 100644 arch/i386/math-emu/fpu_system.h delete mode 100644 arch/i386/math-emu/fpu_tags.c delete mode 100644 arch/i386/math-emu/fpu_trig.c delete mode 100644 arch/i386/math-emu/get_address.c delete mode 100644 arch/i386/math-emu/load_store.c delete mode 100644 arch/i386/math-emu/mul_Xsig.S delete mode 100644 arch/i386/math-emu/poly.h delete mode 100644 arch/i386/math-emu/poly_2xm1.c delete mode 100644 arch/i386/math-emu/poly_atan.c delete mode 100644 arch/i386/math-emu/poly_l2.c delete mode 100644 arch/i386/math-emu/poly_sin.c delete mode 100644 arch/i386/math-emu/poly_tan.c delete mode 100644 arch/i386/math-emu/polynom_Xsig.S delete mode 100644 arch/i386/math-emu/reg_add_sub.c delete mode 100644 arch/i386/math-emu/reg_compare.c delete mode 100644 arch/i386/math-emu/reg_constant.c delete mode 100644 arch/i386/math-emu/reg_constant.h delete mode 100644 arch/i386/math-emu/reg_convert.c delete mode 100644 arch/i386/math-emu/reg_divide.c delete mode 100644 arch/i386/math-emu/reg_ld_str.c delete mode 100644 arch/i386/math-emu/reg_mul.c delete mode 100644 arch/i386/math-emu/reg_norm.S delete mode 100644 arch/i386/math-emu/reg_round.S delete mode 100644 arch/i386/math-emu/reg_u_add.S delete mode 100644 arch/i386/math-emu/reg_u_div.S delete mode 100644 arch/i386/math-emu/reg_u_mul.S delete mode 100644 arch/i386/math-emu/reg_u_sub.S delete mode 100644 arch/i386/math-emu/round_Xsig.S delete mode 100644 arch/i386/math-emu/shr_Xsig.S delete mode 100644 arch/i386/math-emu/status_w.h delete mode 100644 arch/i386/math-emu/version.h delete mode 100644 arch/i386/math-emu/wm_shrx.S delete mode 100644 arch/i386/math-emu/wm_sqrt.S create mode 100644 arch/x86/math-emu/Makefile create mode 100644 arch/x86/math-emu/README create mode 100644 arch/x86/math-emu/control_w.h create mode 100644 arch/x86/math-emu/div_Xsig.S create mode 100644 arch/x86/math-emu/div_small.S create mode 100644 arch/x86/math-emu/errors.c create mode 100644 arch/x86/math-emu/exception.h create mode 100644 arch/x86/math-emu/fpu_arith.c create mode 100644 arch/x86/math-emu/fpu_asm.h create mode 100644 arch/x86/math-emu/fpu_aux.c create mode 100644 arch/x86/math-emu/fpu_emu.h create mode 100644 arch/x86/math-emu/fpu_entry.c create mode 100644 arch/x86/math-emu/fpu_etc.c create mode 100644 arch/x86/math-emu/fpu_proto.h create mode 100644 arch/x86/math-emu/fpu_system.h create mode 100644 arch/x86/math-emu/fpu_tags.c create mode 100644 arch/x86/math-emu/fpu_trig.c create mode 100644 arch/x86/math-emu/get_address.c create mode 100644 arch/x86/math-emu/load_store.c create mode 100644 arch/x86/math-emu/mul_Xsig.S create mode 100644 arch/x86/math-emu/poly.h create mode 100644 arch/x86/math-emu/poly_2xm1.c create mode 100644 arch/x86/math-emu/poly_atan.c create mode 100644 arch/x86/math-emu/poly_l2.c create mode 100644 arch/x86/math-emu/poly_sin.c create mode 100644 arch/x86/math-emu/poly_tan.c create mode 100644 arch/x86/math-emu/polynom_Xsig.S create mode 100644 arch/x86/math-emu/reg_add_sub.c create mode 100644 arch/x86/math-emu/reg_compare.c create mode 100644 arch/x86/math-emu/reg_constant.c create mode 100644 arch/x86/math-emu/reg_constant.h create mode 100644 arch/x86/math-emu/reg_convert.c create mode 100644 arch/x86/math-emu/reg_divide.c create mode 100644 arch/x86/math-emu/reg_ld_str.c create mode 100644 arch/x86/math-emu/reg_mul.c create mode 100644 arch/x86/math-emu/reg_norm.S create mode 100644 arch/x86/math-emu/reg_round.S create mode 100644 arch/x86/math-emu/reg_u_add.S create mode 100644 arch/x86/math-emu/reg_u_div.S create mode 100644 arch/x86/math-emu/reg_u_mul.S create mode 100644 arch/x86/math-emu/reg_u_sub.S create mode 100644 arch/x86/math-emu/round_Xsig.S create mode 100644 arch/x86/math-emu/shr_Xsig.S create mode 100644 arch/x86/math-emu/status_w.h create mode 100644 arch/x86/math-emu/version.h create mode 100644 arch/x86/math-emu/wm_shrx.S create mode 100644 arch/x86/math-emu/wm_sqrt.S (limited to 'arch/x86') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index fc86d41d252..561cc21914b 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -707,7 +707,7 @@ config MATH_EMULATION intend to use this kernel on different machines. More information about the internals of the Linux math coprocessor - emulation can be found in . + emulation can be found in . If you are not sure, say Y; apart from resulting in a 66 KB bigger kernel, it won't hurt. diff --git a/arch/i386/Makefile b/arch/i386/Makefile index dca07ae933d..fe374b65430 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -106,7 +106,7 @@ core-y += arch/i386/kernel/ \ arch/i386/mm/ \ $(mcore-y)/ \ arch/x86/crypto/ -drivers-$(CONFIG_MATH_EMULATION) += arch/i386/math-emu/ +drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ drivers-$(CONFIG_PCI) += arch/i386/pci/ # must be linked after kernel/ drivers-$(CONFIG_OPROFILE) += arch/i386/oprofile/ diff --git a/arch/i386/math-emu/Makefile b/arch/i386/math-emu/Makefile deleted file mode 100644 index 9c943fa6ce6..00000000000 --- a/arch/i386/math-emu/Makefile +++ /dev/null @@ -1,30 +0,0 @@ -# -# Makefile for wm-FPU-emu -# - -#DEBUG = -DDEBUGGING -DEBUG = -PARANOID = -DPARANOID -CFLAGS := $(CFLAGS) $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION) - -EXTRA_AFLAGS := $(PARANOID) - -# From 'C' language sources: -C_OBJS =fpu_entry.o errors.o \ - fpu_arith.o fpu_aux.o fpu_etc.o fpu_tags.o fpu_trig.o \ - load_store.o get_address.o \ - poly_atan.o poly_l2.o poly_2xm1.o poly_sin.o poly_tan.o \ - reg_add_sub.o reg_compare.o reg_constant.o reg_convert.o \ - reg_ld_str.o reg_divide.o reg_mul.o - -# From 80x86 assembler sources: -A_OBJS =reg_u_add.o reg_u_div.o reg_u_mul.o reg_u_sub.o \ - div_small.o reg_norm.o reg_round.o \ - wm_shrx.o wm_sqrt.o \ - div_Xsig.o polynom_Xsig.o round_Xsig.o \ - shr_Xsig.o mul_Xsig.o - -obj-y =$(C_OBJS) $(A_OBJS) - -proto: - cproto -e -DMAKING_PROTO *.c >fpu_proto.h diff --git a/arch/i386/math-emu/README b/arch/i386/math-emu/README deleted file mode 100644 index e6235491d6e..00000000000 --- a/arch/i386/math-emu/README +++ /dev/null @@ -1,427 +0,0 @@ - +---------------------------------------------------------------------------+ - | wm-FPU-emu an FPU emulator for 80386 and 80486SX microprocessors. | - | | - | Copyright (C) 1992,1993,1994,1995,1996,1997,1999 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@melbpc.org.au | - | | - | This program is free software; you can redistribute it and/or modify | - | it under the terms of the GNU General Public License version 2 as | - | published by the Free Software Foundation. | - | | - | This program is distributed in the hope that it will be useful, | - | but WITHOUT ANY WARRANTY; without even the implied warranty of | - | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | - | GNU General Public License for more details. | - | | - | You should have received a copy of the GNU General Public License | - | along with this program; if not, write to the Free Software | - | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | - | | - +---------------------------------------------------------------------------+ - - - -wm-FPU-emu is an FPU emulator for Linux. It is derived from wm-emu387 -which was my 80387 emulator for early versions of djgpp (gcc under -msdos); wm-emu387 was in turn based upon emu387 which was written by -DJ Delorie for djgpp. The interface to the Linux kernel is based upon -the original Linux math emulator by Linus Torvalds. - -My target FPU for wm-FPU-emu is that described in the Intel486 -Programmer's Reference Manual (1992 edition). Unfortunately, numerous -facets of the functioning of the FPU are not well covered in the -Reference Manual. The information in the manual has been supplemented -with measurements on real 80486's. Unfortunately, it is simply not -possible to be sure that all of the peculiarities of the 80486 have -been discovered, so there is always likely to be obscure differences -in the detailed behaviour of the emulator and a real 80486. - -wm-FPU-emu does not implement all of the behaviour of the 80486 FPU, -but is very close. See "Limitations" later in this file for a list of -some differences. - -Please report bugs, etc to me at: - billm@melbpc.org.au -or b.metzenthen@medoto.unimelb.edu.au - -For more information on the emulator and on floating point topics, see -my web pages, currently at http://www.suburbia.net/~billm/ - - ---Bill Metzenthen - December 1999 - - ------------------------ Internals of wm-FPU-emu ----------------------- - -Numeric algorithms: -(1) Add, subtract, and multiply. Nothing remarkable in these. -(2) Divide has been tuned to get reasonable performance. The algorithm - is not the obvious one which most people seem to use, but is designed - to take advantage of the characteristics of the 80386. I expect that - it has been invented many times before I discovered it, but I have not - seen it. It is based upon one of those ideas which one carries around - for years without ever bothering to check it out. -(3) The sqrt function has been tuned to get good performance. It is based - upon Newton's classic method. Performance was improved by capitalizing - upon the properties of Newton's method, and the code is once again - structured taking account of the 80386 characteristics. -(4) The trig, log, and exp functions are based in each case upon quasi- - "optimal" polynomial approximations. My definition of "optimal" was - based upon getting good accuracy with reasonable speed. -(5) The argument reducing code for the trig function effectively uses - a value of pi which is accurate to more than 128 bits. As a consequence, - the reduced argument is accurate to more than 64 bits for arguments up - to a few pi, and accurate to more than 64 bits for most arguments, - even for arguments approaching 2^63. This is far superior to an - 80486, which uses a value of pi which is accurate to 66 bits. - -The code of the emulator is complicated slightly by the need to -account for a limited form of re-entrancy. Normally, the emulator will -emulate each FPU instruction to completion without interruption. -However, it may happen that when the emulator is accessing the user -memory space, swapping may be needed. In this case the emulator may be -temporarily suspended while disk i/o takes place. During this time -another process may use the emulator, thereby perhaps changing static -variables. The code which accesses user memory is confined to five -files: - fpu_entry.c - reg_ld_str.c - load_store.c - get_address.c - errors.c -As from version 1.12 of the emulator, no static variables are used -(apart from those in the kernel's per-process tables). The emulator is -therefore now fully re-entrant, rather than having just the restricted -form of re-entrancy which is required by the Linux kernel. - ------------------------ Limitations of wm-FPU-emu ----------------------- - -There are a number of differences between the current wm-FPU-emu -(version 2.01) and the 80486 FPU (apart from bugs). The differences -are fewer than those which applied to the 1.xx series of the emulator. -Some of the more important differences are listed below: - -The Roundup flag does not have much meaning for the transcendental -functions and its 80486 value with these functions is likely to differ -from its emulator value. - -In a few rare cases the Underflow flag obtained with the emulator will -be different from that obtained with an 80486. This occurs when the -following conditions apply simultaneously: -(a) the operands have a higher precision than the current setting of the - precision control (PC) flags. -(b) the underflow exception is masked. -(c) the magnitude of the exact result (before rounding) is less than 2^-16382. -(d) the magnitude of the final result (after rounding) is exactly 2^-16382. -(e) the magnitude of the exact result would be exactly 2^-16382 if the - operands were rounded to the current precision before the arithmetic - operation was performed. -If all of these apply, the emulator will set the Underflow flag but a real -80486 will not. - -NOTE: Certain formats of Extended Real are UNSUPPORTED. They are -unsupported by the 80486. They are the Pseudo-NaNs, Pseudoinfinities, -and Unnormals. None of these will be generated by an 80486 or by the -emulator. Do not use them. The emulator treats them differently in -detail from the way an 80486 does. - -Self modifying code can cause the emulator to fail. An example of such -code is: - movl %esp,[%ebx] - fld1 -The FPU instruction may be (usually will be) loaded into the pre-fetch -queue of the CPU before the mov instruction is executed. If the -destination of the 'movl' overlaps the FPU instruction then the bytes -in the prefetch queue and memory will be inconsistent when the FPU -instruction is executed. The emulator will be invoked but will not be -able to find the instruction which caused the device-not-present -exception. For this case, the emulator cannot emulate the behaviour of -an 80486DX. - -Handling of the address size override prefix byte (0x67) has not been -extensively tested yet. A major problem exists because using it in -vm86 mode can cause a general protection fault. Address offsets -greater than 0xffff appear to be illegal in vm86 mode but are quite -acceptable (and work) in real mode. A small test program developed to -check the addressing, and which runs successfully in real mode, -crashes dosemu under Linux and also brings Windows down with a general -protection fault message when run under the MS-DOS prompt of Windows -3.1. (The program simply reads data from a valid address). - -The emulator supports 16-bit protected mode, with one difference from -an 80486DX. A 80486DX will allow some floating point instructions to -write a few bytes below the lowest address of the stack. The emulator -will not allow this in 16-bit protected mode: no instructions are -allowed to write outside the bounds set by the protection. - ------------------------ Performance of wm-FPU-emu ----------------------- - -Speed. ------ - -The speed of floating point computation with the emulator will depend -upon instruction mix. Relative performance is best for the instructions -which require most computation. The simple instructions are adversely -affected by the FPU instruction trap overhead. - - -Timing: Some simple timing tests have been made on the emulator functions. -The times include load/store instructions. All times are in microseconds -measured on a 33MHz 386 with 64k cache. The Turbo C tests were under -ms-dos, the next two columns are for emulators running with the djgpp -ms-dos extender. The final column is for wm-FPU-emu in Linux 0.97, -using libm4.0 (hard). - -function Turbo C djgpp 1.06 WM-emu387 wm-FPU-emu - - + 60.5 154.8 76.5 139.4 - - 61.1-65.5 157.3-160.8 76.2-79.5 142.9-144.7 - * 71.0 190.8 79.6 146.6 - / 61.2-75.0 261.4-266.9 75.3-91.6 142.2-158.1 - - sin() 310.8 4692.0 319.0 398.5 - cos() 284.4 4855.2 308.0 388.7 - tan() 495.0 8807.1 394.9 504.7 - atan() 328.9 4866.4 601.1 419.5-491.9 - - sqrt() 128.7 crashed 145.2 227.0 - log() 413.1-419.1 5103.4-5354.21 254.7-282.2 409.4-437.1 - exp() 479.1 6619.2 469.1 850.8 - - -The performance under Linux is improved by the use of look-ahead code. -The following results show the improvement which is obtained under -Linux due to the look-ahead code. Also given are the times for the -original Linux emulator with the 4.1 'soft' lib. - - [ Linus' note: I changed look-ahead to be the default under linux, as - there was no reason not to use it after I had edited it to be - disabled during tracing ] - - wm-FPU-emu w original w - look-ahead 'soft' lib - + 106.4 190.2 - - 108.6-111.6 192.4-216.2 - * 113.4 193.1 - / 108.8-124.4 700.1-706.2 - - sin() 390.5 2642.0 - cos() 381.5 2767.4 - tan() 496.5 3153.3 - atan() 367.2-435.5 2439.4-3396.8 - - sqrt() 195.1 4732.5 - log() 358.0-387.5 3359.2-3390.3 - exp() 619.3 4046.4 - - -These figures are now somewhat out-of-date. The emulator has become -progressively slower for most functions as more of the 80486 features -have been implemented. - - ------------------------ Accuracy of wm-FPU-emu ----------------------- - - -The accuracy of the emulator is in almost all cases equal to or better -than that of an Intel 80486 FPU. - -The results of the basic arithmetic functions (+,-,*,/), and fsqrt -match those of an 80486 FPU. They are the best possible; the error for -these never exceeds 1/2 an lsb. The fprem and fprem1 instructions -return exact results; they have no error. - - -The following table compares the emulator accuracy for the sqrt(), -trig and log functions against the Turbo C "emulator". For this table, -each function was tested at about 400 points. Ideal worst-case results -would be 64 bits. The reduced Turbo C accuracy of cos() and tan() for -arguments greater than pi/4 can be thought of as being related to the -precision of the argument x; e.g. an argument of pi/2-(1e-10) which is -accurate to 64 bits can result in a relative accuracy in cos() of -about 64 + log2(cos(x)) = 31 bits. - - -Function Tested x range Worst result Turbo C - (relative bits) - -sqrt(x) 1 .. 2 64.1 63.2 -atan(x) 1e-10 .. 200 64.2 62.8 -cos(x) 0 .. pi/2-(1e-10) 64.4 (x <= pi/4) 62.4 - 64.1 (x = pi/2-(1e-10)) 31.9 -sin(x) 1e-10 .. pi/2 64.0 62.8 -tan(x) 1e-10 .. pi/2-(1e-10) 64.0 (x <= pi/4) 62.1 - 64.1 (x = pi/2-(1e-10)) 31.9 -exp(x) 0 .. 1 63.1 ** 62.9 -log(x) 1+1e-6 .. 2 63.8 ** 62.1 - -** The accuracy for exp() and log() is low because the FPU (emulator) -does not compute them directly; two operations are required. - - -The emulator passes the "paranoia" tests (compiled with gcc 2.3.3 or -later) for 'float' variables (24 bit precision numbers) when precision -control is set to 24, 53 or 64 bits, and for 'double' variables (53 -bit precision numbers) when precision control is set to 53 bits (a -properly performing FPU cannot pass the 'paranoia' tests for 'double' -variables when precision control is set to 64 bits). - -The code for reducing the argument for the trig functions (fsin, fcos, -fptan and fsincos) has been improved and now effectively uses a value -for pi which is accurate to more than 128 bits precision. As a -consequence, the accuracy of these functions for large arguments has -been dramatically improved (and is now very much better than an 80486 -FPU). There is also now no degradation of accuracy for fcos and fptan -for operands close to pi/2. Measured results are (note that the -definition of accuracy has changed slightly from that used for the -above table): - -Function Tested x range Worst result - (absolute bits) - -cos(x) 0 .. 9.22e+18 62.0 -sin(x) 1e-16 .. 9.22e+18 62.1 -tan(x) 1e-16 .. 9.22e+18 61.8 - -It is possible with some effort to find very large arguments which -give much degraded precision. For example, the integer number - 8227740058411162616.0 -is within about 10e-7 of a multiple of pi. To find the tan (for -example) of this number to 64 bits precision it would be necessary to -have a value of pi which had about 150 bits precision. The FPU -emulator computes the result to about 42.6 bits precision (the correct -result is about -9.739715e-8). On the other hand, an 80486 FPU returns -0.01059, which in relative terms is hopelessly inaccurate. - -For arguments close to critical angles (which occur at multiples of -pi/2) the emulator is more accurate than an 80486 FPU. For very large -arguments, the emulator is far more accurate. - - -Prior to version 1.20 of the emulator, the accuracy of the results for -the transcendental functions (in their principal range) was not as -good as the results from an 80486 FPU. From version 1.20, the accuracy -has been considerably improved and these functions now give measured -worst-case results which are better than the worst-case results given -by an 80486 FPU. - -The following table gives the measured results for the emulator. The -number of randomly selected arguments in each case is about half a -million. The group of three columns gives the frequency of the given -accuracy in number of times per million, thus the second of these -columns shows that an accuracy of between 63.80 and 63.89 bits was -found at a rate of 133 times per one million measurements for fsin. -The results show that the fsin, fcos and fptan instructions return -results which are in error (i.e. less accurate than the best possible -result (which is 64 bits)) for about one per cent of all arguments -between -pi/2 and +pi/2. The other instructions have a lower -frequency of results which are in error. The last two columns give -the worst accuracy which was found (in bits) and the approximate value -of the argument which produced it. - - frequency (per M) - ------------------- --------------- -instr arg range # tests 63.7 63.8 63.9 worst at arg - bits bits bits bits ------ ------------ ------- ---- ---- ----- ----- -------- -fsin (0,pi/2) 547756 0 133 10673 63.89 0.451317 -fcos (0,pi/2) 547563 0 126 10532 63.85 0.700801 -fptan (0,pi/2) 536274 11 267 10059 63.74 0.784876 -fpatan 4 quadrants 517087 0 8 1855 63.88 0.435121 (4q) -fyl2x (0,20) 541861 0 0 1323 63.94 1.40923 (x) -fyl2xp1 (-.293,.414) 520256 0 0 5678 63.93 0.408542 (x) -f2xm1 (-1,1) 538847 4 481 6488 63.79 0.167709 - - -Tests performed on an 80486 FPU showed results of lower accuracy. The -following table gives the results which were obtained with an AMD -486DX2/66 (other tests indicate that an Intel 486DX produces -identical results). The tests were basically the same as those used -to measure the emulator (the values, being random, were in general not -the same). The total number of tests for each instruction are given -at the end of the table, in case each about 100k tests were performed. -Another line of figures at the end of the table shows that most of the -instructions return results which are in error for more than 10 -percent of the arguments tested. - -The numbers in the body of the table give the approx number of times a -result of the given accuracy in bits (given in the left-most column) -was obtained per one million arguments. For three of the instructions, -two columns of results are given: * The second column for f2xm1 gives -the number cases where the results of the first column were for a -positive argument, this shows that this instruction gives better -results for positive arguments than it does for negative. * In the -cases of fcos and fptan, the first column gives the results when all -cases where arguments greater than 1.5 were removed from the results -given in the second column. Unlike the emulator, an 80486 FPU returns -results of relatively poor accuracy for these instructions when the -argument approaches pi/2. The table does not show those cases when the -accuracy of the results were less than 62 bits, which occurs quite -often for fsin and fptan when the argument approaches pi/2. This poor -accuracy is discussed above in relation to the Turbo C "emulator", and -the accuracy of the value of pi. - - -bits f2xm1 f2xm1 fpatan fcos fcos fyl2x fyl2xp1 fsin fptan fptan -62.0 0 0 0 0 437 0 0 0 0 925 -62.1 0 0 10 0 894 0 0 0 0 1023 -62.2 14 0 0 0 1033 0 0 0 0 945 -62.3 57 0 0 0 1202 0 0 0 0 1023 -62.4 385 0 0 10 1292 0 23 0 0 1178 -62.5 1140 0 0 119 1649 0 39 0 0 1149 -62.6 2037 0 0 189 1620 0 16 0 0 1169 -62.7 5086 14 0 646 2315 10 101 35 39 1402 -62.8 8818 86 0 984 3050 59 287 131 224 2036 -62.9 11340 1355 0 2126 4153 79 605 357 321 1948 -63.0 15557 4750 0 3319 5376 246 1281 862 808 2688 -63.1 20016 8288 0 4620 6628 511 2569 1723 1510 3302 -63.2 24945 11127 10 6588 8098 1120 4470 2968 2990 4724 -63.3 25686 12382 69 8774 10682 1906 6775 4482 5474 7236 -63.4 29219 14722 79 11109 12311 3094 9414 7259 8912 10587 -63.5 30458 14936 393 13802 15014 5874 12666 9609 13762 15262 -63.6 32439 16448 1277 17945 19028 10226 15537 14657 19158 20346 -63.7 35031 16805 4067 23003 23947 18910 20116 21333 25001 26209 -63.8 33251 15820 7673 24781 25675 24617 25354 24440 29433 30329 -63.9 33293 16833 18529 28318 29233 31267 31470 27748 29676 30601 - -Per cent with error: - 30.9 3.2 18.5 9.8 13.1 11.6 17.4 -Total arguments tested: - 70194 70099 101784 100641 100641 101799 128853 114893 102675 102675 - - -------------------------- Contributors ------------------------------- - -A number of people have contributed to the development of the -emulator, often by just reporting bugs, sometimes with suggested -fixes, and a few kind people have provided me with access in one way -or another to an 80486 machine. Contributors include (to those people -who I may have forgotten, please forgive me): - -Linus Torvalds -Tommy.Thorn@daimi.aau.dk -Andrew.Tridgell@anu.edu.au -Nick Holloway, alfie@dcs.warwick.ac.uk -Hermano Moura, moura@dcs.gla.ac.uk -Jon Jagger, J.Jagger@scp.ac.uk -Lennart Benschop -Brian Gallew, geek+@CMU.EDU -Thomas Staniszewski, ts3v+@andrew.cmu.edu -Martin Howell, mph@plasma.apana.org.au -M Saggaf, alsaggaf@athena.mit.edu -Peter Barker, PETER@socpsy.sci.fau.edu -tom@vlsivie.tuwien.ac.at -Dan Russel, russed@rpi.edu -Daniel Carosone, danielce@ee.mu.oz.au -cae@jpmorgan.com -Hamish Coleman, t933093@minyos.xx.rmit.oz.au -Bruce Evans, bde@kralizec.zeta.org.au -Timo Korvola, Timo.Korvola@hut.fi -Rick Lyons, rick@razorback.brisnet.org.au -Rick, jrs@world.std.com - -...and numerous others who responded to my request for help with -a real 80486. - diff --git a/arch/i386/math-emu/control_w.h b/arch/i386/math-emu/control_w.h deleted file mode 100644 index ae2274dbd30..00000000000 --- a/arch/i386/math-emu/control_w.h +++ /dev/null @@ -1,45 +0,0 @@ -/*---------------------------------------------------------------------------+ - | control_w.h | - | | - | Copyright (C) 1992,1993 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@vaxc.cc.monash.edu.au | - | | - +---------------------------------------------------------------------------*/ - -#ifndef _CONTROLW_H_ -#define _CONTROLW_H_ - -#ifdef __ASSEMBLY__ -#define _Const_(x) $##x -#else -#define _Const_(x) x -#endif - -#define CW_RC _Const_(0x0C00) /* rounding control */ -#define CW_PC _Const_(0x0300) /* precision control */ - -#define CW_Precision Const_(0x0020) /* loss of precision mask */ -#define CW_Underflow Const_(0x0010) /* underflow mask */ -#define CW_Overflow Const_(0x0008) /* overflow mask */ -#define CW_ZeroDiv Const_(0x0004) /* divide by zero mask */ -#define CW_Denormal Const_(0x0002) /* denormalized operand mask */ -#define CW_Invalid Const_(0x0001) /* invalid operation mask */ - -#define CW_Exceptions _Const_(0x003f) /* all masks */ - -#define RC_RND _Const_(0x0000) -#define RC_DOWN _Const_(0x0400) -#define RC_UP _Const_(0x0800) -#define RC_CHOP _Const_(0x0C00) - -/* p 15-5: Precision control bits affect only the following: - ADD, SUB(R), MUL, DIV(R), and SQRT */ -#define PR_24_BITS _Const_(0x000) -#define PR_53_BITS _Const_(0x200) -#define PR_64_BITS _Const_(0x300) -#define PR_RESERVED_BITS _Const_(0x100) -/* FULL_PRECISION simulates all exceptions masked */ -#define FULL_PRECISION (PR_64_BITS | RC_RND | 0x3f) - -#endif /* _CONTROLW_H_ */ diff --git a/arch/i386/math-emu/div_Xsig.S b/arch/i386/math-emu/div_Xsig.S deleted file mode 100644 index f77ba3058b3..00000000000 --- a/arch/i386/math-emu/div_Xsig.S +++ /dev/null @@ -1,365 +0,0 @@ - .file "div_Xsig.S" -/*---------------------------------------------------------------------------+ - | div_Xsig.S | - | | - | Division subroutine for 96 bit quantities | - | | - | Copyright (C) 1994,1995 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@jacobi.maths.monash.edu.au | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and | - | put the 96 bit result at the location d. | - | | - | The result may not be accurate to 96 bits. It is intended for use where | - | a result better than 64 bits is required. The result should usually be | - | good to at least 94 bits. | - | The returned result is actually divided by one half. This is done to | - | prevent overflow. | - | | - | .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd | - | | - | void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) | - | | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "fpu_emu.h" - - -#define XsigLL(x) (x) -#define XsigL(x) 4(x) -#define XsigH(x) 8(x) - - -#ifndef NON_REENTRANT_FPU -/* - Local storage on the stack: - Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 - */ -#define FPU_accum_3 -4(%ebp) -#define FPU_accum_2 -8(%ebp) -#define FPU_accum_1 -12(%ebp) -#define FPU_accum_0 -16(%ebp) -#define FPU_result_3 -20(%ebp) -#define FPU_result_2 -24(%ebp) -#define FPU_result_1 -28(%ebp) - -#else -.data -/* - Local storage in a static area: - Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 - */ - .align 4,0 -FPU_accum_3: - .long 0 -FPU_accum_2: - .long 0 -FPU_accum_1: - .long 0 -FPU_accum_0: - .long 0 -FPU_result_3: - .long 0 -FPU_result_2: - .long 0 -FPU_result_1: - .long 0 -#endif /* NON_REENTRANT_FPU */ - - -.text -ENTRY(div_Xsig) - pushl %ebp - movl %esp,%ebp -#ifndef NON_REENTRANT_FPU - subl $28,%esp -#endif /* NON_REENTRANT_FPU */ - - pushl %esi - pushl %edi - pushl %ebx - - movl PARAM1,%esi /* pointer to num */ - movl PARAM2,%ebx /* pointer to denom */ - -#ifdef PARANOID - testl $0x80000000, XsigH(%ebx) /* Divisor */ - je L_bugged -#endif /* PARANOID */ - - -/*---------------------------------------------------------------------------+ - | Divide: Return arg1/arg2 to arg3. | - | | - | The maximum returned value is (ignoring exponents) | - | .ffffffff ffffffff | - | ------------------ = 1.ffffffff fffffffe | - | .80000000 00000000 | - | and the minimum is | - | .80000000 00000000 | - | ------------------ = .80000000 00000001 (rounded) | - | .ffffffff ffffffff | - | | - +---------------------------------------------------------------------------*/ - - /* Save extended dividend in local register */ - - /* Divide by 2 to prevent overflow */ - clc - movl XsigH(%esi),%eax - rcrl %eax - movl %eax,FPU_accum_3 - movl XsigL(%esi),%eax - rcrl %eax - movl %eax,FPU_accum_2 - movl XsigLL(%esi),%eax - rcrl %eax - movl %eax,FPU_accum_1 - movl $0,%eax - rcrl %eax - movl %eax,FPU_accum_0 - - movl FPU_accum_2,%eax /* Get the current num */ - movl FPU_accum_3,%edx - -/*----------------------------------------------------------------------*/ -/* Initialization done. - Do the first 32 bits. */ - - /* We will divide by a number which is too large */ - movl XsigH(%ebx),%ecx - addl $1,%ecx - jnc LFirst_div_not_1 - - /* here we need to divide by 100000000h, - i.e., no division at all.. */ - mov %edx,%eax - jmp LFirst_div_done - -LFirst_div_not_1: - divl %ecx /* Divide the numerator by the augmented - denom ms dw */ - -LFirst_div_done: - movl %eax,FPU_result_3 /* Put the result in the answer */ - - mull XsigH(%ebx) /* mul by the ms dw of the denom */ - - subl %eax,FPU_accum_2 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_3 - - movl FPU_result_3,%eax /* Get the result back */ - mull XsigL(%ebx) /* now mul the ls dw of the denom */ - - subl %eax,FPU_accum_1 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_2 - sbbl $0,FPU_accum_3 - je LDo_2nd_32_bits /* Must check for non-zero result here */ - -#ifdef PARANOID - jb L_bugged_1 -#endif /* PARANOID */ - - /* need to subtract another once of the denom */ - incl FPU_result_3 /* Correct the answer */ - - movl XsigL(%ebx),%eax - movl XsigH(%ebx),%edx - subl %eax,FPU_accum_1 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_2 - -#ifdef PARANOID - sbbl $0,FPU_accum_3 - jne L_bugged_1 /* Must check for non-zero result here */ -#endif /* PARANOID */ - -/*----------------------------------------------------------------------*/ -/* Half of the main problem is done, there is just a reduced numerator - to handle now. - Work with the second 32 bits, FPU_accum_0 not used from now on */ -LDo_2nd_32_bits: - movl FPU_accum_2,%edx /* get the reduced num */ - movl FPU_accum_1,%eax - - /* need to check for possible subsequent overflow */ - cmpl XsigH(%ebx),%edx - jb LDo_2nd_div - ja LPrevent_2nd_overflow - - cmpl XsigL(%ebx),%eax - jb LDo_2nd_div - -LPrevent_2nd_overflow: -/* The numerator is greater or equal, would cause overflow */ - /* prevent overflow */ - subl XsigL(%ebx),%eax - sbbl XsigH(%ebx),%edx - movl %edx,FPU_accum_2 - movl %eax,FPU_accum_1 - - incl FPU_result_3 /* Reflect the subtraction in the answer */ - -#ifdef PARANOID - je L_bugged_2 /* Can't bump the result to 1.0 */ -#endif /* PARANOID */ - -LDo_2nd_div: - cmpl $0,%ecx /* augmented denom msw */ - jnz LSecond_div_not_1 - - /* %ecx == 0, we are dividing by 1.0 */ - mov %edx,%eax - jmp LSecond_div_done - -LSecond_div_not_1: - divl %ecx /* Divide the numerator by the denom ms dw */ - -LSecond_div_done: - movl %eax,FPU_result_2 /* Put the result in the answer */ - - mull XsigH(%ebx) /* mul by the ms dw of the denom */ - - subl %eax,FPU_accum_1 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_2 - -#ifdef PARANOID - jc L_bugged_2 -#endif /* PARANOID */ - - movl FPU_result_2,%eax /* Get the result back */ - mull XsigL(%ebx) /* now mul the ls dw of the denom */ - - subl %eax,FPU_accum_0 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ - sbbl $0,FPU_accum_2 - -#ifdef PARANOID - jc L_bugged_2 -#endif /* PARANOID */ - - jz LDo_3rd_32_bits - -#ifdef PARANOID - cmpl $1,FPU_accum_2 - jne L_bugged_2 -#endif /* PARANOID */ - - /* need to subtract another once of the denom */ - movl XsigL(%ebx),%eax - movl XsigH(%ebx),%edx - subl %eax,FPU_accum_0 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_1 - sbbl $0,FPU_accum_2 - -#ifdef PARANOID - jc L_bugged_2 - jne L_bugged_2 -#endif /* PARANOID */ - - addl $1,FPU_result_2 /* Correct the answer */ - adcl $0,FPU_result_3 - -#ifdef PARANOID - jc L_bugged_2 /* Must check for non-zero result here */ -#endif /* PARANOID */ - -/*----------------------------------------------------------------------*/ -/* The division is essentially finished here, we just need to perform - tidying operations. - Deal with the 3rd 32 bits */ -LDo_3rd_32_bits: - /* We use an approximation for the third 32 bits. - To take account of the 3rd 32 bits of the divisor - (call them del), we subtract del * (a/b) */ - - movl FPU_result_3,%eax /* a/b */ - mull XsigLL(%ebx) /* del */ - - subl %edx,FPU_accum_1 - - /* A borrow indicates that the result is negative */ - jnb LTest_over - - movl XsigH(%ebx),%edx - addl %edx,FPU_accum_1 - - subl $1,FPU_result_2 /* Adjust the answer */ - sbbl $0,FPU_result_3 - - /* The above addition might not have been enough, check again. */ - movl FPU_accum_1,%edx /* get the reduced num */ - cmpl XsigH(%ebx),%edx /* denom */ - jb LDo_3rd_div - - movl XsigH(%ebx),%edx - addl %edx,FPU_accum_1 - - subl $1,FPU_result_2 /* Adjust the answer */ - sbbl $0,FPU_result_3 - jmp LDo_3rd_div - -LTest_over: - movl FPU_accum_1,%edx /* get the reduced num */ - - /* need to check for possible subsequent overflow */ - cmpl XsigH(%ebx),%edx /* denom */ - jb LDo_3rd_div - - /* prevent overflow */ - subl XsigH(%ebx),%edx - movl %edx,FPU_accum_1 - - addl $1,FPU_result_2 /* Reflect the subtraction in the answer */ - adcl $0,FPU_result_3 - -LDo_3rd_div: - movl FPU_accum_0,%eax - movl FPU_accum_1,%edx - divl XsigH(%ebx) - - movl %eax,FPU_result_1 /* Rough estimate of third word */ - - movl PARAM3,%esi /* pointer to answer */ - - movl FPU_result_1,%eax - movl %eax,XsigLL(%esi) - movl FPU_result_2,%eax - movl %eax,XsigL(%esi) - movl FPU_result_3,%eax - movl %eax,XsigH(%esi) - -L_exit: - popl %ebx - popl %edi - popl %esi - - leave - ret - - -#ifdef PARANOID -/* The logic is wrong if we got here */ -L_bugged: - pushl EX_INTERNAL|0x240 - call EXCEPTION - pop %ebx - jmp L_exit - -L_bugged_1: - pushl EX_INTERNAL|0x241 - call EXCEPTION - pop %ebx - jmp L_exit - -L_bugged_2: - pushl EX_INTERNAL|0x242 - call EXCEPTION - pop %ebx - jmp L_exit -#endif /* PARANOID */ diff --git a/arch/i386/math-emu/div_small.S b/arch/i386/math-emu/div_small.S deleted file mode 100644 index 47099628fa4..00000000000 --- a/arch/i386/math-emu/div_small.S +++ /dev/null @@ -1,47 +0,0 @@ - .file "div_small.S" -/*---------------------------------------------------------------------------+ - | div_small.S | - | | - | Divide a 64 bit integer by a 32 bit integer & return remainder. | - | | - | Copyright (C) 1992,1995 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@jacobi.maths.monash.edu.au | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | unsigned long FPU_div_small(unsigned long long *x, unsigned long y) | - +---------------------------------------------------------------------------*/ - -#include "fpu_emu.h" - -.text -ENTRY(FPU_div_small) - pushl %ebp - movl %esp,%ebp - - pushl %esi - - movl PARAM1,%esi /* pointer to num */ - movl PARAM2,%ecx /* The denominator */ - - movl 4(%esi),%eax /* Get the current num msw */ - xorl %edx,%edx - divl %ecx - - movl %eax,4(%esi) - - movl (%esi),%eax /* Get the num lsw */ - divl %ecx - - movl %eax,(%esi) - - movl %edx,%eax /* Return the remainder in eax */ - - popl %esi - - leave - ret - diff --git a/arch/i386/math-emu/errors.c b/arch/i386/math-emu/errors.c deleted file mode 100644 index a1b0d22f697..00000000000 --- a/arch/i386/math-emu/errors.c +++ /dev/null @@ -1,739 +0,0 @@ -/*---------------------------------------------------------------------------+ - | errors.c | - | | - | The error handling functions for wm-FPU-emu | - | | - | Copyright (C) 1992,1993,1994,1996 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@jacobi.maths.monash.edu.au | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | Note: | - | The file contains code which accesses user memory. | - | Emulator static data may change when user memory is accessed, due to | - | other processes using the emulator while swapping is in progress. | - +---------------------------------------------------------------------------*/ - -#include - -#include - -#include "fpu_emu.h" -#include "fpu_system.h" -#include "exception.h" -#include "status_w.h" -#include "control_w.h" -#include "reg_constant.h" -#include "version.h" - -/* */ -#undef PRINT_MESSAGES -/* */ - - -#if 0 -void Un_impl(void) -{ - u_char byte1, FPU_modrm; - unsigned long address = FPU_ORIG_EIP; - - RE_ENTRANT_CHECK_OFF; - /* No need to check access_ok(), we have previously fetched these bytes. */ - printk("Unimplemented FPU Opcode at eip=%p : ", (void __user *) address); - if ( FPU_CS == __USER_CS ) - { - while ( 1 ) - { - FPU_get_user(byte1, (u_char __user *) address); - if ( (byte1 & 0xf8) == 0xd8 ) break; - printk("[%02x]", byte1); - address++; - } - printk("%02x ", byte1); - FPU_get_user(FPU_modrm, 1 + (u_char __user *) address); - - if (FPU_modrm >= 0300) - printk("%02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7); - else - printk("/%d\n", (FPU_modrm >> 3) & 7); - } - else - { - printk("cs selector = %04x\n", FPU_CS); - } - - RE_ENTRANT_CHECK_ON; - - EXCEPTION(EX_Invalid); - -} -#endif /* 0 */ - - -/* - Called for opcodes which are illegal and which are known to result in a - SIGILL with a real 80486. - */ -void FPU_illegal(void) -{ - math_abort(FPU_info,SIGILL); -} - - - -void FPU_printall(void) -{ - int i; - static const char *tag_desc[] = { "Valid", "Zero", "ERROR", "Empty", - "DeNorm", "Inf", "NaN" }; - u_char byte1, FPU_modrm; - unsigned long address = FPU_ORIG_EIP; - - RE_ENTRANT_CHECK_OFF; - /* No need to check access_ok(), we have previously fetched these bytes. */ - printk("At %p:", (void *) address); - if ( FPU_CS == __USER_CS ) - { -#define MAX_PRINTED_BYTES 20 - for ( i = 0; i < MAX_PRINTED_BYTES; i++ ) - { - FPU_get_user(byte1, (u_char __user *) address); - if ( (byte1 & 0xf8) == 0xd8 ) - { - printk(" %02x", byte1); - break; - } - printk(" [%02x]", byte1); - address++; - } - if ( i == MAX_PRINTED_BYTES ) - printk(" [more..]\n"); - else - { - FPU_get_user(FPU_modrm, 1 + (u_char __user *) address); - - if (FPU_modrm >= 0300) - printk(" %02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7); - else - printk(" /%d, mod=%d rm=%d\n", - (FPU_modrm >> 3) & 7, (FPU_modrm >> 6) & 3, FPU_modrm & 7); - } - } - else - { - printk("%04x\n", FPU_CS); - } - - partial_status = status_word(); - -#ifdef DEBUGGING -if ( partial_status & SW_Backward ) printk("SW: backward compatibility\n"); -if ( partial_status & SW_C3 ) printk("SW: condition bit 3\n"); -if ( partial_status & SW_C2 ) printk("SW: condition bit 2\n"); -if ( partial_status & SW_C1 ) printk("SW: condition bit 1\n"); -if ( partial_status & SW_C0 ) printk("SW: condition bit 0\n"); -if ( partial_status & SW_Summary ) printk("SW: exception summary\n"); -if ( partial_status & SW_Stack_Fault ) printk("SW: stack fault\n"); -if ( partial_status & SW_Precision ) printk("SW: loss of precision\n"); -if ( partial_status & SW_Underflow ) printk("SW: underflow\n"); -if ( partial_status & SW_Overflow ) printk("SW: overflow\n"); -if ( partial_status & SW_Zero_Div ) printk("SW: divide by zero\n"); -if ( partial_status & SW_Denorm_Op ) printk("SW: denormalized operand\n"); -if ( partial_status & SW_Invalid ) printk("SW: invalid operation\n"); -#endif /* DEBUGGING */ - - printk(" SW: b=%d st=%ld es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n", - partial_status & 0x8000 ? 1 : 0, /* busy */ - (partial_status & 0x3800) >> 11, /* stack top pointer */ - partial_status & 0x80 ? 1 : 0, /* Error summary status */ - partial_status & 0x40 ? 1 : 0, /* Stack flag */ - partial_status & SW_C3?1:0, partial_status & SW_C2?1:0, /* cc */ - partial_status & SW_C1?1:0, partial_status & SW_C0?1:0, /* cc */ - partial_status & SW_Precision?1:0, partial_status & SW_Underflow?1:0, - partial_status & SW_Overflow?1:0, partial_status & SW_Zero_Div?1:0, - partial_status & SW_Denorm_Op?1:0, partial_status & SW_Invalid?1:0); - -printk(" CW: ic=%d rc=%ld%ld pc=%ld%ld iem=%d ef=%d%d%d%d%d%d\n", - control_word & 0x1000 ? 1 : 0, - (control_word & 0x800) >> 11, (control_word & 0x400) >> 10, - (control_word & 0x200) >> 9, (control_word & 0x100) >> 8, - control_word & 0x80 ? 1 : 0, - control_word & SW_Precision?1:0, control_word & SW_Underflow?1:0, - control_word & SW_Overflow?1:0, control_word & SW_Zero_Div?1:0, - control_word & SW_Denorm_Op?1:0, control_word & SW_Invalid?1:0); - - for ( i = 0; i < 8; i++ ) - { - FPU_REG *r = &st(i); - u_char tagi = FPU_gettagi(i); - switch (tagi) - { - case TAG_Empty: - continue; - break; - case TAG_Zero: - case TAG_Special: - tagi = FPU_Special(r); - case TAG_Valid: - printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6d ", i, - getsign(r) ? '-' : '+', - (long)(r->sigh >> 16), - (long)(r->sigh & 0xFFFF), - (long)(r->sigl >> 16), - (long)(r->sigl & 0xFFFF), - exponent(r) - EXP_BIAS + 1); - break; - default: - printk("Whoops! Error in errors.c: tag%d is %d ", i, tagi); - continue; - break; - } - printk("%s\n", tag_desc[(int) (unsigned) tagi]); - } - - RE_ENTRANT_CHECK_ON; - -} - -static struct { - int type; - const char *name; -} exception_names[] = { - { EX_StackOver, "stack overflow" }, - { EX_StackUnder, "stack underflow" }, - { EX_Precision, "loss of precision" }, - { EX_Underflow, "underflow" }, - { EX_Overflow, "overflow" }, - { EX_ZeroDiv, "divide by zero" }, - { EX_Denormal, "denormalized operand" }, - { EX_Invalid, "invalid operation" }, - { EX_INTERNAL, "INTERNAL BUG in "FPU_VERSION }, - { 0, NULL } -}; - -/* - EX_INTERNAL is always given with a code which indicates where the - error was detected. - - Internal error types: - 0x14 in fpu_etc.c - 0x1nn in a *.c file: - 0x101 in reg_add_sub.c - 0x102 in reg_mul.c - 0x104 in poly_atan.c - 0x105 in reg_mul.c - 0x107 in fpu_trig.c - 0x108 in reg_compare.c - 0x109 in reg_compare.c - 0x110 in reg_add_sub.c - 0x111 in fpe_entry.c - 0x112 in fpu_trig.c - 0x113 in errors.c - 0x115 in fpu_trig.c - 0x116 in fpu_trig.c - 0x117 in fpu_trig.c - 0x118 in fpu_trig.c - 0x119 in fpu_trig.c - 0x120 in poly_atan.c - 0x121 in reg_compare.c - 0x122 in reg_compare.c - 0x123 in reg_compare.c - 0x125 in fpu_trig.c - 0x126 in fpu_entry.c - 0x127 in poly_2xm1.c - 0x128 in fpu_entry.c - 0x129 in fpu_entry.c - 0x130 in get_address.c - 0x131 in get_address.c - 0x132 in get_address.c - 0x133 in get_address.c - 0x140 in load_store.c - 0x141 in load_store.c - 0x150 in poly_sin.c - 0x151 in poly_sin.c - 0x160 in reg_ld_str.c - 0x161 in reg_ld_str.c - 0x162 in reg_ld_str.c - 0x163 in reg_ld_str.c - 0x164 in reg_ld_str.c - 0x170 in fpu_tags.c - 0x171 in fpu_tags.c - 0x172 in fpu_tags.c - 0x180 in reg_convert.c - 0x2nn in an *.S file: - 0x201 in reg_u_add.S - 0x202 in reg_u_div.S - 0x203 in reg_u_div.S - 0x204 in reg_u_div.S - 0x205 in reg_u_mul.S - 0x206 in reg_u_sub.S - 0x207 in wm_sqrt.S - 0x208 in reg_div.S - 0x209 in reg_u_sub.S - 0x210 in reg_u_sub.S - 0x211 in reg_u_sub.S - 0x212 in reg_u_sub.S - 0x213 in wm_sqrt.S - 0x214 in wm_sqrt.S - 0x215 in wm_sqrt.S - 0x220 in reg_norm.S - 0x221 in reg_norm.S - 0x230 in reg_round.S - 0x231 in reg_round.S - 0x232 in reg_round.S - 0x233 in reg_round.S - 0x234 in reg_round.S - 0x235 in reg_round.S - 0x236 in reg_round.S - 0x240 in div_Xsig.S - 0x241 in div_Xsig.S - 0x242 in div_Xsig.S - */ - -asmlinkage void FPU_exception(int n) -{ - int i, int_type; - - int_type = 0; /* Needed only to stop compiler warnings */ - if ( n & EX_INTERNAL ) - { - int_type = n - EX_INTERNAL; - n = EX_INTERNAL; - /* Set lots of exception bits! */ - partial_status |= (SW_Exc_Mask | SW_Summary | SW_Backward); - } - else - { - /* Extract only the bits which we use to set the status word */ - n &= (SW_Exc_Mask); - /* Set the corresponding exception bit */ - partial_status |= n; - /* Set summary bits iff exception isn't masked */ - if ( partial_status & ~control_word & CW_Exceptions ) - partial_status |= (SW_Summary | SW_Backward); - if ( n & (SW_Stack_Fault | EX_Precision) ) - { - if ( !(n & SW_C1) ) - /* This bit distinguishes over- from underflow for a stack fault, - and roundup from round-down for precision loss. */ - partial_status &= ~SW_C1; - } - } - - RE_ENTRANT_CHECK_OFF; - if ( (~control_word & n & CW_Exceptions) || (n == EX_INTERNAL) ) - { -#ifdef PRINT_MESSAGES - /* My message from the sponsor */ - printk(FPU_VERSION" "__DATE__" (C) W. Metzenthen.\n"); -#endif /* PRINT_MESSAGES */ - - /* Get a name string for error reporting */ - for (i=0; exception_names[i].type; i++) - if ( (exception_names[i].type & n) == exception_names[i].type ) - break; - - if (exception_names[i].type) - { -#ifdef PRINT_MESSAGES - printk("FP Exception: %s!\n", exception_names[i].name); -#endif /* PRINT_MESSAGES */ - } - else - printk("FPU emulator: Unknown Exception: 0x%04x!\n", n); - - if ( n == EX_INTERNAL ) - { - printk("FPU emulator: Internal error type 0x%04x\n", int_type); - FPU_printall(); - } -#ifdef PRINT_MESSAGES - else - FPU_printall(); -#endif /* PRINT_MESSAGES */ - - /* - * The 80486 generates an interrupt on the next non-control FPU - * instruction. So we need some means of flagging it. - * We use the ES (Error Summary) bit for this. - */ - } - RE_ENTRANT_CHECK_ON; - -#ifdef __DEBUG__ - math_abort(FPU_info,SIGFPE); -#endif /* __DEBUG__ */ - -} - - -/* Real operation attempted on a NaN. */ -/* Returns < 0 if the exception is unmasked */ -int real_1op_NaN(FPU_REG *a) -{ - int signalling, isNaN; - - isNaN = (exponent(a) == EXP_OVER) && (a->sigh & 0x80000000); - - /* The default result for the case of two "equal" NaNs (signs may - differ) is chosen to reproduce 80486 behaviour */ - signalling = isNaN && !(a->sigh & 0x40000000); - - if ( !signalling ) - { - if ( !isNaN ) /* pseudo-NaN, or other unsupported? */ - { - if ( control_word & CW_Invalid ) - { - /* Masked response */ - reg_copy(&CONST_QNaN, a); - } - EXCEPTION(EX_Invalid); - return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; - } - return TAG_Special; - } - - if ( control_word & CW_Invalid ) - { - /* The masked response */ - if ( !(a->sigh & 0x80000000) ) /* pseudo-NaN ? */ - { - reg_copy(&CONST_QNaN, a); - } - /* ensure a Quiet NaN */ - a->sigh |= 0x40000000; - } - - EXCEPTION(EX_Invalid); - - return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; -} - - -/* Real operation attempted on two operands, one a NaN. */ -/* Returns < 0 if the exception is unmasked */ -int real_2op_NaN(FPU_REG const *b, u_char tagb, - int deststnr, - FPU_REG const *defaultNaN) -{ - FPU_REG *dest = &st(deststnr); - FPU_REG const *a = dest; - u_char taga = FPU_gettagi(deststnr); - FPU_REG const *x; - int signalling, unsupported; - - if ( taga == TAG_Special ) - taga = FPU_Special(a); - if ( tagb == TAG_Special ) - tagb = FPU_Special(b); - - /* TW_NaN is also used for unsupported data types. */ - unsupported = ((taga == TW_NaN) - && !((exponent(a) == EXP_OVER) && (a->sigh & 0x80000000))) - || ((tagb == TW_NaN) - && !((exponent(b) == EXP_OVER) && (b->sigh & 0x80000000))); - if ( unsupported ) - { - if ( control_word & CW_Invalid ) - { - /* Masked response */ - FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr); - } - EXCEPTION(EX_Invalid); - return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; - } - - if (taga == TW_NaN) - { - x = a; - if (tagb == TW_NaN) - { - signalling = !(a->sigh & b->sigh & 0x40000000); - if ( significand(b) > significand(a) ) - x = b; - else if ( significand(b) == significand(a) ) - { - /* The default result for the case of two "equal" NaNs (signs may - differ) is chosen to reproduce 80486 behaviour */ - x = defaultNaN; - } - } - else - { - /* return the quiet version of the NaN in a */ - signalling = !(a->sigh & 0x40000000); - } - } - else -#ifdef PARANOID - if (tagb == TW_NaN) -#endif /* PARANOID */ - { - signalling = !(b->sigh & 0x40000000); - x = b; - } -#ifdef PARANOID - else - { - signalling = 0; - EXCEPTION(EX_INTERNAL|0x113); - x = &CONST_QNaN; - } -#endif /* PARANOID */ - - if ( (!signalling) || (control_word & CW_Invalid) ) - { - if ( ! x ) - x = b; - - if ( !(x->sigh & 0x80000000) ) /* pseudo-NaN ? */ - x = &CONST_QNaN; - - FPU_copy_to_regi(x, TAG_Special, deststnr); - - if ( !signalling ) - return TAG_Special; - - /* ensure a Quiet NaN */ - dest->sigh |= 0x40000000; - } - - EXCEPTION(EX_Invalid); - - return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; -} - - -/* Invalid arith operation on Valid registers */ -/* Returns < 0 if the exception is unmasked */ -asmlinkage int arith_invalid(int deststnr) -{ - - EXCEPTION(EX_Invalid); - - if ( control_word & CW_Invalid ) - { - /* The masked response */ - FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr); - } - - return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Valid; - -} - - -/* Divide a finite number by zero */ -asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign) -{ - FPU_REG *dest = &st(deststnr); - int tag = TAG_Valid; - - if ( control_word & CW_ZeroDiv ) - { - /* The masked response */ - FPU_copy_to_regi(&CONST_INF, TAG_Special, deststnr); - setsign(dest, sign); - tag = TAG_Special; - } - - EXCEPTION(EX_ZeroDiv); - - return (!(control_word & CW_ZeroDiv) ? FPU_Exception : 0) | tag; - -} - - -/* This may be called often, so keep it lean */ -int set_precision_flag(int flags) -{ - if ( control_word & CW_Precision ) - { - partial_status &= ~(SW_C1 & flags); - partial_status |= flags; /* The masked response */ - return 0; - } - else - { - EXCEPTION(flags); - return 1; - } -} - - -/* This may be called often, so keep it lean */ -asmlinkage void set_precision_flag_up(void) -{ - if ( control_word & CW_Precision ) - partial_status |= (SW_Precision | SW_C1); /* The masked response */ - else - EXCEPTION(EX_Precision | SW_C1); -} - - -/* This may be called often, so keep it lean */ -asmlinkage void set_precision_flag_down(void) -{ - if ( control_word & CW_Precision ) - { /* The masked response */ - partial_status &= ~SW_C1; - partial_status |= SW_Precision; - } - else - EXCEPTION(EX_Precision); -} - - -asmlinkage int denormal_operand(void) -{ - if ( control_word & CW_Denormal ) - { /* The masked response */ - partial_status |= SW_Denorm_Op; - return TAG_Special; - } - else - { - EXCEPTION(EX_Denormal); - return TAG_Special | FPU_Exception; - } -} - - -asmlinkage int arith_overflow(FPU_REG *dest) -{ - int tag = TAG_Valid; - - if ( control_word & CW_Overflow ) - { - /* The masked response */ -/* ###### The response here depends upon the rounding mode */ - reg_copy(&CONST_INF, dest); - tag = TAG_Special; - } - else - { - /* Subtract the magic number from the exponent */ - addexponent(dest, (-3 * (1 << 13))); - } - - EXCEPTION(EX_Overflow); - if ( control_word & CW_Overflow ) - { - /* The overflow exception is masked. */ - /* By definition, precision is lost. - The roundup bit (C1) is also set because we have - "rounded" upwards to Infinity. */ - EXCEPTION(EX_Precision | SW_C1); - return tag; - } - - return tag; - -} - - -asmlinkage int arith_underflow(FPU_REG *dest) -{ - int tag = TAG_Valid; - - if ( control_word & CW_Underflow ) - { - /* The masked response */ - if ( exponent16(dest) <= EXP_UNDER - 63 ) - { - reg_copy(&CONST_Z, dest); - partial_status &= ~SW_C1; /* Round down. */ - tag = TAG_Zero; - } - else - { - stdexp(dest); - } - } - else - { - /* Add the magic number to the exponent. */ - addexponent(dest, (3 * (1 << 13)) + EXTENDED_Ebias); - } - - EXCEPTION(EX_Underflow); - if ( control_word & CW_Underflow ) - { - /* The underflow exception is masked. */ - EXCEPTION(EX_Precision); - return tag; - } - - return tag; - -} - - -void FPU_stack_overflow(void) -{ - - if ( control_word & CW_Invalid ) - { - /* The masked response */ - top--; - FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); - } - - EXCEPTION(EX_StackOver); - - return; - -} - - -void FPU_stack_underflow(void) -{ - - if ( control_word & CW_Invalid ) - { - /* The masked response */ - FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); - } - - EXCEPTION(EX_StackUnder); - - return; - -} - - -void FPU_stack_underflow_i(int i) -{ - - if ( control_word & CW_Invalid ) - { - /* The masked response */ - FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i); - } - - EXCEPTION(EX_StackUnder); - - return; - -} - - -void FPU_stack_underflow_pop(int i) -{ - - if ( control_word & CW_Invalid ) - { - /* The masked response */ - FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i); - FPU_pop(); - } - - EXCEPTION(EX_StackUnder); - - return; - -} - diff --git a/arch/i386/math-emu/exception.h b/arch/i386/math-emu/exception.h deleted file mode 100644 index b463f21a811..00000000000 --- a/arch/i386/math-emu/exception.h +++ /dev/null @@ -1,53 +0,0 @@ -/*---------------------------------------------------------------------------+ - | exception.h | - | | - | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@vaxc.cc.monash.edu.au | - | | - +---------------------------------------------------------------------------*/ - -#ifndef _EXCEPTION_H_ -#define _EXCEPTION_H_ - - -#ifdef __ASSEMBLY__ -#define Const_(x) $##x -#else -#define Const_(x) x -#endif - -#ifndef SW_C1 -#include "fpu_emu.h" -#endif /* SW_C1 */ - -#define FPU_BUSY Const_(0x8000) /* FPU busy bit (8087 compatibility) */ -#define EX_ErrorSummary Const_(0x0080) /* Error summary status */ -/* Special exceptions: */ -#define EX_INTERNAL Const_(0x8000) /* Internal error in wm-FPU-emu */ -#define EX_StackOver Const_(0x0041|SW_C1) /* stack overflow */ -#define EX_StackUnder Const_(0x0041) /* stack underflow */ -/* Exception flags: */ -#define EX_Precision Const_(0x0020) /* loss of precision */ -#define EX_Underflow Const_(0x0010) /* underflow */ -#define EX_Overflow Const_(0x0008) /* overflow */ -#define EX_ZeroDiv Const_(0x0004) /* divide by zero */ -#define EX_Denormal Const_(0x0002) /* denormalized operand */ -#define EX_Invalid Const_(0x0001) /* invalid operation */ - - -#define PRECISION_LOST_UP Const_((EX_Precision | SW_C1)) -#define PRECISION_LOST_DOWN Const_(EX_Precision) - - -#ifndef __ASSEMBLY__ - -#ifdef DEBUG -#define EXCEPTION(x) { printk("exception in %s at line %d\n", \ - __FILE__, __LINE__); FPU_exception(x); } -#else -#define EXCEPTION(x) FPU_exception(x) -#endif - -#endif /* __ASSEMBLY__ */ - -#endif /* _EXCEPTION_H_ */ diff --git a/arch/i386/math-emu/fpu_arith.c b/arch/i386/math-emu/fpu_arith.c deleted file mode 100644 index 6972dec01af..00000000000 --- a/arch/i386/math-emu/fpu_arith.c +++ /dev/null @@ -1,174 +0,0 @@ -/*---------------------------------------------------------------------------+ - | fpu_arith.c | - | | - | Code to implement the FPU register/register arithmetic instructions | - | | - | Copyright (C) 1992,1993,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "fpu_system.h" -#include "fpu_emu.h" -#include "control_w.h" -#include "status_w.h" - - -void fadd__(void) -{ - /* fadd st,st(i) */ - int i = FPU_rm; - clear_C1(); - FPU_add(&st(i), FPU_gettagi(i), 0, control_word); -} - - -void fmul__(void) -{ - /* fmul st,st(i) */ - int i = FPU_rm; - clear_C1(); - FPU_mul(&st(i), FPU_gettagi(i), 0, control_word); -} - - - -void fsub__(void) -{ - /* fsub st,st(i) */ - clear_C1(); - FPU_sub(0, FPU_rm, control_word); -} - - -void fsubr_(void) -{ - /* fsubr st,st(i) */ - clear_C1(); - FPU_sub(REV, FPU_rm, control_word); -} - - -void fdiv__(void) -{ - /* fdiv st,st(i) */ - clear_C1(); - FPU_div(0, FPU_rm, control_word); -} - - -void fdivr_(void) -{ - /* fdivr st,st(i) */ - clear_C1(); - FPU_div(REV, FPU_rm, control_word); -} - - - -void fadd_i(void) -{ - /* fadd st(i),st */ - int i = FPU_rm; - clear_C1(); - FPU_add(&st(i), FPU_gettagi(i), i, control_word); -} - - -void fmul_i(void) -{ - /* fmul st(i),st */ - clear_C1(); - FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word); -} - - -void fsubri(void) -{ - /* fsubr st(i),st */ - clear_C1(); - FPU_sub(DEST_RM, FPU_rm, control_word); -} - - -void fsub_i(void) -{ - /* fsub st(i),st */ - clear_C1(); - FPU_sub(REV|DEST_RM, FPU_rm, control_word); -} - - -void fdivri(void) -{ - /* fdivr st(i),st */ - clear_C1(); - FPU_div(DEST_RM, FPU_rm, control_word); -} - - -void fdiv_i(void) -{ - /* fdiv st(i),st */ - clear_C1(); - FPU_div(REV|DEST_RM, FPU_rm, control_word); -} - - - -void faddp_(void) -{ - /* faddp st(i),st */ - int i = FPU_rm; - clear_C1(); - if ( FPU_add(&st(i), FPU_gettagi(i), i, control_word) >= 0 ) - FPU_pop(); -} - - -void fmulp_(void) -{ - /* fmulp st(i),st */ - clear_C1(); - if ( FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word) >= 0 ) - FPU_pop(); -} - - - -void fsubrp(void) -{ - /* fsubrp st(i),st */ - clear_C1(); - if ( FPU_sub(DEST_RM, FPU_rm, control_word) >= 0 ) - FPU_pop(); -} - - -void fsubp_(void) -{ - /* fsubp st(i),st */ - clear_C1(); - if ( FPU_sub(REV|DEST_RM, FPU_rm, control_word) >= 0 ) - FPU_pop(); -} - - -void fdivrp(void) -{ - /* fdivrp st(i),st */ - clear_C1(); - if ( FPU_div(DEST_RM, FPU_rm, control_word) >= 0 ) - FPU_pop(); -} - - -void fdivp_(void) -{ - /* fdivp st(i),st */ - clear_C1(); - if ( FPU_div(REV|DEST_RM, FPU_rm, control_word) >= 0 ) - FPU_pop(); -} diff --git a/arch/i386/math-emu/fpu_asm.h b/arch/i386/math-emu/fpu_asm.h deleted file mode 100644 index 9ba12416df1..00000000000 --- a/arch/i386/math-emu/fpu_asm.h +++ /dev/null @@ -1,32 +0,0 @@ -/*---------------------------------------------------------------------------+ - | fpu_asm.h | - | | - | Copyright (C) 1992,1995,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - +---------------------------------------------------------------------------*/ - -#ifndef _FPU_ASM_H_ -#define _FPU_ASM_H_ - -#include - -#define EXCEPTION FPU_exception - - -#define PARAM1 8(%ebp) -#define PARAM2 12(%ebp) -#define PARAM3 16(%ebp) -#define PARAM4 20(%ebp) -#define PARAM5 24(%ebp) -#define PARAM6 28(%ebp) -#define PARAM7 32(%ebp) - -#define SIGL_OFFSET 0 -#define EXP(x) 8(x) -#define SIG(x) SIGL_OFFSET##(x) -#define SIGL(x) SIGL_OFFSET##(x) -#define SIGH(x) 4(x) - -#endif /* _FPU_ASM_H_ */ diff --git a/arch/i386/math-emu/fpu_aux.c b/arch/i386/math-emu/fpu_aux.c deleted file mode 100644 index 20886cfb9f7..00000000000 --- a/arch/i386/math-emu/fpu_aux.c +++ /dev/null @@ -1,204 +0,0 @@ -/*---------------------------------------------------------------------------+ - | fpu_aux.c | - | | - | Code to implement some of the FPU auxiliary instructions. | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "fpu_system.h" -#include "exception.h" -#include "fpu_emu.h" -#include "status_w.h" -#include "control_w.h" - - -static void fnop(void) -{ -} - -static void fclex(void) -{ - partial_status &= ~(SW_Backward|SW_Summary|SW_Stack_Fault|SW_Precision| - SW_Underflow|SW_Overflow|SW_Zero_Div|SW_Denorm_Op| - SW_Invalid); - no_ip_update = 1; -} - -/* Needs to be externally visible */ -void finit(void) -{ - control_word = 0x037f; - partial_status = 0; - top = 0; /* We don't keep top in the status word internally. */ - fpu_tag_word = 0xffff; - /* The behaviour is different from that detailed in - Section 15.1.6 of the Intel manual */ - operand_address.offset = 0; - operand_address.selector = 0; - instruction_address.offset = 0; - instruction_address.selector = 0; - instruction_address.opcode = 0; - no_ip_update = 1; -} - -/* - * These are nops on the i387.. - */ -#define feni fnop -#define fdisi fnop -#define fsetpm fnop - -static FUNC const finit_table[] = { - feni, fdisi, fclex, finit, - fsetpm, FPU_illegal, FPU_illegal, FPU_illegal -}; - -void finit_(void) -{ - (finit_table[FPU_rm])(); -} - - -static void fstsw_ax(void) -{ - *(short *) &FPU_EAX = status_word(); - no_ip_update = 1; -} - -static FUNC const fstsw_table[] = { - fstsw_ax, FPU_illegal, FPU_illegal, FPU_illegal, - FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal -}; - -void fstsw_(void) -{ - (fstsw_table[FPU_rm])(); -} - - -static FUNC const fp_nop_table[] = { - fnop, FPU_illegal, FPU_illegal, FPU_illegal, - FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal -}; - -void fp_nop(void) -{ - (fp_nop_table[FPU_rm])(); -} - - -void fld_i_(void) -{ - FPU_REG *st_new_ptr; - int i; - u_char tag; - - if ( STACK_OVERFLOW ) - { FPU_stack_overflow(); return; } - - /* fld st(i) */ - i = FPU_rm; - if ( NOT_EMPTY(i) ) - { - reg_copy(&st(i), st_new_ptr); - tag = FPU_gettagi(i); - push(); - FPU_settag0(tag); - } - else - { - if ( control_word & CW_Invalid ) - { - /* The masked response */ - FPU_stack_underflow(); - } - else - EXCEPTION(EX_StackUnder); - } - -} - - -void fxch_i(void) -{ - /* fxch st(i) */ - FPU_REG t; - int i = FPU_rm; - FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i); - long tag_word = fpu_tag_word; - int regnr = top & 7, regnri = ((regnr + i) & 7); - u_char st0_tag = (tag_word >> (regnr*2)) & 3; - u_char sti_tag = (tag_word >> (regnri*2)) & 3; - - if ( st0_tag == TAG_Empty ) - { - if ( sti_tag == TAG_Empty ) - { - FPU_stack_underflow(); - FPU_stack_underflow_i(i); - return; - } - if ( control_word & CW_Invalid ) - { - /* Masked response */ - FPU_copy_to_reg0(sti_ptr, sti_tag); - } - FPU_stack_underflow_i(i); - return; - } - if ( sti_tag == TAG_Empty ) - { - if ( control_word & CW_Invalid ) - { - /* Masked response */ - FPU_copy_to_regi(st0_ptr, st0_tag, i); - } - FPU_stack_underflow(); - return; - } - clear_C1(); - - reg_copy(st0_ptr, &t); - reg_copy(sti_ptr, st0_ptr); - reg_copy(&t, sti_ptr); - - tag_word &= ~(3 << (regnr*2)) & ~(3 << (regnri*2)); - tag_word |= (sti_tag << (regnr*2)) | (st0_tag << (regnri*2)); - fpu_tag_word = tag_word; -} - - -void ffree_(void) -{ - /* ffree st(i) */ - FPU_settagi(FPU_rm, TAG_Empty); -} - - -void ffreep(void) -{ - /* ffree st(i) + pop - unofficial code */ - FPU_settagi(FPU_rm, TAG_Empty); - FPU_pop(); -} - - -void fst_i_(void) -{ - /* fst st(i) */ - FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm); -} - - -void fstp_i(void) -{ - /* fstp st(i) */ - FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm); - FPU_pop(); -} - diff --git a/arch/i386/math-emu/fpu_emu.h b/arch/i386/math-emu/fpu_emu.h deleted file mode 100644 index 65120f52385..00000000000 --- a/arch/i386/math-emu/fpu_emu.h +++ /dev/null @@ -1,218 +0,0 @@ -/*---------------------------------------------------------------------------+ - | fpu_emu.h | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - +---------------------------------------------------------------------------*/ - - -#ifndef _FPU_EMU_H_ -#define _FPU_EMU_H_ - -/* - * Define PECULIAR_486 to get a closer approximation to 80486 behaviour, - * rather than behaviour which appears to be cleaner. - * This is a matter of opinion: for all I know, the 80486 may simply - * be complying with the IEEE spec. Maybe one day I'll get to see the - * spec... - */ -#define PECULIAR_486 - -#ifdef __ASSEMBLY__ -#include "fpu_asm.h" -#define Const(x) $##x -#else -#define Const(x) x -#endif - -#define EXP_BIAS Const(0) -#define EXP_OVER Const(0x4000) /* smallest invalid large exponent */ -#define EXP_UNDER Const(-0x3fff) /* largest invalid small exponent */ -#define EXP_WAY_UNDER Const(-0x6000) /* Below the smallest denormal, but - still a 16 bit nr. */ -#define EXP_Infinity EXP_OVER -#define EXP_NaN EXP_OVER - -#define EXTENDED_Ebias Const(0x3fff) -#define EXTENDED_Emin (-0x3ffe) /* smallest valid exponent */ - -#define SIGN_POS Const(0) -#define SIGN_NEG Const(0x80) - -#define SIGN_Positive Const(0) -#define SIGN_Negative Const(0x8000) - - -/* Keep the order TAG_Valid, TAG_Zero, TW_Denormal */ -/* The following fold to 2 (Special) in the Tag Word */ -#define TW_Denormal Const(4) /* De-normal */ -#define TW_Infinity Const(5) /* + or - infinity */ -#define TW_NaN Const(6) /* Not a Number */ -#define TW_Unsupported Const(7) /* Not supported by an 80486 */ - -#define TAG_Valid Const(0) /* valid */ -#define TAG_Zero Const(1) /* zero */ -#define TAG_Special Const(2) /* De-normal, + or - infinity, - or Not a Number */ -#define TAG_Empty Const(3) /* empty */ -#define TAG_Error Const(0x80) /* probably need to abort */ - -#define LOADED_DATA Const(10101) /* Special st() number to identify - loaded data (not on stack). */ - -/* A few flags (must be >= 0x10). */ -#define REV 0x10 -#define DEST_RM 0x20 -#define LOADED 0x40 - -#define FPU_Exception Const(0x80000000) /* Added to tag returns. */ - - -#ifndef __ASSEMBLY__ - -#include "fpu_system.h" - -#include /* for struct _fpstate */ -#include -#include - -/* -#define RE_ENTRANT_CHECKING - */ - -#ifdef RE_ENTRANT_CHECKING -extern u_char emulating; -# define RE_ENTRANT_CHECK_OFF emulating = 0 -# define RE_ENTRANT_CHECK_ON emulating = 1 -#else -# define RE_ENTRANT_CHECK_OFF -# define RE_ENTRANT_CHECK_ON -#endif /* RE_ENTRANT_CHECKING */ - -#define FWAIT_OPCODE 0x9b -#define OP_SIZE_PREFIX 0x66 -#define ADDR_SIZE_PREFIX 0x67 -#define PREFIX_CS 0x2e -#define PREFIX_DS 0x3e -#define PREFIX_ES 0x26 -#define PREFIX_SS 0x36 -#define PREFIX_FS 0x64 -#define PREFIX_GS 0x65 -#define PREFIX_REPE 0xf3 -#define PREFIX_REPNE 0xf2 -#define PREFIX_LOCK 0xf0 -#define PREFIX_CS_ 1 -#define PREFIX_DS_ 2 -#define PREFIX_ES_ 3 -#define PREFIX_FS_ 4 -#define PREFIX_GS_ 5 -#define PREFIX_SS_ 6 -#define PREFIX_DEFAULT 7 - -struct address { - unsigned int offset; - unsigned int selector:16; - unsigned int opcode:11; - unsigned int empty:5; -}; -struct fpu__reg { - unsigned sigl; - unsigned sigh; - short exp; -}; - -typedef void (*FUNC)(void); -typedef struct fpu__reg FPU_REG; -typedef void (*FUNC_ST0)(FPU_REG *st0_ptr, u_char st0_tag); -typedef struct { u_char address_size, operand_size, segment; } - overrides; -/* This structure is 32 bits: */ -typedef struct { overrides override; - u_char default_mode; } fpu_addr_modes; -/* PROTECTED has a restricted meaning in the emulator; it is used - to signal that the emulator needs to do special things to ensure - that protection is respected in a segmented model. */ -#define PROTECTED 4 -#define SIXTEEN 1 /* We rely upon this being 1 (true) */ -#define VM86 SIXTEEN -#define PM16 (SIXTEEN | PROTECTED) -#define SEG32 PROTECTED -extern u_char const data_sizes_16[32]; - -#define register_base ((u_char *) registers ) -#define fpu_register(x) ( * ((FPU_REG *)( register_base + 10 * (x & 7) )) ) -#define st(x) ( * ((FPU_REG *)( register_base + 10 * ((top+x) & 7) )) ) - -#define STACK_OVERFLOW (FPU_stackoverflow(&st_new_ptr)) -#define NOT_EMPTY(i) (!FPU_empty_i(i)) - -#define NOT_EMPTY_ST0 (st0_tag ^ TAG_Empty) - -#define poppop() { FPU_pop(); FPU_pop(); } - -/* push() does not affect the tags */ -#define push() { top--; } - -#define signbyte(a) (((u_char *)(a))[9]) -#define getsign(a) (signbyte(a) & 0x80) -#define setsign(a,b) { if (b) signbyte(a) |= 0x80; else signbyte(a) &= 0x7f; } -#define copysign(a,b) { if (getsign(a)) signbyte(b) |= 0x80; \ - else signbyte(b) &= 0x7f; } -#define changesign(a) { signbyte(a) ^= 0x80; } -#define setpositive(a) { signbyte(a) &= 0x7f; } -#define setnegative(a) { signbyte(a) |= 0x80; } -#define signpositive(a) ( (signbyte(a) & 0x80) == 0 ) -#define signnegative(a) (signbyte(a) & 0x80) - -static inline void reg_copy(FPU_REG const *x, FPU_REG *y) -{ - *(short *)&(y->exp) = *(const short *)&(x->exp); - *(long long *)&(y->sigl) = *(const long long *)&(x->sigl); -} - -#define exponent(x) (((*(short *)&((x)->exp)) & 0x7fff) - EXTENDED_Ebias) -#define setexponentpos(x,y) { (*(short *)&((x)->exp)) = \ - ((y) + EXTENDED_Ebias) & 0x7fff; } -#define exponent16(x) (*(short *)&((x)->exp)) -#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (y); } -#define addexponent(x,y) { (*(short *)&((x)->exp)) += (y); } -#define stdexp(x) { (*(short *)&((x)->exp)) += EXTENDED_Ebias; } - -#define isdenormal(ptr) (exponent(ptr) == EXP_BIAS+EXP_UNDER) - -#define significand(x) ( ((unsigned long long *)&((x)->sigl))[0] ) - - -/*----- Prototypes for functions written in assembler -----*/ -/* extern void reg_move(FPU_REG *a, FPU_REG *b); */ - -asmlinkage int FPU_normalize(FPU_REG *x); -asmlinkage int FPU_normalize_nuo(FPU_REG *x); -asmlinkage int FPU_u_sub(FPU_REG const *arg1, FPU_REG const *arg2, - FPU_REG *answ, unsigned int control_w, u_char sign, - int expa, int expb); -asmlinkage int FPU_u_mul(FPU_REG const *arg1, FPU_REG const *arg2, - FPU_REG *answ, unsigned int control_w, u_char sign, - int expon); -asmlinkage int FPU_u_div(FPU_REG const *arg1, FPU_REG const *arg2, - FPU_REG *answ, unsigned int control_w, u_char sign); -asmlinkage int FPU_u_add(FPU_REG const *arg1, FPU_REG const *arg2, - FPU_REG *answ, unsigned int control_w, u_char sign, - int expa, int expb); -asmlinkage int wm_sqrt(FPU_REG *n, int dummy1, int dummy2, - unsigned int control_w, u_char sign); -asmlinkage unsigned FPU_shrx(void *l, unsigned x); -asmlinkage unsigned FPU_shrxs(void *v, unsigned x); -asmlinkage unsigned long FPU_div_small(unsigned long long *x, unsigned long y); -asmlinkage int FPU_round(FPU_REG *arg, unsigned int extent, int dummy, - unsigned int control_w, u_char sign); - -#ifndef MAKING_PROTO -#include "fpu_proto.h" -#endif - -#endif /* __ASSEMBLY__ */ - -#endif /* _FPU_EMU_H_ */ diff --git a/arch/i386/math-emu/fpu_entry.c b/arch/i386/math-emu/fpu_entry.c deleted file mode 100644 index 1853524c8b5..00000000000 --- a/arch/i386/math-emu/fpu_entry.c +++ /dev/null @@ -1,761 +0,0 @@ -/*---------------------------------------------------------------------------+ - | fpu_entry.c | - | | - | The entry functions for wm-FPU-emu | - | | - | Copyright (C) 1992,1993,1994,1996,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | See the files "README" and "COPYING" for further copyright and warranty | - | information. | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | Note: | - | The file contains code which accesses user memory. | - | Emulator static data may change when user memory is accessed, due to | - | other processes using the emulator while swapping is in progress. | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | math_emulate(), restore_i387_soft() and save_i387_soft() are the only | - | entry points for wm-FPU-emu. | - +---------------------------------------------------------------------------*/ - -#include -#include - -#include -#include - -#include "fpu_system.h" -#include "fpu_emu.h" -#include "exception.h" -#include "control_w.h" -#include "status_w.h" - -#define __BAD__ FPU_illegal /* Illegal on an 80486, causes SIGILL */ - -#ifndef NO_UNDOC_CODE /* Un-documented FPU op-codes supported by default. */ - -/* WARNING: These codes are not documented by Intel in their 80486 manual - and may not work on FPU clones or later Intel FPUs. */ - -/* Changes to support the un-doc codes provided by Linus Torvalds. */ - -#define _d9_d8_ fstp_i /* unofficial code (19) */ -#define _dc_d0_ fcom_st /* unofficial code (14) */ -#define _dc_d8_ fcompst /* unofficial code (1c) */ -#define _dd_c8_ fxch_i /* unofficial code (0d) */ -#define _de_d0_ fcompst /* unofficial code (16) */ -#define _df_c0_ ffreep /* unofficial code (07) ffree + pop */ -#define _df_c8_ fxch_i /* unofficial code (0f) */ -#define _df_d0_ fstp_i /* unofficial code (17) */ -#define _df_d8_ fstp_i /* unofficial code (1f) */ - -static FUNC const st_instr_table[64] = { - fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, _df_c0_, - fmul__, fxch_i, __BAD__, __BAD__, fmul_i, _dd_c8_, fmulp_, _df_c8_, - fcom_st, fp_nop, __BAD__, __BAD__, _dc_d0_, fst_i_, _de_d0_, _df_d0_, - fcompst, _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i, fcompp, _df_d8_, - fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_, - fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__, - fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__, - fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__, -}; - -#else /* Support only documented FPU op-codes */ - -static FUNC const st_instr_table[64] = { - fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, __BAD__, - fmul__, fxch_i, __BAD__, __BAD__, fmul_i, __BAD__, fmulp_, __BAD__, - fcom_st, fp_nop, __BAD__, __BAD__, __BAD__, fst_i_, __BAD__, __BAD__, - fcompst, __BAD__, __BAD__, __BAD__, __BAD__, fstp_i, fcompp, __BAD__, - fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_, - fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__, - fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__, - fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__, -}; - -#endif /* NO_UNDOC_CODE */ - - -#define _NONE_ 0 /* Take no special action */ -#define _REG0_ 1 /* Need to check for not empty st(0) */ -#define _REGI_ 2 /* Need to check for not empty st(0) and st(rm) */ -#define _REGi_ 0 /* Uses st(rm) */ -#define _PUSH_ 3 /* Need to check for space to push onto stack */ -#define _null_ 4 /* Function illegal or not implemented */ -#define _REGIi 5 /* Uses st(0) and st(rm), result to st(rm) */ -#define _REGIp 6 /* Uses st(0) and st(rm), result to st(rm) then pop */ -#define _REGIc 0 /* Compare st(0) and st(rm) */ -#define _REGIn 0 /* Uses st(0) and st(rm), but handle checks later */ - -#ifndef NO_UNDOC_CODE - -/* Un-documented FPU op-codes supported by default. (see above) */ - -static u_char const type_table[64] = { - _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_, - _REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_, - _REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_, - _REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_, - _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_, - _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_, - _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_, - _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_ -}; - -#else /* Support only documented FPU op-codes */ - -static u_char const type_table[64] = { - _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_, - _REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_, - _REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_, - _REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_, - _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_, - _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_, - _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_, - _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_ -}; - -#endif /* NO_UNDOC_CODE */ - - -#ifdef RE_ENTRANT_CHECKING -u_char emulating=0; -#endif /* RE_ENTRANT_CHECKING */ - -static int valid_prefix(u_char *Byte, u_char __user **fpu_eip, - overrides *override); - -asmlinkage void math_emulate(long arg) -{ - u_char FPU_modrm, byte1; - unsigned short code; - fpu_addr_modes addr_modes; - int unmasked; - FPU_REG loaded_data; - FPU_REG *st0_ptr; - u_char loaded_tag, st0_tag; - void __user *data_address; - struct address data_sel_off; - struct address entry_sel_off; - unsigned long code_base = 0; - unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ - struct desc_struct code_descriptor; - -#ifdef RE_ENTRANT_CHECKING - if ( emulating ) - { - printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n"); - } - RE_ENTRANT_CHECK_ON; -#endif /* RE_ENTRANT_CHECKING */ - - if (!used_math()) - { - finit(); - set_used_math(); - } - - SETUP_DATA_AREA(arg); - - FPU_ORIG_EIP = FPU_EIP; - - if ( (FPU_EFLAGS & 0x00020000) != 0 ) - { - /* Virtual 8086 mode */ - addr_modes.default_mode = VM86; - FPU_EIP += code_base = FPU_CS << 4; - code_limit = code_base + 0xffff; /* Assumes code_base <= 0xffff0000 */ - } - else if ( FPU_CS == __USER_CS && FPU_DS == __USER_DS ) - { - addr_modes.default_mode = 0; - } - else if ( FPU_CS == __KERNEL_CS ) - { - printk("math_emulate: %04x:%08lx\n",FPU_CS,FPU_EIP); - panic("Math emulation needed in kernel"); - } - else - { - - if ( (FPU_CS & 4) != 4 ) /* Must be in the LDT */ - { - /* Can only handle segmented addressing via the LDT - for now, and it must be 16 bit */ - printk("FPU emulator: Unsupported addressing mode\n"); - math_abort(FPU_info, SIGILL); - } - - code_descriptor = LDT_DESCRIPTOR(FPU_CS); - if ( SEG_D_SIZE(code_descriptor) ) - { - /* The above test may be wrong, the book is not clear */ - /* Segmented 32 bit protected mode */ - addr_modes.default_mode = SEG32; - } - else - { - /* 16 bit protected mode */ - addr_modes.default_mode = PM16; - } - FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor); - code_limit = code_base - + (SEG_LIMIT(code_descriptor)+1) * SEG_GRANULARITY(code_descriptor) - - 1; - if ( code_limit < code_base ) code_limit = 0xffffffff; - } - - FPU_lookahead = 1; - if (current->ptrace & PT_PTRACED) - FPU_lookahead = 0; - - if ( !valid_prefix(&byte1, (u_char __user **)&FPU_EIP, - &addr_modes.override) ) - { - RE_ENTRANT_CHECK_OFF; - printk("FPU emulator: Unknown prefix byte 0x%02x, probably due to\n" - "FPU emulator: self-modifying code! (emulation impossible)\n", - byte1); - RE_ENTRANT_CHECK_ON; - EXCEPTION(EX_INTERNAL|0x126); - math_abort(FPU_info,SIGILL); - } - -do_another_FPU_instruction: - - no_ip_update = 0; - - FPU_EIP++; /* We have fetched the prefix and first code bytes. */ - - if ( addr_modes.default_mode ) - { - /* This checks for the minimum instruction bytes. - We also need to check any extra (address mode) code access. */ - if ( FPU_EIP > code_limit ) - math_abort(FPU_info,SIGSEGV); - } - - if ( (byte1 & 0xf8) != 0xd8 ) - { - if ( byte1 == FWAIT_OPCODE ) - { - if (partial_status & SW_Summary) - goto do_the_FPU_interrupt; - else - goto FPU_fwait_done; - } -#ifdef PARANOID - EXCEPTION(EX_INTERNAL|0x128); - math_abort(FPU_info,SIGILL); -#endif /* PARANOID */ - } - - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(1); - FPU_get_user(FPU_modrm, (u_char __user *) FPU_EIP); - RE_ENTRANT_CHECK_ON; - FPU_EIP++; - - if (partial_status & SW_Summary) - { - /* Ignore the error for now if the current instruction is a no-wait - control instruction */ - /* The 80486 manual contradicts itself on this topic, - but a real 80486 uses the following instructions: - fninit, fnstenv, fnsave, fnstsw, fnstenv, fnclex. - */ - code = (FPU_modrm << 8) | byte1; - if ( ! ( (((code & 0xf803) == 0xe003) || /* fnclex, fninit, fnstsw */ - (((code & 0x3003) == 0x3001) && /* fnsave, fnstcw, fnstenv, - fnstsw */ - ((code & 0xc000) != 0xc000))) ) ) - { - /* - * We need to simulate the action of the kernel to FPU - * interrupts here. - */ - do_the_FPU_interrupt: - - FPU_EIP = FPU_ORIG_EIP; /* Point to current FPU instruction. */ - - RE_ENTRANT_CHECK_OFF; - current->thread.trap_no = 16; - current->thread.error_code = 0; - send_sig(SIGFPE, current, 1); - return; - } - } - - entry_sel_off.offset = FPU_ORIG_EIP; - entry_sel_off.selector = FPU_CS; - entry_sel_off.opcode = (byte1 << 8) | FPU_modrm; - - FPU_rm = FPU_modrm & 7; - - if ( FPU_modrm < 0300 ) - { - /* All of these instructions use the mod/rm byte to get a data address */ - - if ( (addr_modes.default_mode & SIXTEEN) - ^ (addr_modes.override.address_size == ADDR_SIZE_PREFIX) ) - data_address = FPU_get_address_16(FPU_modrm, &FPU_EIP, &data_sel_off, - addr_modes); - else - data_address = FPU_get_address(FPU_modrm, &FPU_EIP, &data_sel_off, - addr_modes); - - if ( addr_modes.default_mode ) - { - if ( FPU_EIP-1 > code_limit ) - math_abort(FPU_info,SIGSEGV); - } - - if ( !(byte1 & 1) ) - { - unsigned short status1 = partial_status; - - st0_ptr = &st(0); - st0_tag = FPU_gettag0(); - - /* Stack underflow has priority */ - if ( NOT_EMPTY_ST0 ) - { - if ( addr_modes.default_mode & PROTECTED ) - { - /* This table works for 16 and 32 bit protected mode */ - if ( access_limit < data_sizes_16[(byte1 >> 1) & 3] ) - math_abort(FPU_info,SIGSEGV); - } - - unmasked = 0; /* Do this here to stop compiler warnings. */ - switch ( (byte1 >> 1) & 3 ) - { - case 0: - unmasked = FPU_load_single((float __user *)data_address, - &loaded_data); - loaded_tag = unmasked & 0xff; - unmasked &= ~0xff; - break; - case 1: - loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data); - break; - case 2: - unmasked = FPU_load_double((double __user *)data_address, - &loaded_data); - loaded_tag = unmasked & 0xff; - unmasked &= ~0xff; - break; - case 3: - default: /* Used here to suppress gcc warnings. */ - loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data); - break; - } - - /* No more access to user memory, it is safe - to use static data now */ - - /* NaN operands have the next priority. */ - /* We have to delay looking at st(0) until after - loading the data, because that data might contain an SNaN */ - if ( ((st0_tag == TAG_Special) && isNaN(st0_ptr)) || - ((loaded_tag == TAG_Special) && isNaN(&loaded_data)) ) - { - /* Restore the status word; we might have loaded a - denormal. */ - partial_status = status1; - if ( (FPU_modrm & 0x30) == 0x10 ) - { - /* fcom or fcomp */ - EXCEPTION(EX_Invalid); - setcc(SW_C3 | SW_C2 | SW_C0); - if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) ) - FPU_pop(); /* fcomp, masked, so we pop. */ - } - else - { - if ( loaded_tag == TAG_Special ) - loaded_tag = FPU_Special(&loaded_data); -#ifdef PECULIAR_486 - /* This is not really needed, but gives behaviour - identical to an 80486 */ - if ( (FPU_modrm & 0x28) == 0x20 ) - /* fdiv or fsub */ - real_2op_NaN(&loaded_data, loaded_tag, 0, &loaded_data); - else -#endif /* PECULIAR_486 */ - /* fadd, fdivr, fmul, or fsubr */ - real_2op_NaN(&loaded_data, loaded_tag, 0, st0_ptr); - } - goto reg_mem_instr_done; - } - - if ( unmasked && !((FPU_modrm & 0x30) == 0x10) ) - { - /* Is not a comparison instruction. */ - if ( (FPU_modrm & 0x38) == 0x38 ) - { - /* fdivr */ - if ( (st0_tag == TAG_Zero) && - ((loaded_tag == TAG_Valid) - || (loaded_tag == TAG_Special - && isdenormal(&loaded_data))) ) - { - if ( FPU_divide_by_zero(0, getsign(&loaded_data)) - < 0 ) - { - /* We use the fact here that the unmasked - exception in the loaded data was for a - denormal operand */ - /* Restore the state of the denormal op bit */ - partial_status &= ~SW_Denorm_Op; - partial_status |= status1 & SW_Denorm_Op; - } - else - setsign(st0_ptr, getsign(&loaded_data)); - } - } - goto reg_mem_instr_done; - } - - switch ( (FPU_modrm >> 3) & 7 ) - { - case 0: /* fadd */ - clear_C1(); - FPU_add(&loaded_data, loaded_tag, 0, control_word); - break; - case 1: /* fmul */ - clear_C1(); - FPU_mul(&loaded_data, loaded_tag, 0, control_word); - break; - case 2: /* fcom */ - FPU_compare_st_data(&loaded_data, loaded_tag); - break; - case 3: /* fcomp */ - if ( !FPU_compare_st_data(&loaded_data, loaded_tag) - && !unmasked ) - FPU_pop(); - break; - case 4: /* fsub */ - clear_C1(); - FPU_sub(LOADED|loaded_tag, (int)&loaded_data, control_word); - break; - case 5: /* fsubr */ - clear_C1(); - FPU_sub(REV|LOADED|loaded_tag, (int)&loaded_data, control_word); - break; - case 6: /* fdiv */ - clear_C1(); - FPU_div(LOADED|loaded_tag, (int)&loaded_data, control_word); - break; - case 7: /* fdivr */ - clear_C1(); - if ( st0_tag == TAG_Zero ) - partial_status = status1; /* Undo any denorm tag, - zero-divide has priority. */ - FPU_div(REV|LOADED|loaded_tag, (int)&loaded_data, control_word); - break; - } - } - else - { - if ( (FPU_modrm & 0x30) == 0x10 ) - { - /* The instruction is fcom or fcomp */ - EXCEPTION(EX_StackUnder); - setcc(SW_C3 | SW_C2 | SW_C0); - if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) ) - FPU_pop(); /* fcomp */ - } - else - FPU_stack_underflow(); - } - reg_mem_instr_done: - operand_address = data_sel_off; - } - else - { - if ( !(no_ip_update = - FPU_load_store(((FPU_modrm & 0x38) | (byte1 & 6)) >> 1, - addr_modes, data_address)) ) - { - operand_address = data_sel_off; - } - } - - } - else - { - /* None of these instructions access user memory */ - u_char instr_index = (FPU_modrm & 0x38) | (byte1 & 7); - -#ifdef PECULIAR_486 - /* This is supposed to be undefined, but a real 80486 seems - to do this: */ - operand_address.offset = 0; - operand_address.selector = FPU_DS; -#endif /* PECULIAR_486 */ - - st0_ptr = &st(0); - st0_tag = FPU_gettag0(); - switch ( type_table[(int) instr_index] ) - { - case _NONE_: /* also _REGIc: _REGIn */ - break; - case _REG0_: - if ( !NOT_EMPTY_ST0 ) - { - FPU_stack_underflow(); - goto FPU_instruction_done; - } - break; - case _REGIi: - if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) - { - FPU_stack_underflow_i(FPU_rm); - goto FPU_instruction_done; - } - break; - case _REGIp: - if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) - { - FPU_stack_underflow_pop(FPU_rm); - goto FPU_instruction_done; - } - break; - case _REGI_: - if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) - { - FPU_stack_underflow(); - goto FPU_instruction_done; - } - break; - case _PUSH_: /* Only used by the fld st(i) instruction */ - break; - case _null_: - FPU_illegal(); - goto FPU_instruction_done; - default: - EXCEPTION(EX_INTERNAL|0x111); - goto FPU_instruction_done; - } - (*st_instr_table[(int) instr_index])(); - -FPU_instruction_done: - ; - } - - if ( ! no_ip_update ) - instruction_address = entry_sel_off; - -FPU_fwait_done: - -#ifdef DEBUG - RE_ENTRANT_CHECK_OFF; - FPU_printall(); - RE_ENTRANT_CHECK_ON; -#endif /* DEBUG */ - - if (FPU_lookahead && !need_resched()) - { - FPU_ORIG_EIP = FPU_EIP - code_base; - if ( valid_prefix(&byte1, (u_char __user **)&FPU_EIP, - &addr_modes.override) ) - goto do_another_FPU_instruction; - } - - if ( addr_modes.default_mode ) - FPU_EIP -= code_base; - - RE_ENTRANT_CHECK_OFF; -} - - -/* Support for prefix bytes is not yet complete. To properly handle - all prefix bytes, further changes are needed in the emulator code - which accesses user address space. Access to separate segments is - important for msdos emulation. */ -static int valid_prefix(u_char *Byte, u_char __user **fpu_eip, - overrides *override) -{ - u_char byte; - u_char __user *ip = *fpu_eip; - - *override = (overrides) { 0, 0, PREFIX_DEFAULT }; /* defaults */ - - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(1); - FPU_get_user(byte, ip); - RE_ENTRANT_CHECK_ON; - - while ( 1 ) - { - switch ( byte ) - { - case ADDR_SIZE_PREFIX: - override->address_size = ADDR_SIZE_PREFIX; - goto do_next_byte; - - case OP_SIZE_PREFIX: - override->operand_size = OP_SIZE_PREFIX; - goto do_next_byte; - - case PREFIX_CS: - override->segment = PREFIX_CS_; - goto do_next_byte; - case PREFIX_ES: - override->segment = PREFIX_ES_; - goto do_next_byte; - case PREFIX_SS: - override->segment = PREFIX_SS_; - goto do_next_byte; - case PREFIX_FS: - override->segment = PREFIX_FS_; - goto do_next_byte; - case PREFIX_GS: - override->segment = PREFIX_GS_; - goto do_next_byte; - case PREFIX_DS: - override->segment = PREFIX_DS_; - goto do_next_byte; - -/* lock is not a valid prefix for FPU instructions, - let the cpu handle it to generate a SIGILL. */ -/* case PREFIX_LOCK: */ - - /* rep.. prefixes have no meaning for FPU instructions */ - case PREFIX_REPE: - case PREFIX_REPNE: - - do_next_byte: - ip++; - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(1); - FPU_get_user(byte, ip); - RE_ENTRANT_CHECK_ON; - break; - case FWAIT_OPCODE: - *Byte = byte; - return 1; - default: - if ( (byte & 0xf8) == 0xd8 ) - { - *Byte = byte; - *fpu_eip = ip; - return 1; - } - else - { - /* Not a valid sequence of prefix bytes followed by - an FPU instruction. */ - *Byte = byte; /* Needed for error message. */ - return 0; - } - } - } -} - - -void math_abort(struct info * info, unsigned int signal) -{ - FPU_EIP = FPU_ORIG_EIP; - current->thread.trap_no = 16; - current->thread.error_code = 0; - send_sig(signal,current,1); - RE_ENTRANT_CHECK_OFF; - __asm__("movl %0,%%esp ; ret": :"g" (((long) info)-4)); -#ifdef PARANOID - printk("ERROR: wm-FPU-emu math_abort failed!\n"); -#endif /* PARANOID */ -} - - - -#define S387 ((struct i387_soft_struct *)s387) -#define sstatus_word() \ - ((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top)) - -int restore_i387_soft(void *s387, struct _fpstate __user *buf) -{ - u_char __user *d = (u_char __user *)buf; - int offset, other, i, tags, regnr, tag, newtop; - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, d, 7*4 + 8*10); - if (__copy_from_user(&S387->cwd, d, 7*4)) - return -1; - RE_ENTRANT_CHECK_ON; - - d += 7*4; - - S387->ftop = (S387->swd >> SW_Top_Shift) & 7; - offset = (S387->ftop & 7) * 10; - other = 80 - offset; - - RE_ENTRANT_CHECK_OFF; - /* Copy all registers in stack order. */ - if (__copy_from_user(((u_char *)&S387->st_space)+offset, d, other)) - return -1; - if ( offset ) - if (__copy_from_user((u_char *)&S387->st_space, d+other, offset)) - return -1; - RE_ENTRANT_CHECK_ON; - - /* The tags may need to be corrected now. */ - tags = S387->twd; - newtop = S387->ftop; - for ( i = 0; i < 8; i++ ) - { - regnr = (i+newtop) & 7; - if ( ((tags >> ((regnr & 7)*2)) & 3) != TAG_Empty ) - { - /* The loaded data over-rides all other cases. */ - tag = FPU_tagof((FPU_REG *)((u_char *)S387->st_space + 10*regnr)); - tags &= ~(3 << (regnr*2)); - tags |= (tag & 3) << (regnr*2); - } - } - S387->twd = tags; - - return 0; -} - - -int save_i387_soft(void *s387, struct _fpstate __user * buf) -{ - u_char __user *d = (u_char __user *)buf; - int offset = (S387->ftop & 7) * 10, other = 80 - offset; - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE, d, 7*4 + 8*10); -#ifdef PECULIAR_486 - S387->cwd &= ~0xe080; - /* An 80486 sets nearly all of the reserved bits to 1. */ - S387->cwd |= 0xffff0040; - S387->swd = sstatus_word() | 0xffff0000; - S387->twd |= 0xffff0000; - S387->fcs &= ~0xf8000000; - S387->fos |= 0xffff0000; -#endif /* PECULIAR_486 */ - if (__copy_to_user(d, &S387->cwd, 7*4)) - return -1; - RE_ENTRANT_CHECK_ON; - - d += 7*4; - - RE_ENTRANT_CHECK_OFF; - /* Copy all registers in stack order. */ - if (__copy_to_user(d, ((u_char *)&S387->st_space)+offset, other)) - return -1; - if ( offset ) - if (__copy_to_user(d+other, (u_char *)&S387->st_space, offset)) - return -1; - RE_ENTRANT_CHECK_ON; - - return 1; -} diff --git a/arch/i386/math-emu/fpu_etc.c b/arch/i386/math-emu/fpu_etc.c deleted file mode 100644 index e3b5d465587..00000000000 --- a/arch/i386/math-emu/fpu_etc.c +++ /dev/null @@ -1,143 +0,0 @@ -/*---------------------------------------------------------------------------+ - | fpu_etc.c | - | | - | Implement a few FPU instructions. | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "fpu_system.h" -#include "exception.h" -#include "fpu_emu.h" -#include "status_w.h" -#include "reg_constant.h" - - -static void fchs(FPU_REG *st0_ptr, u_char st0tag) -{ - if ( st0tag ^ TAG_Empty ) - { - signbyte(st0_ptr) ^= SIGN_NEG; - clear_C1(); - } - else - FPU_stack_underflow(); -} - - -static void fabs(FPU_REG *st0_ptr, u_char st0tag) -{ - if ( st0tag ^ TAG_Empty ) - { - setpositive(st0_ptr); - clear_C1(); - } - else - FPU_stack_underflow(); -} - - -static void ftst_(FPU_REG *st0_ptr, u_char st0tag) -{ - switch (st0tag) - { - case TAG_Zero: - setcc(SW_C3); - break; - case TAG_Valid: - if (getsign(st0_ptr) == SIGN_POS) - setcc(0); - else - setcc(SW_C0); - break; - case TAG_Special: - switch ( FPU_Special(st0_ptr) ) - { - case TW_Denormal: - if (getsign(st0_ptr) == SIGN_POS) - setcc(0); - else - setcc(SW_C0); - if ( denormal_operand() < 0 ) - { -#ifdef PECULIAR_486 - /* This is weird! */ - if (getsign(st0_ptr) == SIGN_POS) - setcc(SW_C3); -#endif /* PECULIAR_486 */ - return; - } - break; - case TW_NaN: - setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */ - EXCEPTION(EX_Invalid); - break; - case TW_Infinity: - if (getsign(st0_ptr) == SIGN_POS) - setcc(0); - else - setcc(SW_C0); - break; - default: - setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */ - EXCEPTION(EX_INTERNAL|0x14); - break; - } - break; - case TAG_Empty: - setcc(SW_C0|SW_C2|SW_C3); - EXCEPTION(EX_StackUnder); - break; - } -} - - -static void fxam(FPU_REG *st0_ptr, u_char st0tag) -{ - int c = 0; - switch (st0tag) - { - case TAG_Empty: - c = SW_C3|SW_C0; - break; - case TAG_Zero: - c = SW_C3; - break; - case TAG_Valid: - c = SW_C2; - break; - case TAG_Special: - switch ( FPU_Special(st0_ptr) ) - { - case TW_Denormal: - c = SW_C2|SW_C3; /* Denormal */ - break; - case TW_NaN: - /* We also use NaN for unsupported types. */ - if ( (st0_ptr->sigh & 0x80000000) && (exponent(st0_ptr) == EXP_OVER) ) - c = SW_C0; - break; - case TW_Infinity: - c = SW_C2|SW_C0; - break; - } - } - if ( getsign(st0_ptr) == SIGN_NEG ) - c |= SW_C1; - setcc(c); -} - - -static FUNC_ST0 const fp_etc_table[] = { - fchs, fabs, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal, - ftst_, fxam, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal -}; - -void FPU_etc(void) -{ - (fp_etc_table[FPU_rm])(&st(0), FPU_gettag0()); -} diff --git a/arch/i386/math-emu/fpu_proto.h b/arch/i386/math-emu/fpu_proto.h deleted file mode 100644 index 37a8a7fe7e2..00000000000 --- a/arch/i386/math-emu/fpu_proto.h +++ /dev/null @@ -1,140 +0,0 @@ -#ifndef _FPU_PROTO_H -#define _FPU_PROTO_H - -/* errors.c */ -extern void FPU_illegal(void); -extern void FPU_printall(void); -asmlinkage void FPU_exception(int n); -extern int real_1op_NaN(FPU_REG *a); -extern int real_2op_NaN(FPU_REG const *b, u_char tagb, int deststnr, - FPU_REG const *defaultNaN); -asmlinkage int arith_invalid(int deststnr); -asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign); -extern int set_precision_flag(int flags); -asmlinkage void set_precision_flag_up(void); -asmlinkage void set_precision_flag_down(void); -asmlinkage int denormal_operand(void); -asmlinkage int arith_overflow(FPU_REG *dest); -asmlinkage int arith_underflow(FPU_REG *dest); -extern void FPU_stack_overflow(void); -extern void FPU_stack_underflow(void); -extern void FPU_stack_underflow_i(int i); -extern void FPU_stack_underflow_pop(int i); -/* fpu_arith.c */ -extern void fadd__(void); -extern void fmul__(void); -extern void fsub__(void); -extern void fsubr_(void); -extern void fdiv__(void); -extern void fdivr_(void); -extern void fadd_i(void); -extern void fmul_i(void); -extern void fsubri(void); -extern void fsub_i(void); -extern void fdivri(void); -extern void fdiv_i(void); -extern void faddp_(void); -extern void fmulp_(void); -extern void fsubrp(void); -extern void fsubp_(void); -extern void fdivrp(void); -extern void fdivp_(void); -/* fpu_aux.c */ -extern void finit(void); -extern void finit_(void); -extern void fstsw_(void); -extern void fp_nop(void); -extern void fld_i_(void); -extern void fxch_i(void); -extern void ffree_(void); -extern void ffreep(void); -extern void fst_i_(void); -extern void fstp_i(void); -/* fpu_entry.c */ -asmlinkage extern void math_emulate(long arg); -extern void math_abort(struct info *info, unsigned int signal); -/* fpu_etc.c */ -extern void FPU_etc(void); -/* fpu_tags.c */ -extern int FPU_gettag0(void); -extern int FPU_gettagi(int stnr); -extern int FPU_gettag(int regnr); -extern void FPU_settag0(int tag); -extern void FPU_settagi(int stnr, int tag); -extern void FPU_settag(int regnr, int tag); -extern int FPU_Special(FPU_REG const *ptr); -extern int isNaN(FPU_REG const *ptr); -extern void FPU_pop(void); -extern int FPU_empty_i(int stnr); -extern int FPU_stackoverflow(FPU_REG **st_new_ptr); -extern void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr); -extern void FPU_copy_to_reg1(FPU_REG const *r, u_char tag); -extern void FPU_copy_to_reg0(FPU_REG const *r, u_char tag); -/* fpu_trig.c */ -extern void FPU_triga(void); -extern void FPU_trigb(void); -/* get_address.c */ -extern void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip, - struct address *addr, fpu_addr_modes addr_modes); -extern void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip, - struct address *addr, fpu_addr_modes addr_modes); -/* load_store.c */ -extern int FPU_load_store(u_char type, fpu_addr_modes addr_modes, - void __user *data_address); -/* poly_2xm1.c */ -extern int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result); -/* poly_atan.c */ -extern void poly_atan(FPU_REG *st0_ptr, u_char st0_tag, FPU_REG *st1_ptr, - u_char st1_tag); -/* poly_l2.c */ -extern void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign); -extern int poly_l2p1(u_char s0, u_char s1, FPU_REG *r0, FPU_REG *r1, - FPU_REG *d); -/* poly_sin.c */ -extern void poly_sine(FPU_REG *st0_ptr); -extern void poly_cos(FPU_REG *st0_ptr); -/* poly_tan.c */ -extern void poly_tan(FPU_REG *st0_ptr); -/* reg_add_sub.c */ -extern int FPU_add(FPU_REG const *b, u_char tagb, int destrnr, int control_w); -extern int FPU_sub(int flags, int rm, int control_w); -/* reg_compare.c */ -extern int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag); -extern void fcom_st(void); -extern void fcompst(void); -extern void fcompp(void); -extern void fucom_(void); -extern void fucomp(void); -extern void fucompp(void); -/* reg_constant.c */ -extern void fconst(void); -/* reg_ld_str.c */ -extern int FPU_load_extended(long double __user *s, int stnr); -extern int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data); -extern int FPU_load_single(float __user *single, FPU_REG *loaded_data); -extern int FPU_load_int64(long long __user *_s); -extern int FPU_load_int32(long __user *_s, FPU_REG *loaded_data); -extern int FPU_load_int16(short __user *_s, FPU_REG *loaded_data); -extern int FPU_load_bcd(u_char __user *s); -extern int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag, - long double __user *d); -extern int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat); -extern int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single); -extern int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d); -extern int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d); -extern int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d); -extern int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d); -extern int FPU_round_to_int(FPU_REG *r, u_char tag); -extern u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s); -extern void frstor(fpu_addr_modes addr_modes, u_char __user *data_address); -extern u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d); -extern void fsave(fpu_addr_modes addr_modes, u_char __user *data_address); -extern int FPU_tagof(FPU_REG *ptr); -/* reg_mul.c */ -extern int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w); - -extern int FPU_div(int flags, int regrm, int control_w); -/* reg_convert.c */ -extern int FPU_to_exp16(FPU_REG const *a, FPU_REG *x); -#endif /* _FPU_PROTO_H */ - diff --git a/arch/i386/math-emu/fpu_system.h b/arch/i386/math-emu/fpu_system.h deleted file mode 100644 index a3ae28c49dd..00000000000 --- a/arch/i386/math-emu/fpu_system.h +++ /dev/null @@ -1,90 +0,0 @@ -/*---------------------------------------------------------------------------+ - | fpu_system.h | - | | - | Copyright (C) 1992,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - +---------------------------------------------------------------------------*/ - -#ifndef _FPU_SYSTEM_H -#define _FPU_SYSTEM_H - -/* system dependent definitions */ - -#include -#include -#include - -/* This sets the pointer FPU_info to point to the argument part - of the stack frame of math_emulate() */ -#define SETUP_DATA_AREA(arg) FPU_info = (struct info *) &arg - -/* s is always from a cpu register, and the cpu does bounds checking - * during register load --> no further bounds checks needed */ -#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3]) -#define SEG_D_SIZE(x) ((x).b & (3 << 21)) -#define SEG_G_BIT(x) ((x).b & (1 << 23)) -#define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1) -#define SEG_286_MODE(x) ((x).b & ( 0xff000000 | 0xf0000 | (1 << 23))) -#define SEG_BASE_ADDR(s) (((s).b & 0xff000000) \ - | (((s).b & 0xff) << 16) | ((s).a >> 16)) -#define SEG_LIMIT(s) (((s).b & 0xff0000) | ((s).a & 0xffff)) -#define SEG_EXECUTE_ONLY(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 11)) -#define SEG_WRITE_PERM(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 9)) -#define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ - == (1 << 10)) - -#define I387 (current->thread.i387) -#define FPU_info (I387.soft.info) - -#define FPU_CS (*(unsigned short *) &(FPU_info->___cs)) -#define FPU_SS (*(unsigned short *) &(FPU_info->___ss)) -#define FPU_DS (*(unsigned short *) &(FPU_info->___ds)) -#define FPU_EAX (FPU_info->___eax) -#define FPU_EFLAGS (FPU_info->___eflags) -#define FPU_EIP (FPU_info->___eip) -#define FPU_ORIG_EIP (FPU_info->___orig_eip) - -#define FPU_lookahead (I387.soft.lookahead) - -/* nz if ip_offset and cs_selector are not to be set for the current - instruction. */ -#define no_ip_update (*(u_char *)&(I387.soft.no_update)) -#define FPU_rm (*(u_char *)&(I387.soft.rm)) - -/* Number of bytes of data which can be legally accessed by the current - instruction. This only needs to hold a number <= 108, so a byte will do. */ -#define access_limit (*(u_char *)&(I387.soft.alimit)) - -#define partial_status (I387.soft.swd) -#define control_word (I387.soft.cwd) -#define fpu_tag_word (I387.soft.twd) -#define registers (I387.soft.st_space) -#define top (I387.soft.ftop) - -#define instruction_address (*(struct address *)&I387.soft.fip) -#define operand_address (*(struct address *)&I387.soft.foo) - -#define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \ - math_abort(FPU_info,SIGSEGV) -#define FPU_abort math_abort(FPU_info, SIGSEGV) - -#undef FPU_IGNORE_CODE_SEGV -#ifdef FPU_IGNORE_CODE_SEGV -/* access_ok() is very expensive, and causes the emulator to run - about 20% slower if applied to the code. Anyway, errors due to bad - code addresses should be much rarer than errors due to bad data - addresses. */ -#define FPU_code_access_ok(z) -#else -/* A simpler test than access_ok() can probably be done for - FPU_code_access_ok() because the only possible error is to step - past the upper boundary of a legal code area. */ -#define FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user *)FPU_EIP,z) -#endif - -#define FPU_get_user(x,y) get_user((x),(y)) -#define FPU_put_user(x,y) put_user((x),(y)) - -#endif diff --git a/arch/i386/math-emu/fpu_tags.c b/arch/i386/math-emu/fpu_tags.c deleted file mode 100644 index cb436fe20e4..00000000000 --- a/arch/i386/math-emu/fpu_tags.c +++ /dev/null @@ -1,127 +0,0 @@ -/*---------------------------------------------------------------------------+ - | fpu_tags.c | - | | - | Set FPU register tags. | - | | - | Copyright (C) 1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@jacobi.maths.monash.edu.au | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "fpu_emu.h" -#include "fpu_system.h" -#include "exception.h" - - -void FPU_pop(void) -{ - fpu_tag_word |= 3 << ((top & 7)*2); - top++; -} - - -int FPU_gettag0(void) -{ - return (fpu_tag_word >> ((top & 7)*2)) & 3; -} - - -int FPU_gettagi(int stnr) -{ - return (fpu_tag_word >> (((top+stnr) & 7)*2)) & 3; -} - - -int FPU_gettag(int regnr) -{ - return (fpu_tag_word >> ((regnr & 7)*2)) & 3; -} - - -void FPU_settag0(int tag) -{ - int regnr = top; - regnr &= 7; - fpu_tag_word &= ~(3 << (regnr*2)); - fpu_tag_word |= (tag & 3) << (regnr*2); -} - - -void FPU_settagi(int stnr, int tag) -{ - int regnr = stnr+top; - regnr &= 7; - fpu_tag_word &= ~(3 << (regnr*2)); - fpu_tag_word |= (tag & 3) << (regnr*2); -} - - -void FPU_settag(int regnr, int tag) -{ - regnr &= 7; - fpu_tag_word &= ~(3 << (regnr*2)); - fpu_tag_word |= (tag & 3) << (regnr*2); -} - - -int FPU_Special(FPU_REG const *ptr) -{ - int exp = exponent(ptr); - - if ( exp == EXP_BIAS+EXP_UNDER ) - return TW_Denormal; - else if ( exp != EXP_BIAS+EXP_OVER ) - return TW_NaN; - else if ( (ptr->sigh == 0x80000000) && (ptr->sigl == 0) ) - return TW_Infinity; - return TW_NaN; -} - - -int isNaN(FPU_REG const *ptr) -{ - return ( (exponent(ptr) == EXP_BIAS+EXP_OVER) - && !((ptr->sigh == 0x80000000) && (ptr->sigl == 0)) ); -} - - -int FPU_empty_i(int stnr) -{ - int regnr = (top+stnr) & 7; - - return ((fpu_tag_word >> (regnr*2)) & 3) == TAG_Empty; -} - - -int FPU_stackoverflow(FPU_REG **st_new_ptr) -{ - *st_new_ptr = &st(-1); - - return ((fpu_tag_word >> (((top - 1) & 7)*2)) & 3) != TAG_Empty; -} - - -void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr) -{ - reg_copy(r, &st(stnr)); - FPU_settagi(stnr, tag); -} - -void FPU_copy_to_reg1(FPU_REG const *r, u_char tag) -{ - reg_copy(r, &st(1)); - FPU_settagi(1, tag); -} - -void FPU_copy_to_reg0(FPU_REG const *r, u_char tag) -{ - int regnr = top; - regnr &= 7; - - reg_copy(r, &st(0)); - - fpu_tag_word &= ~(3 << (regnr*2)); - fpu_tag_word |= (tag & 3) << (regnr*2); -} diff --git a/arch/i386/math-emu/fpu_trig.c b/arch/i386/math-emu/fpu_trig.c deleted file mode 100644 index 403cbde1d42..00000000000 --- a/arch/i386/math-emu/fpu_trig.c +++ /dev/null @@ -1,1845 +0,0 @@ -/*---------------------------------------------------------------------------+ - | fpu_trig.c | - | | - | Implementation of the FPU "transcendental" functions. | - | | - | Copyright (C) 1992,1993,1994,1997,1999 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@melbpc.org.au | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "fpu_system.h" -#include "exception.h" -#include "fpu_emu.h" -#include "status_w.h" -#include "control_w.h" -#include "reg_constant.h" - -static void rem_kernel(unsigned long long st0, unsigned long long *y, - unsigned long long st1, - unsigned long long q, int n); - -#define BETTER_THAN_486 - -#define FCOS 4 - -/* Used only by fptan, fsin, fcos, and fsincos. */ -/* This routine produces very accurate results, similar to - using a value of pi with more than 128 bits precision. */ -/* Limited measurements show no results worse than 64 bit precision - except for the results for arguments close to 2^63, where the - precision of the result sometimes degrades to about 63.9 bits */ -static int trig_arg(FPU_REG *st0_ptr, int even) -{ - FPU_REG tmp; - u_char tmptag; - unsigned long long q; - int old_cw = control_word, saved_status = partial_status; - int tag, st0_tag = TAG_Valid; - - if ( exponent(st0_ptr) >= 63 ) - { - partial_status |= SW_C2; /* Reduction incomplete. */ - return -1; - } - - control_word &= ~CW_RC; - control_word |= RC_CHOP; - - setpositive(st0_ptr); - tag = FPU_u_div(st0_ptr, &CONST_PI2, &tmp, PR_64_BITS | RC_CHOP | 0x3f, - SIGN_POS); - - FPU_round_to_int(&tmp, tag); /* Fortunately, this can't overflow - to 2^64 */ - q = significand(&tmp); - if ( q ) - { - rem_kernel(significand(st0_ptr), - &significand(&tmp), - significand(&CONST_PI2), - q, exponent(st0_ptr) - exponent(&CONST_PI2)); - setexponent16(&tmp, exponent(&CONST_PI2)); - st0_tag = FPU_normalize(&tmp); - FPU_copy_to_reg0(&tmp, st0_tag); - } - - if ( (even && !(q & 1)) || (!even && (q & 1)) ) - { - st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2, FULL_PRECISION); - -#ifdef BETTER_THAN_486 - /* So far, the results are exact but based upon a 64 bit - precision approximation to pi/2. The technique used - now is equivalent to using an approximation to pi/2 which - is accurate to about 128 bits. */ - if ( (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64) || (q > 1) ) - { - /* This code gives the effect of having pi/2 to better than - 128 bits precision. */ - - significand(&tmp) = q + 1; - setexponent16(&tmp, 63); - FPU_normalize(&tmp); - tmptag = - FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION, SIGN_POS, - exponent(&CONST_PI2extra) + exponent(&tmp)); - setsign(&tmp, getsign(&CONST_PI2extra)); - st0_tag = FPU_add(&tmp, tmptag, 0, FULL_PRECISION); - if ( signnegative(st0_ptr) ) - { - /* CONST_PI2extra is negative, so the result of the addition - can be negative. This means that the argument is actually - in a different quadrant. The correction is always < pi/2, - so it can't overflow into yet another quadrant. */ - setpositive(st0_ptr); - q++; - } - } -#endif /* BETTER_THAN_486 */ - } -#ifdef BETTER_THAN_486 - else - { - /* So far, the results are exact but based upon a 64 bit - precision approximation to pi/2. The technique used - now is equivalent to using an approximation to pi/2 which - is accurate to about 128 bits. */ - if ( ((q > 0) && (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64)) - || (q > 1) ) - { - /* This code gives the effect of having p/2 to better than - 128 bits precision. */ - - significand(&tmp) = q; - setexponent16(&tmp, 63); - FPU_normalize(&tmp); /* This must return TAG_Valid */ - tmptag = FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION, - SIGN_POS, - exponent(&CONST_PI2extra) + exponent(&tmp)); - setsign(&tmp, getsign(&CONST_PI2extra)); - st0_tag = FPU_sub(LOADED|(tmptag & 0x0f), (int)&tmp, - FULL_PRECISION); - if ( (exponent(st0_ptr) == exponent(&CONST_PI2)) && - ((st0_ptr->sigh > CONST_PI2.sigh) - || ((st0_ptr->sigh == CONST_PI2.sigh) - && (st0_ptr->sigl > CONST_PI2.sigl))) ) - { - /* CONST_PI2extra is negative, so the result of the - subtraction can be larger than pi/2. This means - that the argument is actually in a different quadrant. - The correction is always < pi/2, so it can't overflow - into yet another quadrant. */ - st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2, - FULL_PRECISION); - q++; - } - } - } -#endif /* BETTER_THAN_486 */ - - FPU_settag0(st0_tag); - control_word = old_cw; - partial_status = saved_status & ~SW_C2; /* Reduction complete. */ - - return (q & 3) | even; -} - - -/* Convert a long to register */ -static void convert_l2reg(long const *arg, int deststnr) -{ - int tag; - long num = *arg; - u_char sign; - FPU_REG *dest = &st(deststnr); - - if (num == 0) - { - FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); - return; - } - - if (num > 0) - { sign = SIGN_POS; } - else - { num = -num; sign = SIGN_NEG; } - - dest->sigh = num; - dest->sigl = 0; - setexponent16(dest, 31); - tag = FPU_normalize(dest); - FPU_settagi(deststnr, tag); - setsign(dest, sign); - return; -} - - -static void single_arg_error(FPU_REG *st0_ptr, u_char st0_tag) -{ - if ( st0_tag == TAG_Empty ) - FPU_stack_underflow(); /* Puts a QNaN in st(0) */ - else if ( st0_tag == TW_NaN ) - real_1op_NaN(st0_ptr); /* return with a NaN in st(0) */ -#ifdef PARANOID - else - EXCEPTION(EX_INTERNAL|0x0112); -#endif /* PARANOID */ -} - - -static void single_arg_2_error(FPU_REG *st0_ptr, u_char st0_tag) -{ - int isNaN; - - switch ( st0_tag ) - { - case TW_NaN: - isNaN = (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000); - if ( isNaN && !(st0_ptr->sigh & 0x40000000) ) /* Signaling ? */ - { - EXCEPTION(EX_Invalid); - if ( control_word & CW_Invalid ) - { - /* The masked response */ - /* Convert to a QNaN */ - st0_ptr->sigh |= 0x40000000; - push(); - FPU_copy_to_reg0(st0_ptr, TAG_Special); - } - } - else if ( isNaN ) - { - /* A QNaN */ - push(); - FPU_copy_to_reg0(st0_ptr, TAG_Special); - } - else - { - /* pseudoNaN or other unsupported */ - EXCEPTION(EX_Invalid); - if ( control_word & CW_Invalid ) - { - /* The masked response */ - FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); - push(); - FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); - } - } - break; /* return with a NaN in st(0) */ -#ifdef PARANOID - default: - EXCEPTION(EX_INTERNAL|0x0112); -#endif /* PARANOID */ - } -} - - -/*---------------------------------------------------------------------------*/ - -static void f2xm1(FPU_REG *st0_ptr, u_char tag) -{ - FPU_REG a; - - clear_C1(); - - if ( tag == TAG_Valid ) - { - /* For an 80486 FPU, the result is undefined if the arg is >= 1.0 */ - if ( exponent(st0_ptr) < 0 ) - { - denormal_arg: - - FPU_to_exp16(st0_ptr, &a); - - /* poly_2xm1(x) requires 0 < st(0) < 1. */ - poly_2xm1(getsign(st0_ptr), &a, st0_ptr); - } - set_precision_flag_up(); /* 80486 appears to always do this */ - return; - } - - if ( tag == TAG_Zero ) - return; - - if ( tag == TAG_Special ) - tag = FPU_Special(st0_ptr); - - switch ( tag ) - { - case TW_Denormal: - if ( denormal_operand() < 0 ) - return; - goto denormal_arg; - case TW_Infinity: - if ( signnegative(st0_ptr) ) - { - /* -infinity gives -1 (p16-10) */ - FPU_copy_to_reg0(&CONST_1, TAG_Valid); - setnegative(st0_ptr); - } - return; - default: - single_arg_error(st0_ptr, tag); - } -} - - -static void fptan(FPU_REG *st0_ptr, u_char st0_tag) -{ - FPU_REG *st_new_ptr; - int q; - u_char arg_sign = getsign(st0_ptr); - - /* Stack underflow has higher priority */ - if ( st0_tag == TAG_Empty ) - { - FPU_stack_underflow(); /* Puts a QNaN in st(0) */ - if ( control_word & CW_Invalid ) - { - st_new_ptr = &st(-1); - push(); - FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */ - } - return; - } - - if ( STACK_OVERFLOW ) - { FPU_stack_overflow(); return; } - - if ( st0_tag == TAG_Valid ) - { - if ( exponent(st0_ptr) > -40 ) - { - if ( (q = trig_arg(st0_ptr, 0)) == -1 ) - { - /* Operand is out of range */ - return; - } - - poly_tan(st0_ptr); - setsign(st0_ptr, (q & 1) ^ (arg_sign != 0)); - set_precision_flag_up(); /* We do not really know if up or down */ - } - else - { - /* For a small arg, the result == the argument */ - /* Underflow may happen */ - - denormal_arg: - - FPU_to_exp16(st0_ptr, st0_ptr); - - st0_tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign); - FPU_settag0(st0_tag); - } - push(); - FPU_copy_to_reg0(&CONST_1, TAG_Valid); - return; - } - - if ( st0_tag == TAG_Zero ) - { - push(); - FPU_copy_to_reg0(&CONST_1, TAG_Valid); - setcc(0); - return; - } - - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - - if ( st0_tag == TW_Denormal ) - { - if ( denormal_operand() < 0 ) - return; - - goto denormal_arg; - } - - if ( st0_tag == TW_Infinity ) - { - /* The 80486 treats infinity as an invalid operand */ - if ( arith_invalid(0) >= 0 ) - { - st_new_ptr = &st(-1); - push(); - arith_invalid(0); - } - return; - } - - single_arg_2_error(st0_ptr, st0_tag); -} - - -static void fxtract(FPU_REG *st0_ptr, u_char st0_tag) -{ - FPU_REG *st_new_ptr; - u_char sign; - register FPU_REG *st1_ptr = st0_ptr; /* anticipate */ - - if ( STACK_OVERFLOW ) - { FPU_stack_overflow(); return; } - - clear_C1(); - - if ( st0_tag == TAG_Valid ) - { - long e; - - push(); - sign = getsign(st1_ptr); - reg_copy(st1_ptr, st_new_ptr); - setexponent16(st_new_ptr, exponent(st_new_ptr)); - - denormal_arg: - - e = exponent16(st_new_ptr); - convert_l2reg(&e, 1); - setexponentpos(st_new_ptr, 0); - setsign(st_new_ptr, sign); - FPU_settag0(TAG_Valid); /* Needed if arg was a denormal */ - return; - } - else if ( st0_tag == TAG_Zero ) - { - sign = getsign(st0_ptr); - - if ( FPU_divide_by_zero(0, SIGN_NEG) < 0 ) - return; - - push(); - FPU_copy_to_reg0(&CONST_Z, TAG_Zero); - setsign(st_new_ptr, sign); - return; - } - - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - - if ( st0_tag == TW_Denormal ) - { - if (denormal_operand() < 0 ) - return; - - push(); - sign = getsign(st1_ptr); - FPU_to_exp16(st1_ptr, st_new_ptr); - goto denormal_arg; - } - else if ( st0_tag == TW_Infinity ) - { - sign = getsign(st0_ptr); - setpositive(st0_ptr); - push(); - FPU_copy_to_reg0(&CONST_INF, TAG_Special); - setsign(st_new_ptr, sign); - return; - } - else if ( st0_tag == TW_NaN ) - { - if ( real_1op_NaN(st0_ptr) < 0 ) - return; - - push(); - FPU_copy_to_reg0(st0_ptr, TAG_Special); - return; - } - else if ( st0_tag == TAG_Empty ) - { - /* Is this the correct behaviour? */ - if ( control_word & EX_Invalid ) - { - FPU_stack_underflow(); - push(); - FPU_stack_underflow(); - } - else - EXCEPTION(EX_StackUnder); - } -#ifdef PARANOID - else - EXCEPTION(EX_INTERNAL | 0x119); -#endif /* PARANOID */ -} - - -static void fdecstp(void) -{ - clear_C1(); - top--; -} - -static void fincstp(void) -{ - clear_C1(); - top++; -} - - -static void fsqrt_(FPU_REG *st0_ptr, u_char st0_tag) -{ - int expon; - - clear_C1(); - - if ( st0_tag == TAG_Valid ) - { - u_char tag; - - if (signnegative(st0_ptr)) - { - arith_invalid(0); /* sqrt(negative) is invalid */ - return; - } - - /* make st(0) in [1.0 .. 4.0) */ - expon = exponent(st0_ptr); - - denormal_arg: - - setexponent16(st0_ptr, (expon & 1)); - - /* Do the computation, the sign of the result will be positive. */ - tag = wm_sqrt(st0_ptr, 0, 0, control_word, SIGN_POS); - addexponent(st0_ptr, expon >> 1); - FPU_settag0(tag); - return; - } - - if ( st0_tag == TAG_Zero ) - return; - - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - - if ( st0_tag == TW_Infinity ) - { - if ( signnegative(st0_ptr) ) - arith_invalid(0); /* sqrt(-Infinity) is invalid */ - return; - } - else if ( st0_tag == TW_Denormal ) - { - if (signnegative(st0_ptr)) - { - arith_invalid(0); /* sqrt(negative) is invalid */ - return; - } - - if ( denormal_operand() < 0 ) - return; - - FPU_to_exp16(st0_ptr, st0_ptr); - - expon = exponent16(st0_ptr); - - goto denormal_arg; - } - - single_arg_error(st0_ptr, st0_tag); - -} - - -static void frndint_(FPU_REG *st0_ptr, u_char st0_tag) -{ - int flags, tag; - - if ( st0_tag == TAG_Valid ) - { - u_char sign; - - denormal_arg: - - sign = getsign(st0_ptr); - - if (exponent(st0_ptr) > 63) - return; - - if ( st0_tag == TW_Denormal ) - { - if (denormal_operand() < 0 ) - return; - } - - /* Fortunately, this can't overflow to 2^64 */ - if ( (flags = FPU_round_to_int(st0_ptr, st0_tag)) ) - set_precision_flag(flags); - - setexponent16(st0_ptr, 63); - tag = FPU_normalize(st0_ptr); - setsign(st0_ptr, sign); - FPU_settag0(tag); - return; - } - - if ( st0_tag == TAG_Zero ) - return; - - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - - if ( st0_tag == TW_Denormal ) - goto denormal_arg; - else if ( st0_tag == TW_Infinity ) - return; - else - single_arg_error(st0_ptr, st0_tag); -} - - -static int fsin(FPU_REG *st0_ptr, u_char tag) -{ - u_char arg_sign = getsign(st0_ptr); - - if ( tag == TAG_Valid ) - { - int q; - - if ( exponent(st0_ptr) > -40 ) - { - if ( (q = trig_arg(st0_ptr, 0)) == -1 ) - { - /* Operand is out of range */ - return 1; - } - - poly_sine(st0_ptr); - - if (q & 2) - changesign(st0_ptr); - - setsign(st0_ptr, getsign(st0_ptr) ^ arg_sign); - - /* We do not really know if up or down */ - set_precision_flag_up(); - return 0; - } - else - { - /* For a small arg, the result == the argument */ - set_precision_flag_up(); /* Must be up. */ - return 0; - } - } - - if ( tag == TAG_Zero ) - { - setcc(0); - return 0; - } - - if ( tag == TAG_Special ) - tag = FPU_Special(st0_ptr); - - if ( tag == TW_Denormal ) - { - if ( denormal_operand() < 0 ) - return 1; - - /* For a small arg, the result == the argument */ - /* Underflow may happen */ - FPU_to_exp16(st0_ptr, st0_ptr); - - tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign); - - FPU_settag0(tag); - - return 0; - } - else if ( tag == TW_Infinity ) - { - /* The 80486 treats infinity as an invalid operand */ - arith_invalid(0); - return 1; - } - else - { - single_arg_error(st0_ptr, tag); - return 1; - } -} - - -static int f_cos(FPU_REG *st0_ptr, u_char tag) -{ - u_char st0_sign; - - st0_sign = getsign(st0_ptr); - - if ( tag == TAG_Valid ) - { - int q; - - if ( exponent(st0_ptr) > -40 ) - { - if ( (exponent(st0_ptr) < 0) - || ((exponent(st0_ptr) == 0) - && (significand(st0_ptr) <= 0xc90fdaa22168c234LL)) ) - { - poly_cos(st0_ptr); - - /* We do not really know if up or down */ - set_precision_flag_down(); - - return 0; - } - else if ( (q = trig_arg(st0_ptr, FCOS)) != -1 ) - { - poly_sine(st0_ptr); - - if ((q+1) & 2) - changesign(st0_ptr); - - /* We do not really know if up or down */ - set_precision_flag_down(); - - return 0; - } - else - { - /* Operand is out of range */ - return 1; - } - } - else - { - denormal_arg: - - setcc(0); - FPU_copy_to_reg0(&CONST_1, TAG_Valid); -#ifdef PECULIAR_486 - set_precision_flag_down(); /* 80486 appears to do this. */ -#else - set_precision_flag_up(); /* Must be up. */ -#endif /* PECULIAR_486 */ - return 0; - } - } - else if ( tag == TAG_Zero ) - { - FPU_copy_to_reg0(&CONST_1, TAG_Valid); - setcc(0); - return 0; - } - - if ( tag == TAG_Special ) - tag = FPU_Special(st0_ptr); - - if ( tag == TW_Denormal ) - { - if ( denormal_operand() < 0 ) - return 1; - - goto denormal_arg; - } - else if ( tag == TW_Infinity ) - { - /* The 80486 treats infinity as an invalid operand */ - arith_invalid(0); - return 1; - } - else - { - single_arg_error(st0_ptr, tag); /* requires st0_ptr == &st(0) */ - return 1; - } -} - - -static void fcos(FPU_REG *st0_ptr, u_char st0_tag) -{ - f_cos(st0_ptr, st0_tag); -} - - -static void fsincos(FPU_REG *st0_ptr, u_char st0_tag) -{ - FPU_REG *st_new_ptr; - FPU_REG arg; - u_char tag; - - /* Stack underflow has higher priority */ - if ( st0_tag == TAG_Empty ) - { - FPU_stack_underflow(); /* Puts a QNaN in st(0) */ - if ( control_word & CW_Invalid ) - { - st_new_ptr = &st(-1); - push(); - FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */ - } - return; - } - - if ( STACK_OVERFLOW ) - { FPU_stack_overflow(); return; } - - if ( st0_tag == TAG_Special ) - tag = FPU_Special(st0_ptr); - else - tag = st0_tag; - - if ( tag == TW_NaN ) - { - single_arg_2_error(st0_ptr, TW_NaN); - return; - } - else if ( tag == TW_Infinity ) - { - /* The 80486 treats infinity as an invalid operand */ - if ( arith_invalid(0) >= 0 ) - { - /* Masked response */ - push(); - arith_invalid(0); - } - return; - } - - reg_copy(st0_ptr, &arg); - if ( !fsin(st0_ptr, st0_tag) ) - { - push(); - FPU_copy_to_reg0(&arg, st0_tag); - f_cos(&st(0), st0_tag); - } - else - { - /* An error, so restore st(0) */ - FPU_copy_to_reg0(&arg, st0_tag); - } -} - - -/*---------------------------------------------------------------------------*/ -/* The following all require two arguments: st(0) and st(1) */ - -/* A lean, mean kernel for the fprem instructions. This relies upon - the division and rounding to an integer in do_fprem giving an - exact result. Because of this, rem_kernel() needs to deal only with - the least significant 64 bits, the more significant bits of the - result must be zero. - */ -static void rem_kernel(unsigned long long st0, unsigned long long *y, - unsigned long long st1, - unsigned long long q, int n) -{ - int dummy; - unsigned long long x; - - x = st0 << n; - - /* Do the required multiplication and subtraction in the one operation */ - - /* lsw x -= lsw st1 * lsw q */ - asm volatile ("mull %4; subl %%eax,%0; sbbl %%edx,%1" - :"=m" (((unsigned *)&x)[0]), "=m" (((unsigned *)&x)[1]), - "=a" (dummy) - :"2" (((unsigned *)&st1)[0]), "m" (((unsigned *)&q)[0]) - :"%dx"); - /* msw x -= msw st1 * lsw q */ - asm volatile ("mull %3; subl %%eax,%0" - :"=m" (((unsigned *)&x)[1]), "=a" (dummy) - :"1" (((unsigned *)&st1)[1]), "m" (((unsigned *)&q)[0]) - :"%dx"); - /* msw x -= lsw st1 * msw q */ - asm volatile ("mull %3; subl %%eax,%0" - :"=m" (((unsigned *)&x)[1]), "=a" (dummy) - :"1" (((unsigned *)&st1)[0]), "m" (((unsigned *)&q)[1]) - :"%dx"); - - *y = x; -} - - -/* Remainder of st(0) / st(1) */ -/* This routine produces exact results, i.e. there is never any - rounding or truncation, etc of the result. */ -static void do_fprem(FPU_REG *st0_ptr, u_char st0_tag, int round) -{ - FPU_REG *st1_ptr = &st(1); - u_char st1_tag = FPU_gettagi(1); - - if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) ) - { - FPU_REG tmp, st0, st1; - u_char st0_sign, st1_sign; - u_char tmptag; - int tag; - int old_cw; - int expdif; - long long q; - unsigned short saved_status; - int cc; - - fprem_valid: - /* Convert registers for internal use. */ - st0_sign = FPU_to_exp16(st0_ptr, &st0); - st1_sign = FPU_to_exp16(st1_ptr, &st1); - expdif = exponent16(&st0) - exponent16(&st1); - - old_cw = control_word; - cc = 0; - - /* We want the status following the denorm tests, but don't want - the status changed by the arithmetic operations. */ - saved_status = partial_status; - control_word &= ~CW_RC; - control_word |= RC_CHOP; - - if ( expdif < 64 ) - { - /* This should be the most common case */ - - if ( expdif > -2 ) - { - u_char sign = st0_sign ^ st1_sign; - tag = FPU_u_div(&st0, &st1, &tmp, - PR_64_BITS | RC_CHOP | 0x3f, - sign); - setsign(&tmp, sign); - - if ( exponent(&tmp) >= 0 ) - { - FPU_round_to_int(&tmp, tag); /* Fortunately, this can't - overflow to 2^64 */ - q = significand(&tmp); - - rem_kernel(significand(&st0), - &significand(&tmp), - significand(&st1), - q, expdif); - - setexponent16(&tmp, exponent16(&st1)); - } - else - { - reg_copy(&st0, &tmp); - q = 0; - } - - if ( (round == RC_RND) && (tmp.sigh & 0xc0000000) ) - { - /* We may need to subtract st(1) once more, - to get a result <= 1/2 of st(1). */ - unsigned long long x; - expdif = exponent16(&st1) - exponent16(&tmp); - if ( expdif <= 1 ) - { - if ( expdif == 0 ) - x = significand(&st1) - significand(&tmp); - else /* expdif is 1 */ - x = (significand(&st1) << 1) - significand(&tmp); - if ( (x < significand(&tmp)) || - /* or equi-distant (from 0 & st(1)) and q is odd */ - ((x == significand(&tmp)) && (q & 1) ) ) - { - st0_sign = ! st0_sign; - significand(&tmp) = x; - q++; - } - } - } - - if (q & 4) cc |= SW_C0; - if (q & 2) cc |= SW_C3; - if (q & 1) cc |= SW_C1; - } - else - { - control_word = old_cw; - setcc(0); - return; - } - } - else - { - /* There is a large exponent difference ( >= 64 ) */ - /* To make much sense, the code in this section should - be done at high precision. */ - int exp_1, N; - u_char sign; - - /* prevent overflow here */ - /* N is 'a number between 32 and 63' (p26-113) */ - reg_copy(&st0, &tmp); - tmptag = st0_tag; - N = (expdif & 0x0000001f) + 32; /* This choice gives results - identical to an AMD 486 */ - setexponent16(&tmp, N); - exp_1 = exponent16(&st1); - setexponent16(&st1, 0); - expdif -= N; - - sign = getsign(&tmp) ^ st1_sign; - tag = FPU_u_div(&tmp, &st1, &tmp, PR_64_BITS | RC_CHOP | 0x3f, - sign); - setsign(&tmp, sign); - - FPU_round_to_int(&tmp, tag); /* Fortunately, this can't - overflow to 2^64 */ - - rem_kernel(significand(&st0), - &significand(&tmp), - significand(&st1), - significand(&tmp), - exponent(&tmp) - ); - setexponent16(&tmp, exp_1 + expdif); - - /* It is possible for the operation to be complete here. - What does the IEEE standard say? The Intel 80486 manual - implies that the operation will never be completed at this - point, and the behaviour of a real 80486 confirms this. - */ - if ( !(tmp.sigh | tmp.sigl) ) - { - /* The result is zero */ - control_word = old_cw; - partial_status = saved_status; - FPU_copy_to_reg0(&CONST_Z, TAG_Zero); - setsign(&st0, st0_sign); -#ifdef PECULIAR_486 - setcc(SW_C2); -#else - setcc(0); -#endif /* PECULIAR_486 */ - return; - } - cc = SW_C2; - } - - control_word = old_cw; - partial_status = saved_status; - tag = FPU_normalize_nuo(&tmp); - reg_copy(&tmp, st0_ptr); - - /* The only condition to be looked for is underflow, - and it can occur here only if underflow is unmasked. */ - if ( (exponent16(&tmp) <= EXP_UNDER) && (tag != TAG_Zero) - && !(control_word & CW_Underflow) ) - { - setcc(cc); - tag = arith_underflow(st0_ptr); - setsign(st0_ptr, st0_sign); - FPU_settag0(tag); - return; - } - else if ( (exponent16(&tmp) > EXP_UNDER) || (tag == TAG_Zero) ) - { - stdexp(st0_ptr); - setsign(st0_ptr, st0_sign); - } - else - { - tag = FPU_round(st0_ptr, 0, 0, FULL_PRECISION, st0_sign); - } - FPU_settag0(tag); - setcc(cc); - - return; - } - - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - if ( st1_tag == TAG_Special ) - st1_tag = FPU_Special(st1_ptr); - - if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal)) - || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid)) - || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) ) - { - if ( denormal_operand() < 0 ) - return; - goto fprem_valid; - } - else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) ) - { - FPU_stack_underflow(); - return; - } - else if ( st0_tag == TAG_Zero ) - { - if ( st1_tag == TAG_Valid ) - { - setcc(0); return; - } - else if ( st1_tag == TW_Denormal ) - { - if ( denormal_operand() < 0 ) - return; - setcc(0); return; - } - else if ( st1_tag == TAG_Zero ) - { arith_invalid(0); return; } /* fprem(?,0) always invalid */ - else if ( st1_tag == TW_Infinity ) - { setcc(0); return; } - } - else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) ) - { - if ( st1_tag == TAG_Zero ) - { - arith_invalid(0); /* fprem(Valid,Zero) is invalid */ - return; - } - else if ( st1_tag != TW_NaN ) - { - if ( ((st0_tag == TW_Denormal) || (st1_tag == TW_Denormal)) - && (denormal_operand() < 0) ) - return; - - if ( st1_tag == TW_Infinity ) - { - /* fprem(Valid,Infinity) is o.k. */ - setcc(0); return; - } - } - } - else if ( st0_tag == TW_Infinity ) - { - if ( st1_tag != TW_NaN ) - { - arith_invalid(0); /* fprem(Infinity,?) is invalid */ - return; - } - } - - /* One of the registers must contain a NaN if we got here. */ - -#ifdef PARANOID - if ( (st0_tag != TW_NaN) && (st1_tag != TW_NaN) ) - EXCEPTION(EX_INTERNAL | 0x118); -#endif /* PARANOID */ - - real_2op_NaN(st1_ptr, st1_tag, 0, st1_ptr); - -} - - -/* ST(1) <- ST(1) * log ST; pop ST */ -static void fyl2x(FPU_REG *st0_ptr, u_char st0_tag) -{ - FPU_REG *st1_ptr = &st(1), exponent; - u_char st1_tag = FPU_gettagi(1); - u_char sign; - int e, tag; - - clear_C1(); - - if ( (st0_tag == TAG_Valid) && (st1_tag == TAG_Valid) ) - { - both_valid: - /* Both regs are Valid or Denormal */ - if ( signpositive(st0_ptr) ) - { - if ( st0_tag == TW_Denormal ) - FPU_to_exp16(st0_ptr, st0_ptr); - else - /* Convert st(0) for internal use. */ - setexponent16(st0_ptr, exponent(st0_ptr)); - - if ( (st0_ptr->sigh == 0x80000000) && (st0_ptr->sigl == 0) ) - { - /* Special case. The result can be precise. */ - u_char esign; - e = exponent16(st0_ptr); - if ( e >= 0 ) - { - exponent.sigh = e; - esign = SIGN_POS; - } - else - { - exponent.sigh = -e; - esign = SIGN_NEG; - } - exponent.sigl = 0; - setexponent16(&exponent, 31); - tag = FPU_normalize_nuo(&exponent); - stdexp(&exponent); - setsign(&exponent, esign); - tag = FPU_mul(&exponent, tag, 1, FULL_PRECISION); - if ( tag >= 0 ) - FPU_settagi(1, tag); - } - else - { - /* The usual case */ - sign = getsign(st1_ptr); - if ( st1_tag == TW_Denormal ) - FPU_to_exp16(st1_ptr, st1_ptr); - else - /* Convert st(1) for internal use. */ - setexponent16(st1_ptr, exponent(st1_ptr)); - poly_l2(st0_ptr, st1_ptr, sign); - } - } - else - { - /* negative */ - if ( arith_invalid(1) < 0 ) - return; - } - - FPU_pop(); - - return; - } - - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - if ( st1_tag == TAG_Special ) - st1_tag = FPU_Special(st1_ptr); - - if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) ) - { - FPU_stack_underflow_pop(1); - return; - } - else if ( (st0_tag <= TW_Denormal) && (st1_tag <= TW_Denormal) ) - { - if ( st0_tag == TAG_Zero ) - { - if ( st1_tag == TAG_Zero ) - { - /* Both args zero is invalid */ - if ( arith_invalid(1) < 0 ) - return; - } - else - { - u_char sign; - sign = getsign(st1_ptr)^SIGN_NEG; - if ( FPU_divide_by_zero(1, sign) < 0 ) - return; - - setsign(st1_ptr, sign); - } - } - else if ( st1_tag == TAG_Zero ) - { - /* st(1) contains zero, st(0) valid <> 0 */ - /* Zero is the valid answer */ - sign = getsign(st1_ptr); - - if ( signnegative(st0_ptr) ) - { - /* log(negative) */ - if ( arith_invalid(1) < 0 ) - return; - } - else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - else - { - if ( exponent(st0_ptr) < 0 ) - sign ^= SIGN_NEG; - - FPU_copy_to_reg1(&CONST_Z, TAG_Zero); - setsign(st1_ptr, sign); - } - } - else - { - /* One or both operands are denormals. */ - if ( denormal_operand() < 0 ) - return; - goto both_valid; - } - } - else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) ) - { - if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) - return; - } - /* One or both arg must be an infinity */ - else if ( st0_tag == TW_Infinity ) - { - if ( (signnegative(st0_ptr)) || (st1_tag == TAG_Zero) ) - { - /* log(-infinity) or 0*log(infinity) */ - if ( arith_invalid(1) < 0 ) - return; - } - else - { - u_char sign = getsign(st1_ptr); - - if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - - FPU_copy_to_reg1(&CONST_INF, TAG_Special); - setsign(st1_ptr, sign); - } - } - /* st(1) must be infinity here */ - else if ( ((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal)) - && ( signpositive(st0_ptr) ) ) - { - if ( exponent(st0_ptr) >= 0 ) - { - if ( (exponent(st0_ptr) == 0) && - (st0_ptr->sigh == 0x80000000) && - (st0_ptr->sigl == 0) ) - { - /* st(0) holds 1.0 */ - /* infinity*log(1) */ - if ( arith_invalid(1) < 0 ) - return; - } - /* else st(0) is positive and > 1.0 */ - } - else - { - /* st(0) is positive and < 1.0 */ - - if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - - changesign(st1_ptr); - } - } - else - { - /* st(0) must be zero or negative */ - if ( st0_tag == TAG_Zero ) - { - /* This should be invalid, but a real 80486 is happy with it. */ - -#ifndef PECULIAR_486 - sign = getsign(st1_ptr); - if ( FPU_divide_by_zero(1, sign) < 0 ) - return; -#endif /* PECULIAR_486 */ - - changesign(st1_ptr); - } - else if ( arith_invalid(1) < 0 ) /* log(negative) */ - return; - } - - FPU_pop(); -} - - -static void fpatan(FPU_REG *st0_ptr, u_char st0_tag) -{ - FPU_REG *st1_ptr = &st(1); - u_char st1_tag = FPU_gettagi(1); - int tag; - - clear_C1(); - if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) ) - { - valid_atan: - - poly_atan(st0_ptr, st0_tag, st1_ptr, st1_tag); - - FPU_pop(); - - return; - } - - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - if ( st1_tag == TAG_Special ) - st1_tag = FPU_Special(st1_ptr); - - if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal)) - || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid)) - || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) ) - { - if ( denormal_operand() < 0 ) - return; - - goto valid_atan; - } - else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) ) - { - FPU_stack_underflow_pop(1); - return; - } - else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) ) - { - if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) >= 0 ) - FPU_pop(); - return; - } - else if ( (st0_tag == TW_Infinity) || (st1_tag == TW_Infinity) ) - { - u_char sign = getsign(st1_ptr); - if ( st0_tag == TW_Infinity ) - { - if ( st1_tag == TW_Infinity ) - { - if ( signpositive(st0_ptr) ) - { - FPU_copy_to_reg1(&CONST_PI4, TAG_Valid); - } - else - { - setpositive(st1_ptr); - tag = FPU_u_add(&CONST_PI4, &CONST_PI2, st1_ptr, - FULL_PRECISION, SIGN_POS, - exponent(&CONST_PI4), exponent(&CONST_PI2)); - if ( tag >= 0 ) - FPU_settagi(1, tag); - } - } - else - { - if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - - if ( signpositive(st0_ptr) ) - { - FPU_copy_to_reg1(&CONST_Z, TAG_Zero); - setsign(st1_ptr, sign); /* An 80486 preserves the sign */ - FPU_pop(); - return; - } - else - { - FPU_copy_to_reg1(&CONST_PI, TAG_Valid); - } - } - } - else - { - /* st(1) is infinity, st(0) not infinity */ - if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - - FPU_copy_to_reg1(&CONST_PI2, TAG_Valid); - } - setsign(st1_ptr, sign); - } - else if ( st1_tag == TAG_Zero ) - { - /* st(0) must be valid or zero */ - u_char sign = getsign(st1_ptr); - - if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - - if ( signpositive(st0_ptr) ) - { - /* An 80486 preserves the sign */ - FPU_pop(); - return; - } - - FPU_copy_to_reg1(&CONST_PI, TAG_Valid); - setsign(st1_ptr, sign); - } - else if ( st0_tag == TAG_Zero ) - { - /* st(1) must be TAG_Valid here */ - u_char sign = getsign(st1_ptr); - - if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - - FPU_copy_to_reg1(&CONST_PI2, TAG_Valid); - setsign(st1_ptr, sign); - } -#ifdef PARANOID - else - EXCEPTION(EX_INTERNAL | 0x125); -#endif /* PARANOID */ - - FPU_pop(); - set_precision_flag_up(); /* We do not really know if up or down */ -} - - -static void fprem(FPU_REG *st0_ptr, u_char st0_tag) -{ - do_fprem(st0_ptr, st0_tag, RC_CHOP); -} - - -static void fprem1(FPU_REG *st0_ptr, u_char st0_tag) -{ - do_fprem(st0_ptr, st0_tag, RC_RND); -} - - -static void fyl2xp1(FPU_REG *st0_ptr, u_char st0_tag) -{ - u_char sign, sign1; - FPU_REG *st1_ptr = &st(1), a, b; - u_char st1_tag = FPU_gettagi(1); - - clear_C1(); - if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) ) - { - valid_yl2xp1: - - sign = getsign(st0_ptr); - sign1 = getsign(st1_ptr); - - FPU_to_exp16(st0_ptr, &a); - FPU_to_exp16(st1_ptr, &b); - - if ( poly_l2p1(sign, sign1, &a, &b, st1_ptr) ) - return; - - FPU_pop(); - return; - } - - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - if ( st1_tag == TAG_Special ) - st1_tag = FPU_Special(st1_ptr); - - if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal)) - || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid)) - || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) ) - { - if ( denormal_operand() < 0 ) - return; - - goto valid_yl2xp1; - } - else if ( (st0_tag == TAG_Empty) | (st1_tag == TAG_Empty) ) - { - FPU_stack_underflow_pop(1); - return; - } - else if ( st0_tag == TAG_Zero ) - { - switch ( st1_tag ) - { - case TW_Denormal: - if ( denormal_operand() < 0 ) - return; - - case TAG_Zero: - case TAG_Valid: - setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr)); - FPU_copy_to_reg1(st0_ptr, st0_tag); - break; - - case TW_Infinity: - /* Infinity*log(1) */ - if ( arith_invalid(1) < 0 ) - return; - break; - - case TW_NaN: - if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) - return; - break; - - default: -#ifdef PARANOID - EXCEPTION(EX_INTERNAL | 0x116); - return; -#endif /* PARANOID */ - break; - } - } - else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) ) - { - switch ( st1_tag ) - { - case TAG_Zero: - if ( signnegative(st0_ptr) ) - { - if ( exponent(st0_ptr) >= 0 ) - { - /* st(0) holds <= -1.0 */ -#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ - changesign(st1_ptr); -#else - if ( arith_invalid(1) < 0 ) - return; -#endif /* PECULIAR_486 */ - } - else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - else - changesign(st1_ptr); - } - else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - break; - - case TW_Infinity: - if ( signnegative(st0_ptr) ) - { - if ( (exponent(st0_ptr) >= 0) && - !((st0_ptr->sigh == 0x80000000) && - (st0_ptr->sigl == 0)) ) - { - /* st(0) holds < -1.0 */ -#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ - changesign(st1_ptr); -#else - if ( arith_invalid(1) < 0 ) return; -#endif /* PECULIAR_486 */ - } - else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - else - changesign(st1_ptr); - } - else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - break; - - case TW_NaN: - if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) - return; - } - - } - else if ( st0_tag == TW_NaN ) - { - if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) - return; - } - else if ( st0_tag == TW_Infinity ) - { - if ( st1_tag == TW_NaN ) - { - if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) - return; - } - else if ( signnegative(st0_ptr) ) - { -#ifndef PECULIAR_486 - /* This should have higher priority than denormals, but... */ - if ( arith_invalid(1) < 0 ) /* log(-infinity) */ - return; -#endif /* PECULIAR_486 */ - if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; -#ifdef PECULIAR_486 - /* Denormal operands actually get higher priority */ - if ( arith_invalid(1) < 0 ) /* log(-infinity) */ - return; -#endif /* PECULIAR_486 */ - } - else if ( st1_tag == TAG_Zero ) - { - /* log(infinity) */ - if ( arith_invalid(1) < 0 ) - return; - } - - /* st(1) must be valid here. */ - - else if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - - /* The Manual says that log(Infinity) is invalid, but a real - 80486 sensibly says that it is o.k. */ - else - { - u_char sign = getsign(st1_ptr); - FPU_copy_to_reg1(&CONST_INF, TAG_Special); - setsign(st1_ptr, sign); - } - } -#ifdef PARANOID - else - { - EXCEPTION(EX_INTERNAL | 0x117); - return; - } -#endif /* PARANOID */ - - FPU_pop(); - return; - -} - - -static void fscale(FPU_REG *st0_ptr, u_char st0_tag) -{ - FPU_REG *st1_ptr = &st(1); - u_char st1_tag = FPU_gettagi(1); - int old_cw = control_word; - u_char sign = getsign(st0_ptr); - - clear_C1(); - if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) ) - { - long scale; - FPU_REG tmp; - - /* Convert register for internal use. */ - setexponent16(st0_ptr, exponent(st0_ptr)); - - valid_scale: - - if ( exponent(st1_ptr) > 30 ) - { - /* 2^31 is far too large, would require 2^(2^30) or 2^(-2^30) */ - - if ( signpositive(st1_ptr) ) - { - EXCEPTION(EX_Overflow); - FPU_copy_to_reg0(&CONST_INF, TAG_Special); - } - else - { - EXCEPTION(EX_Underflow); - FPU_copy_to_reg0(&CONST_Z, TAG_Zero); - } - setsign(st0_ptr, sign); - return; - } - - control_word &= ~CW_RC; - control_word |= RC_CHOP; - reg_copy(st1_ptr, &tmp); - FPU_round_to_int(&tmp, st1_tag); /* This can never overflow here */ - control_word = old_cw; - scale = signnegative(st1_ptr) ? -tmp.sigl : tmp.sigl; - scale += exponent16(st0_ptr); - - setexponent16(st0_ptr, scale); - - /* Use FPU_round() to properly detect under/overflow etc */ - FPU_round(st0_ptr, 0, 0, control_word, sign); - - return; - } - - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - if ( st1_tag == TAG_Special ) - st1_tag = FPU_Special(st1_ptr); - - if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) ) - { - switch ( st1_tag ) - { - case TAG_Valid: - /* st(0) must be a denormal */ - if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - - FPU_to_exp16(st0_ptr, st0_ptr); /* Will not be left on stack */ - goto valid_scale; - - case TAG_Zero: - if ( st0_tag == TW_Denormal ) - denormal_operand(); - return; - - case TW_Denormal: - denormal_operand(); - return; - - case TW_Infinity: - if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) - return; - - if ( signpositive(st1_ptr) ) - FPU_copy_to_reg0(&CONST_INF, TAG_Special); - else - FPU_copy_to_reg0(&CONST_Z, TAG_Zero); - setsign(st0_ptr, sign); - return; - - case TW_NaN: - real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); - return; - } - } - else if ( st0_tag == TAG_Zero ) - { - switch ( st1_tag ) - { - case TAG_Valid: - case TAG_Zero: - return; - - case TW_Denormal: - denormal_operand(); - return; - - case TW_Infinity: - if ( signpositive(st1_ptr) ) - arith_invalid(0); /* Zero scaled by +Infinity */ - return; - - case TW_NaN: - real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); - return; - } - } - else if ( st0_tag == TW_Infinity ) - { - switch ( st1_tag ) - { - case TAG_Valid: - case TAG_Zero: - return; - - case TW_Denormal: - denormal_operand(); - return; - - case TW_Infinity: - if ( signnegative(st1_ptr) ) - arith_invalid(0); /* Infinity scaled by -Infinity */ - return; - - case TW_NaN: - real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); - return; - } - } - else if ( st0_tag == TW_NaN ) - { - if ( st1_tag != TAG_Empty ) - { real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); return; } - } - -#ifdef PARANOID - if ( !((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty)) ) - { - EXCEPTION(EX_INTERNAL | 0x115); - return; - } -#endif - - /* At least one of st(0), st(1) must be empty */ - FPU_stack_underflow(); - -} - - -/*---------------------------------------------------------------------------*/ - -static FUNC_ST0 const trig_table_a[] = { - f2xm1, fyl2x, fptan, fpatan, - fxtract, fprem1, (FUNC_ST0)fdecstp, (FUNC_ST0)fincstp -}; - -void FPU_triga(void) -{ - (trig_table_a[FPU_rm])(&st(0), FPU_gettag0()); -} - - -static FUNC_ST0 const trig_table_b[] = - { - fprem, fyl2xp1, fsqrt_, fsincos, frndint_, fscale, (FUNC_ST0)fsin, fcos - }; - -void FPU_trigb(void) -{ - (trig_table_b[FPU_rm])(&st(0), FPU_gettag0()); -} diff --git a/arch/i386/math-emu/get_address.c b/arch/i386/math-emu/get_address.c deleted file mode 100644 index 2e2c51a8bd3..00000000000 --- a/arch/i386/math-emu/get_address.c +++ /dev/null @@ -1,438 +0,0 @@ -/*---------------------------------------------------------------------------+ - | get_address.c | - | | - | Get the effective address from an FPU instruction. | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | Note: | - | The file contains code which accesses user memory. | - | Emulator static data may change when user memory is accessed, due to | - | other processes using the emulator while swapping is in progress. | - +---------------------------------------------------------------------------*/ - - -#include - -#include -#include - -#include "fpu_system.h" -#include "exception.h" -#include "fpu_emu.h" - - -#define FPU_WRITE_BIT 0x10 - -static int reg_offset[] = { - offsetof(struct info,___eax), - offsetof(struct info,___ecx), - offsetof(struct info,___edx), - offsetof(struct info,___ebx), - offsetof(struct info,___esp), - offsetof(struct info,___ebp), - offsetof(struct info,___esi), - offsetof(struct info,___edi) -}; - -#define REG_(x) (*(long *)(reg_offset[(x)]+(u_char *) FPU_info)) - -static int reg_offset_vm86[] = { - offsetof(struct info,___cs), - offsetof(struct info,___vm86_ds), - offsetof(struct info,___vm86_es), - offsetof(struct info,___vm86_fs), - offsetof(struct info,___vm86_gs), - offsetof(struct info,___ss), - offsetof(struct info,___vm86_ds) - }; - -#define VM86_REG_(x) (*(unsigned short *) \ - (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info)) - -/* This dummy, gs is not saved on the stack. */ -#define ___GS ___ds - -static int reg_offset_pm[] = { - offsetof(struct info,___cs), - offsetof(struct info,___ds), - offsetof(struct info,___es), - offsetof(struct info,___fs), - offsetof(struct info,___GS), - offsetof(struct info,___ss), - offsetof(struct info,___ds) - }; - -#define PM_REG_(x) (*(unsigned short *) \ - (reg_offset_pm[((unsigned)x)]+(u_char *) FPU_info)) - - -/* Decode the SIB byte. This function assumes mod != 0 */ -static int sib(int mod, unsigned long *fpu_eip) -{ - u_char ss,index,base; - long offset; - - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(1); - FPU_get_user(base, (u_char __user *) (*fpu_eip)); /* The SIB byte */ - RE_ENTRANT_CHECK_ON; - (*fpu_eip)++; - ss = base >> 6; - index = (base >> 3) & 7; - base &= 7; - - if ((mod == 0) && (base == 5)) - offset = 0; /* No base register */ - else - offset = REG_(base); - - if (index == 4) - { - /* No index register */ - /* A non-zero ss is illegal */ - if ( ss ) - EXCEPTION(EX_Invalid); - } - else - { - offset += (REG_(index)) << ss; - } - - if (mod == 1) - { - /* 8 bit signed displacement */ - long displacement; - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(1); - FPU_get_user(displacement, (signed char __user *) (*fpu_eip)); - offset += displacement; - RE_ENTRANT_CHECK_ON; - (*fpu_eip)++; - } - else if (mod == 2 || base == 5) /* The second condition also has mod==0 */ - { - /* 32 bit displacement */ - long displacement; - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(4); - FPU_get_user(displacement, (long __user *) (*fpu_eip)); - offset += displacement; - RE_ENTRANT_CHECK_ON; - (*fpu_eip) += 4; - } - - return offset; -} - - -static unsigned long vm86_segment(u_char segment, - struct address *addr) -{ - segment--; -#ifdef PARANOID - if ( segment > PREFIX_SS_ ) - { - EXCEPTION(EX_INTERNAL|0x130); - math_abort(FPU_info,SIGSEGV); - } -#endif /* PARANOID */ - addr->selector = VM86_REG_(segment); - return (unsigned long)VM86_REG_(segment) << 4; -} - - -/* This should work for 16 and 32 bit protected mode. */ -static long pm_address(u_char FPU_modrm, u_char segment, - struct address *addr, long offset) -{ - struct desc_struct descriptor; - unsigned long base_address, limit, address, seg_top; - - segment--; - -#ifdef PARANOID - /* segment is unsigned, so this also detects if segment was 0: */ - if ( segment > PREFIX_SS_ ) - { - EXCEPTION(EX_INTERNAL|0x132); - math_abort(FPU_info,SIGSEGV); - } -#endif /* PARANOID */ - - switch ( segment ) - { - /* gs isn't used by the kernel, so it still has its - user-space value. */ - case PREFIX_GS_-1: - /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */ - savesegment(gs, addr->selector); - break; - default: - addr->selector = PM_REG_(segment); - } - - descriptor = LDT_DESCRIPTOR(PM_REG_(segment)); - base_address = SEG_BASE_ADDR(descriptor); - address = base_address + offset; - limit = base_address - + (SEG_LIMIT(descriptor)+1) * SEG_GRANULARITY(descriptor) - 1; - if ( limit < base_address ) limit = 0xffffffff; - - if ( SEG_EXPAND_DOWN(descriptor) ) - { - if ( SEG_G_BIT(descriptor) ) - seg_top = 0xffffffff; - else - { - seg_top = base_address + (1 << 20); - if ( seg_top < base_address ) seg_top = 0xffffffff; - } - access_limit = - (address <= limit) || (address >= seg_top) ? 0 : - ((seg_top-address) >= 255 ? 255 : seg_top-address); - } - else - { - access_limit = - (address > limit) || (address < base_address) ? 0 : - ((limit-address) >= 254 ? 255 : limit-address+1); - } - if ( SEG_EXECUTE_ONLY(descriptor) || - (!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT)) ) - { - access_limit = 0; - } - return address; -} - - -/* - MOD R/M byte: MOD == 3 has a special use for the FPU - SIB byte used iff R/M = 100b - - 7 6 5 4 3 2 1 0 - ..... ......... ......... - MOD OPCODE(2) R/M - - - SIB byte - - 7 6 5 4 3 2 1 0 - ..... ......... ......... - SS INDEX BASE - -*/ - -void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip, - struct address *addr, - fpu_addr_modes addr_modes) -{ - u_char mod; - unsigned rm = FPU_modrm & 7; - long *cpu_reg_ptr; - int address = 0; /* Initialized just to stop compiler warnings. */ - - /* Memory accessed via the cs selector is write protected - in `non-segmented' 32 bit protected mode. */ - if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT) - && (addr_modes.override.segment == PREFIX_CS_) ) - { - math_abort(FPU_info,SIGSEGV); - } - - addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */ - - mod = (FPU_modrm >> 6) & 3; - - if (rm == 4 && mod != 3) - { - address = sib(mod, fpu_eip); - } - else - { - cpu_reg_ptr = & REG_(rm); - switch (mod) - { - case 0: - if (rm == 5) - { - /* Special case: disp32 */ - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(4); - FPU_get_user(address, (unsigned long __user *) (*fpu_eip)); - (*fpu_eip) += 4; - RE_ENTRANT_CHECK_ON; - addr->offset = address; - return (void __user *) address; - } - else - { - address = *cpu_reg_ptr; /* Just return the contents - of the cpu register */ - addr->offset = address; - return (void __user *) address; - } - case 1: - /* 8 bit signed displacement */ - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(1); - FPU_get_user(address, (signed char __user *) (*fpu_eip)); - RE_ENTRANT_CHECK_ON; - (*fpu_eip)++; - break; - case 2: - /* 32 bit displacement */ - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(4); - FPU_get_user(address, (long __user *) (*fpu_eip)); - (*fpu_eip) += 4; - RE_ENTRANT_CHECK_ON; - break; - case 3: - /* Not legal for the FPU */ - EXCEPTION(EX_Invalid); - } - address += *cpu_reg_ptr; - } - - addr->offset = address; - - switch ( addr_modes.default_mode ) - { - case 0: - break; - case VM86: - address += vm86_segment(addr_modes.override.segment, addr); - break; - case PM16: - case SEG32: - address = pm_address(FPU_modrm, addr_modes.override.segment, - addr, address); - break; - default: - EXCEPTION(EX_INTERNAL|0x133); - } - - return (void __user *)address; -} - - -void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip, - struct address *addr, - fpu_addr_modes addr_modes) -{ - u_char mod; - unsigned rm = FPU_modrm & 7; - int address = 0; /* Default used for mod == 0 */ - - /* Memory accessed via the cs selector is write protected - in `non-segmented' 32 bit protected mode. */ - if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT) - && (addr_modes.override.segment == PREFIX_CS_) ) - { - math_abort(FPU_info,SIGSEGV); - } - - addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */ - - mod = (FPU_modrm >> 6) & 3; - - switch (mod) - { - case 0: - if (rm == 6) - { - /* Special case: disp16 */ - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(2); - FPU_get_user(address, (unsigned short __user *) (*fpu_eip)); - (*fpu_eip) += 2; - RE_ENTRANT_CHECK_ON; - goto add_segment; - } - break; - case 1: - /* 8 bit signed displacement */ - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(1); - FPU_get_user(address, (signed char __user *) (*fpu_eip)); - RE_ENTRANT_CHECK_ON; - (*fpu_eip)++; - break; - case 2: - /* 16 bit displacement */ - RE_ENTRANT_CHECK_OFF; - FPU_code_access_ok(2); - FPU_get_user(address, (unsigned short __user *) (*fpu_eip)); - (*fpu_eip) += 2; - RE_ENTRANT_CHECK_ON; - break; - case 3: - /* Not legal for the FPU */ - EXCEPTION(EX_Invalid); - break; - } - switch ( rm ) - { - case 0: - address += FPU_info->___ebx + FPU_info->___esi; - break; - case 1: - address += FPU_info->___ebx + FPU_info->___edi; - break; - case 2: - address += FPU_info->___ebp + FPU_info->___esi; - if ( addr_modes.override.segment == PREFIX_DEFAULT ) - addr_modes.override.segment = PREFIX_SS_; - break; - case 3: - address += FPU_info->___ebp + FPU_info->___edi; - if ( addr_modes.override.segment == PREFIX_DEFAULT ) - addr_modes.override.segment = PREFIX_SS_; - break; - case 4: - address += FPU_info->___esi; - break; - case 5: - address += FPU_info->___edi; - break; - case 6: - address += FPU_info->___ebp; - if ( addr_modes.override.segment == PREFIX_DEFAULT ) - addr_modes.override.segment = PREFIX_SS_; - break; - case 7: - address += FPU_info->___ebx; - break; - } - - add_segment: - address &= 0xffff; - - addr->offset = address; - - switch ( addr_modes.default_mode ) - { - case 0: - break; - case VM86: - address += vm86_segment(addr_modes.override.segment, addr); - break; - case PM16: - case SEG32: - address = pm_address(FPU_modrm, addr_modes.override.segment, - addr, address); - break; - default: - EXCEPTION(EX_INTERNAL|0x131); - } - - return (void __user *)address ; -} diff --git a/arch/i386/math-emu/load_store.c b/arch/i386/math-emu/load_store.c deleted file mode 100644 index eebd6fb1c8a..00000000000 --- a/arch/i386/math-emu/load_store.c +++ /dev/null @@ -1,272 +0,0 @@ -/*---------------------------------------------------------------------------+ - | load_store.c | - | | - | This file contains most of the code to interpret the FPU instructions | - | which load and store from user memory. | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | Note: | - | The file contains code which accesses user memory. | - | Emulator static data may change when user memory is accessed, due to | - | other processes using the emulator while swapping is in progress. | - +---------------------------------------------------------------------------*/ - -#include - -#include "fpu_system.h" -#include "exception.h" -#include "fpu_emu.h" -#include "status_w.h" -#include "control_w.h" - - -#define _NONE_ 0 /* st0_ptr etc not needed */ -#define _REG0_ 1 /* Will be storing st(0) */ -#define _PUSH_ 3 /* Need to check for space to push onto stack */ -#define _null_ 4 /* Function illegal or not implemented */ - -#define pop_0() { FPU_settag0(TAG_Empty); top++; } - - -static u_char const type_table[32] = { - _PUSH_, _PUSH_, _PUSH_, _PUSH_, - _null_, _null_, _null_, _null_, - _REG0_, _REG0_, _REG0_, _REG0_, - _REG0_, _REG0_, _REG0_, _REG0_, - _NONE_, _null_, _NONE_, _PUSH_, - _NONE_, _PUSH_, _null_, _PUSH_, - _NONE_, _null_, _NONE_, _REG0_, - _NONE_, _REG0_, _NONE_, _REG0_ - }; - -u_char const data_sizes_16[32] = { - 4, 4, 8, 2, 0, 0, 0, 0, - 4, 4, 8, 2, 4, 4, 8, 2, - 14, 0, 94, 10, 2, 10, 0, 8, - 14, 0, 94, 10, 2, 10, 2, 8 -}; - -static u_char const data_sizes_32[32] = { - 4, 4, 8, 2, 0, 0, 0, 0, - 4, 4, 8, 2, 4, 4, 8, 2, - 28, 0,108, 10, 2, 10, 0, 8, - 28, 0,108, 10, 2, 10, 2, 8 -}; - -int FPU_load_store(u_char type, fpu_addr_modes addr_modes, - void __user *data_address) -{ - FPU_REG loaded_data; - FPU_REG *st0_ptr; - u_char st0_tag = TAG_Empty; /* This is just to stop a gcc warning. */ - u_char loaded_tag; - - st0_ptr = NULL; /* Initialized just to stop compiler warnings. */ - - if ( addr_modes.default_mode & PROTECTED ) - { - if ( addr_modes.default_mode == SEG32 ) - { - if ( access_limit < data_sizes_32[type] ) - math_abort(FPU_info,SIGSEGV); - } - else if ( addr_modes.default_mode == PM16 ) - { - if ( access_limit < data_sizes_16[type] ) - math_abort(FPU_info,SIGSEGV); - } -#ifdef PARANOID - else - EXCEPTION(EX_INTERNAL|0x140); -#endif /* PARANOID */ - } - - switch ( type_table[type] ) - { - case _NONE_: - break; - case _REG0_: - st0_ptr = &st(0); /* Some of these instructions pop after - storing */ - st0_tag = FPU_gettag0(); - break; - case _PUSH_: - { - if ( FPU_gettagi(-1) != TAG_Empty ) - { FPU_stack_overflow(); return 0; } - top--; - st0_ptr = &st(0); - } - break; - case _null_: - FPU_illegal(); - return 0; -#ifdef PARANOID - default: - EXCEPTION(EX_INTERNAL|0x141); - return 0; -#endif /* PARANOID */ - } - - switch ( type ) - { - case 000: /* fld m32real */ - clear_C1(); - loaded_tag = FPU_load_single((float __user *)data_address, &loaded_data); - if ( (loaded_tag == TAG_Special) - && isNaN(&loaded_data) - && (real_1op_NaN(&loaded_data) < 0) ) - { - top++; - break; - } - FPU_copy_to_reg0(&loaded_data, loaded_tag); - break; - case 001: /* fild m32int */ - clear_C1(); - loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data); - FPU_copy_to_reg0(&loaded_data, loaded_tag); - break; - case 002: /* fld m64real */ - clear_C1(); - loaded_tag = FPU_load_double((double __user *)data_address, &loaded_data); - if ( (loaded_tag == TAG_Special) - && isNaN(&loaded_data) - && (real_1op_NaN(&loaded_data) < 0) ) - { - top++; - break; - } - FPU_copy_to_reg0(&loaded_data, loaded_tag); - break; - case 003: /* fild m16int */ - clear_C1(); - loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data); - FPU_copy_to_reg0(&loaded_data, loaded_tag); - break; - case 010: /* fst m32real */ - clear_C1(); - FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address); - break; - case 011: /* fist m32int */ - clear_C1(); - FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address); - break; - case 012: /* fst m64real */ - clear_C1(); - FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address); - break; - case 013: /* fist m16int */ - clear_C1(); - FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address); - break; - case 014: /* fstp m32real */ - clear_C1(); - if ( FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address) ) - pop_0(); /* pop only if the number was actually stored - (see the 80486 manual p16-28) */ - break; - case 015: /* fistp m32int */ - clear_C1(); - if ( FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address) ) - pop_0(); /* pop only if the number was actually stored - (see the 80486 manual p16-28) */ - break; - case 016: /* fstp m64real */ - clear_C1(); - if ( FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address) ) - pop_0(); /* pop only if the number was actually stored - (see the 80486 manual p16-28) */ - break; - case 017: /* fistp m16int */ - clear_C1(); - if ( FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address) ) - pop_0(); /* pop only if the number was actually stored - (see the 80486 manual p16-28) */ - break; - case 020: /* fldenv m14/28byte */ - fldenv(addr_modes, (u_char __user *)data_address); - /* Ensure that the values just loaded are not changed by - fix-up operations. */ - return 1; - case 022: /* frstor m94/108byte */ - frstor(addr_modes, (u_char __user *)data_address); - /* Ensure that the values just loaded are not changed by - fix-up operations. */ - return 1; - case 023: /* fbld m80dec */ - clear_C1(); - loaded_tag = FPU_load_bcd((u_char __user *)data_address); - FPU_settag0(loaded_tag); - break; - case 024: /* fldcw */ - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, data_address, 2); - FPU_get_user(control_word, (unsigned short __user *) data_address); - RE_ENTRANT_CHECK_ON; - if ( partial_status & ~control_word & CW_Exceptions ) - partial_status |= (SW_Summary | SW_Backward); - else - partial_status &= ~(SW_Summary | SW_Backward); -#ifdef PECULIAR_486 - control_word |= 0x40; /* An 80486 appears to always set this bit */ -#endif /* PECULIAR_486 */ - return 1; - case 025: /* fld m80real */ - clear_C1(); - loaded_tag = FPU_load_extended((long double __user *)data_address, 0); - FPU_settag0(loaded_tag); - break; - case 027: /* fild m64int */ - clear_C1(); - loaded_tag = FPU_load_int64((long long __user *)data_address); - if (loaded_tag == TAG_Error) - return 0; - FPU_settag0(loaded_tag); - break; - case 030: /* fstenv m14/28byte */ - fstenv(addr_modes, (u_char __user *)data_address); - return 1; - case 032: /* fsave */ - fsave(addr_modes, (u_char __user *)data_address); - return 1; - case 033: /* fbstp m80dec */ - clear_C1(); - if ( FPU_store_bcd(st0_ptr, st0_tag, (u_char __user *)data_address) ) - pop_0(); /* pop only if the number was actually stored - (see the 80486 manual p16-28) */ - break; - case 034: /* fstcw m16int */ - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,data_address,2); - FPU_put_user(control_word, (unsigned short __user *) data_address); - RE_ENTRANT_CHECK_ON; - return 1; - case 035: /* fstp m80real */ - clear_C1(); - if ( FPU_store_extended(st0_ptr, st0_tag, (long double __user *)data_address) ) - pop_0(); /* pop only if the number was actually stored - (see the 80486 manual p16-28) */ - break; - case 036: /* fstsw m2byte */ - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,data_address,2); - FPU_put_user(status_word(),(unsigned short __user *) data_address); - RE_ENTRANT_CHECK_ON; - return 1; - case 037: /* fistp m64int */ - clear_C1(); - if ( FPU_store_int64(st0_ptr, st0_tag, (long long __user *)data_address) ) - pop_0(); /* pop only if the number was actually stored - (see the 80486 manual p16-28) */ - break; - } - return 0; -} diff --git a/arch/i386/math-emu/mul_Xsig.S b/arch/i386/math-emu/mul_Xsig.S deleted file mode 100644 index 717785a53eb..00000000000 --- a/arch/i386/math-emu/mul_Xsig.S +++ /dev/null @@ -1,176 +0,0 @@ -/*---------------------------------------------------------------------------+ - | mul_Xsig.S | - | | - | Multiply a 12 byte fixed point number by another fixed point number. | - | | - | Copyright (C) 1992,1994,1995 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@jacobi.maths.monash.edu.au | - | | - | Call from C as: | - | void mul32_Xsig(Xsig *x, unsigned b) | - | | - | void mul64_Xsig(Xsig *x, unsigned long long *b) | - | | - | void mul_Xsig_Xsig(Xsig *x, unsigned *b) | - | | - | The result is neither rounded nor normalized, and the ls bit or so may | - | be wrong. | - | | - +---------------------------------------------------------------------------*/ - .file "mul_Xsig.S" - - -#include "fpu_emu.h" - -.text -ENTRY(mul32_Xsig) - pushl %ebp - movl %esp,%ebp - subl $16,%esp - pushl %esi - - movl PARAM1,%esi - movl PARAM2,%ecx - - xor %eax,%eax - movl %eax,-4(%ebp) - movl %eax,-8(%ebp) - - movl (%esi),%eax /* lsl of Xsig */ - mull %ecx /* msl of b */ - movl %edx,-12(%ebp) - - movl 4(%esi),%eax /* midl of Xsig */ - mull %ecx /* msl of b */ - addl %eax,-12(%ebp) - adcl %edx,-8(%ebp) - adcl $0,-4(%ebp) - - movl 8(%esi),%eax /* msl of Xsig */ - mull %ecx /* msl of b */ - addl %eax,-8(%ebp) - adcl %edx,-4(%ebp) - - movl -12(%ebp),%eax - movl %eax,(%esi) - movl -8(%ebp),%eax - movl %eax,4(%esi) - movl -4(%ebp),%eax - movl %eax,8(%esi) - - popl %esi - leave - ret - - -ENTRY(mul64_Xsig) - pushl %ebp - movl %esp,%ebp - subl $16,%esp - pushl %esi - - movl PARAM1,%esi - movl PARAM2,%ecx - - xor %eax,%eax - movl %eax,-4(%ebp) - movl %eax,-8(%ebp) - - movl (%esi),%eax /* lsl of Xsig */ - mull 4(%ecx) /* msl of b */ - movl %edx,-12(%ebp) - - movl 4(%esi),%eax /* midl of Xsig */ - mull (%ecx) /* lsl of b */ - addl %edx,-12(%ebp) - adcl $0,-8(%ebp) - adcl $0,-4(%ebp) - - movl 4(%esi),%eax /* midl of Xsig */ - mull 4(%ecx) /* msl of b */ - addl %eax,-12(%ebp) - adcl %edx,-8(%ebp) - adcl $0,-4(%ebp) - - movl 8(%esi),%eax /* msl of Xsig */ - mull (%ecx) /* lsl of b */ - addl %eax,-12(%ebp) - adcl %edx,-8(%ebp) - adcl $0,-4(%ebp) - - movl 8(%esi),%eax /* msl of Xsig */ - mull 4(%ecx) /* msl of b */ - addl %eax,-8(%ebp) - adcl %edx,-4(%ebp) - - movl -12(%ebp),%eax - movl %eax,(%esi) - movl -8(%ebp),%eax - movl %eax,4(%esi) - movl -4(%ebp),%eax - movl %eax,8(%esi) - - popl %esi - leave - ret - - - -ENTRY(mul_Xsig_Xsig) - pushl %ebp - movl %esp,%ebp - subl $16,%esp - pushl %esi - - movl PARAM1,%esi - movl PARAM2,%ecx - - xor %eax,%eax - movl %eax,-4(%ebp) - movl %eax,-8(%ebp) - - movl (%esi),%eax /* lsl of Xsig */ - mull 8(%ecx) /* msl of b */ - movl %edx,-12(%ebp) - - movl 4(%esi),%eax /* midl of Xsig */ - mull 4(%ecx) /* midl of b */ - addl %edx,-12(%ebp) - adcl $0,-8(%ebp) - adcl $0,-4(%ebp) - - movl 8(%esi),%eax /* msl of Xsig */ - mull (%ecx) /* lsl of b */ - addl %edx,-12(%ebp) - adcl $0,-8(%ebp) - adcl $0,-4(%ebp) - - movl 4(%esi),%eax /* midl of Xsig */ - mull 8(%ecx) /* msl of b */ - addl %eax,-12(%ebp) - adcl %edx,-8(%ebp) - adcl $0,-4(%ebp) - - movl 8(%esi),%eax /* msl of Xsig */ - mull 4(%ecx) /* midl of b */ - addl %eax,-12(%ebp) - adcl %edx,-8(%ebp) - adcl $0,-4(%ebp) - - movl 8(%esi),%eax /* msl of Xsig */ - mull 8(%ecx) /* msl of b */ - addl %eax,-8(%ebp) - adcl %edx,-4(%ebp) - - movl -12(%ebp),%edx - movl %edx,(%esi) - movl -8(%ebp),%edx - movl %edx,4(%esi) - movl -4(%ebp),%edx - movl %edx,8(%esi) - - popl %esi - leave - ret - diff --git a/arch/i386/math-emu/poly.h b/arch/i386/math-emu/poly.h deleted file mode 100644 index 4db79811492..00000000000 --- a/arch/i386/math-emu/poly.h +++ /dev/null @@ -1,121 +0,0 @@ -/*---------------------------------------------------------------------------+ - | poly.h | - | | - | Header file for the FPU-emu poly*.c source files. | - | | - | Copyright (C) 1994,1999 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@melbpc.org.au | - | | - | Declarations and definitions for functions operating on Xsig (12-byte | - | extended-significand) quantities. | - | | - +---------------------------------------------------------------------------*/ - -#ifndef _POLY_H -#define _POLY_H - -/* This 12-byte structure is used to improve the accuracy of computation - of transcendental functions. - Intended to be used to get results better than 8-byte computation - allows. 9-byte would probably be sufficient. - */ -typedef struct { - unsigned long lsw; - unsigned long midw; - unsigned long msw; -} Xsig; - -asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b, - unsigned long long *result); -asmlinkage void polynomial_Xsig(Xsig *, const unsigned long long *x, - const unsigned long long terms[], const int n); - -asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult); -asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult); -asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult); - -asmlinkage void shr_Xsig(Xsig *, const int n); -asmlinkage int round_Xsig(Xsig *); -asmlinkage int norm_Xsig(Xsig *); -asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest); - -/* Macro to extract the most significant 32 bits from a long long */ -#define LL_MSW(x) (((unsigned long *)&x)[1]) - -/* Macro to initialize an Xsig struct */ -#define MK_XSIG(a,b,c) { c, b, a } - -/* Macro to access the 8 ms bytes of an Xsig as a long long */ -#define XSIG_LL(x) (*(unsigned long long *)&x.midw) - - -/* - Need to run gcc with optimizations on to get these to - actually be in-line. - */ - -/* Multiply two fixed-point 32 bit numbers, producing a 32 bit result. - The answer is the ms word of the product. */ -/* Some versions of gcc make it difficult to stop eax from being clobbered. - Merely specifying that it is used doesn't work... - */ -static inline unsigned long mul_32_32(const unsigned long arg1, - const unsigned long arg2) -{ - int retval; - asm volatile ("mull %2; movl %%edx,%%eax" \ - :"=a" (retval) \ - :"0" (arg1), "g" (arg2) \ - :"dx"); - return retval; -} - - -/* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */ -static inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2) -{ - asm volatile ("movl %1,%%edi; movl %2,%%esi;\n" - "movl (%%esi),%%eax; addl %%eax,(%%edi);\n" - "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n" - "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n" - :"=g" (*dest):"g" (dest), "g" (x2) - :"ax","si","di"); -} - - -/* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */ -/* Note: the constraints in the asm statement didn't always work properly - with gcc 2.5.8. Changing from using edi to using ecx got around the - problem, but keep fingers crossed! */ -static inline void add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp) -{ - asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n" - "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n" - "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n" - "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n" - "jnc 0f;\n" - "rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n" - "movl %4,%%ecx; incl (%%ecx)\n" - "movl $1,%%eax; jmp 1f;\n" - "0: xorl %%eax,%%eax;\n" - "1:\n" - :"=g" (*exp), "=g" (*dest) - :"g" (dest), "g" (x2), "g" (exp) - :"cx","si","ax"); -} - - -/* Negate (subtract from 1.0) the 12 byte Xsig */ -/* This is faster in a loop on my 386 than using the "neg" instruction. */ -static inline void negate_Xsig(Xsig *x) -{ - asm volatile("movl %1,%%esi;\n" - "xorl %%ecx,%%ecx;\n" - "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n" - "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n" - "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n" - :"=g" (*x):"g" (x):"si","ax","cx"); -} - -#endif /* _POLY_H */ diff --git a/arch/i386/math-emu/poly_2xm1.c b/arch/i386/math-emu/poly_2xm1.c deleted file mode 100644 index 9766ad5e974..00000000000 --- a/arch/i386/math-emu/poly_2xm1.c +++ /dev/null @@ -1,156 +0,0 @@ -/*---------------------------------------------------------------------------+ - | poly_2xm1.c | - | | - | Function to compute 2^x-1 by a polynomial approximation. | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "reg_constant.h" -#include "fpu_emu.h" -#include "fpu_system.h" -#include "control_w.h" -#include "poly.h" - - -#define HIPOWER 11 -static const unsigned long long lterms[HIPOWER] = -{ - 0x0000000000000000LL, /* This term done separately as 12 bytes */ - 0xf5fdeffc162c7543LL, - 0x1c6b08d704a0bfa6LL, - 0x0276556df749cc21LL, - 0x002bb0ffcf14f6b8LL, - 0x0002861225ef751cLL, - 0x00001ffcbfcd5422LL, - 0x00000162c005d5f1LL, - 0x0000000da96ccb1bLL, - 0x0000000078d1b897LL, - 0x000000000422b029LL -}; - -static const Xsig hiterm = MK_XSIG(0xb17217f7, 0xd1cf79ab, 0xc8a39194); - -/* Four slices: 0.0 : 0.25 : 0.50 : 0.75 : 1.0, - These numbers are 2^(1/4), 2^(1/2), and 2^(3/4) - */ -static const Xsig shiftterm0 = MK_XSIG(0, 0, 0); -static const Xsig shiftterm1 = MK_XSIG(0x9837f051, 0x8db8a96f, 0x46ad2318); -static const Xsig shiftterm2 = MK_XSIG(0xb504f333, 0xf9de6484, 0x597d89b3); -static const Xsig shiftterm3 = MK_XSIG(0xd744fcca, 0xd69d6af4, 0x39a68bb9); - -static const Xsig *shiftterm[] = { &shiftterm0, &shiftterm1, - &shiftterm2, &shiftterm3 }; - - -/*--- poly_2xm1() -----------------------------------------------------------+ - | Requires st(0) which is TAG_Valid and < 1. | - +---------------------------------------------------------------------------*/ -int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result) -{ - long int exponent, shift; - unsigned long long Xll; - Xsig accumulator, Denom, argSignif; - u_char tag; - - exponent = exponent16(arg); - -#ifdef PARANOID - if ( exponent >= 0 ) /* Don't want a |number| >= 1.0 */ - { - /* Number negative, too large, or not Valid. */ - EXCEPTION(EX_INTERNAL|0x127); - return 1; - } -#endif /* PARANOID */ - - argSignif.lsw = 0; - XSIG_LL(argSignif) = Xll = significand(arg); - - if ( exponent == -1 ) - { - shift = (argSignif.msw & 0x40000000) ? 3 : 2; - /* subtract 0.5 or 0.75 */ - exponent -= 2; - XSIG_LL(argSignif) <<= 2; - Xll <<= 2; - } - else if ( exponent == -2 ) - { - shift = 1; - /* subtract 0.25 */ - exponent--; - XSIG_LL(argSignif) <<= 1; - Xll <<= 1; - } - else - shift = 0; - - if ( exponent < -2 ) - { - /* Shift the argument right by the required places. */ - if ( FPU_shrx(&Xll, -2-exponent) >= 0x80000000U ) - Xll++; /* round up */ - } - - accumulator.lsw = accumulator.midw = accumulator.msw = 0; - polynomial_Xsig(&accumulator, &Xll, lterms, HIPOWER-1); - mul_Xsig_Xsig(&accumulator, &argSignif); - shr_Xsig(&accumulator, 3); - - mul_Xsig_Xsig(&argSignif, &hiterm); /* The leading term */ - add_two_Xsig(&accumulator, &argSignif, &exponent); - - if ( shift ) - { - /* The argument is large, use the identity: - f(x+a) = f(a) * (f(x) + 1) - 1; - */ - shr_Xsig(&accumulator, - exponent); - accumulator.msw |= 0x80000000; /* add 1.0 */ - mul_Xsig_Xsig(&accumulator, shiftterm[shift]); - accumulator.msw &= 0x3fffffff; /* subtract 1.0 */ - exponent = 1; - } - - if ( sign != SIGN_POS ) - { - /* The argument is negative, use the identity: - f(-x) = -f(x) / (1 + f(x)) - */ - Denom.lsw = accumulator.lsw; - XSIG_LL(Denom) = XSIG_LL(accumulator); - if ( exponent < 0 ) - shr_Xsig(&Denom, - exponent); - else if ( exponent > 0 ) - { - /* exponent must be 1 here */ - XSIG_LL(Denom) <<= 1; - if ( Denom.lsw & 0x80000000 ) - XSIG_LL(Denom) |= 1; - (Denom.lsw) <<= 1; - } - Denom.msw |= 0x80000000; /* add 1.0 */ - div_Xsig(&accumulator, &Denom, &accumulator); - } - - /* Convert to 64 bit signed-compatible */ - exponent += round_Xsig(&accumulator); - - result = &st(0); - significand(result) = XSIG_LL(accumulator); - setexponent16(result, exponent); - - tag = FPU_round(result, 1, 0, FULL_PRECISION, sign); - - setsign(result, sign); - FPU_settag0(tag); - - return 0; - -} diff --git a/arch/i386/math-emu/poly_atan.c b/arch/i386/math-emu/poly_atan.c deleted file mode 100644 index 82f702952f6..00000000000 --- a/arch/i386/math-emu/poly_atan.c +++ /dev/null @@ -1,229 +0,0 @@ -/*---------------------------------------------------------------------------+ - | poly_atan.c | - | | - | Compute the arctan of a FPU_REG, using a polynomial approximation. | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "reg_constant.h" -#include "fpu_emu.h" -#include "fpu_system.h" -#include "status_w.h" -#include "control_w.h" -#include "poly.h" - - -#define HIPOWERon 6 /* odd poly, negative terms */ -static const unsigned long long oddnegterms[HIPOWERon] = -{ - 0x0000000000000000LL, /* Dummy (not for - 1.0) */ - 0x015328437f756467LL, - 0x0005dda27b73dec6LL, - 0x0000226bf2bfb91aLL, - 0x000000ccc439c5f7LL, - 0x0000000355438407LL -} ; - -#define HIPOWERop 6 /* odd poly, positive terms */ -static const unsigned long long oddplterms[HIPOWERop] = -{ -/* 0xaaaaaaaaaaaaaaabLL, transferred to fixedpterm[] */ - 0x0db55a71875c9ac2LL, - 0x0029fce2d67880b0LL, - 0x0000dfd3908b4596LL, - 0x00000550fd61dab4LL, - 0x0000001c9422b3f9LL, - 0x000000003e3301e1LL -}; - -static const unsigned long long denomterm = 0xebd9b842c5c53a0eLL; - -static const Xsig fixedpterm = MK_XSIG(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa); - -static const Xsig pi_signif = MK_XSIG(0xc90fdaa2, 0x2168c234, 0xc4c6628b); - - -/*--- poly_atan() -----------------------------------------------------------+ - | | - +---------------------------------------------------------------------------*/ -void poly_atan(FPU_REG *st0_ptr, u_char st0_tag, - FPU_REG *st1_ptr, u_char st1_tag) -{ - u_char transformed, inverted, - sign1, sign2; - int exponent; - long int dummy_exp; - Xsig accumulator, Numer, Denom, accumulatore, argSignif, - argSq, argSqSq; - u_char tag; - - sign1 = getsign(st0_ptr); - sign2 = getsign(st1_ptr); - if ( st0_tag == TAG_Valid ) - { - exponent = exponent(st0_ptr); - } - else - { - /* This gives non-compatible stack contents... */ - FPU_to_exp16(st0_ptr, st0_ptr); - exponent = exponent16(st0_ptr); - } - if ( st1_tag == TAG_Valid ) - { - exponent -= exponent(st1_ptr); - } - else - { - /* This gives non-compatible stack contents... */ - FPU_to_exp16(st1_ptr, st1_ptr); - exponent -= exponent16(st1_ptr); - } - - if ( (exponent < 0) || ((exponent == 0) && - ((st0_ptr->sigh < st1_ptr->sigh) || - ((st0_ptr->sigh == st1_ptr->sigh) && - (st0_ptr->sigl < st1_ptr->sigl))) ) ) - { - inverted = 1; - Numer.lsw = Denom.lsw = 0; - XSIG_LL(Numer) = significand(st0_ptr); - XSIG_LL(Denom) = significand(st1_ptr); - } - else - { - inverted = 0; - exponent = -exponent; - Numer.lsw = Denom.lsw = 0; - XSIG_LL(Numer) = significand(st1_ptr); - XSIG_LL(Denom) = significand(st0_ptr); - } - div_Xsig(&Numer, &Denom, &argSignif); - exponent += norm_Xsig(&argSignif); - - if ( (exponent >= -1) - || ((exponent == -2) && (argSignif.msw > 0xd413ccd0)) ) - { - /* The argument is greater than sqrt(2)-1 (=0.414213562...) */ - /* Convert the argument by an identity for atan */ - transformed = 1; - - if ( exponent >= 0 ) - { -#ifdef PARANOID - if ( !( (exponent == 0) && - (argSignif.lsw == 0) && (argSignif.midw == 0) && - (argSignif.msw == 0x80000000) ) ) - { - EXCEPTION(EX_INTERNAL|0x104); /* There must be a logic error */ - return; - } -#endif /* PARANOID */ - argSignif.msw = 0; /* Make the transformed arg -> 0.0 */ - } - else - { - Numer.lsw = Denom.lsw = argSignif.lsw; - XSIG_LL(Numer) = XSIG_LL(Denom) = XSIG_LL(argSignif); - - if ( exponent < -1 ) - shr_Xsig(&Numer, -1-exponent); - negate_Xsig(&Numer); - - shr_Xsig(&Denom, -exponent); - Denom.msw |= 0x80000000; - - div_Xsig(&Numer, &Denom, &argSignif); - - exponent = -1 + norm_Xsig(&argSignif); - } - } - else - { - transformed = 0; - } - - argSq.lsw = argSignif.lsw; argSq.midw = argSignif.midw; - argSq.msw = argSignif.msw; - mul_Xsig_Xsig(&argSq, &argSq); - - argSqSq.lsw = argSq.lsw; argSqSq.midw = argSq.midw; argSqSq.msw = argSq.msw; - mul_Xsig_Xsig(&argSqSq, &argSqSq); - - accumulatore.lsw = argSq.lsw; - XSIG_LL(accumulatore) = XSIG_LL(argSq); - - shr_Xsig(&argSq, 2*(-1-exponent-1)); - shr_Xsig(&argSqSq, 4*(-1-exponent-1)); - - /* Now have argSq etc with binary point at the left - .1xxxxxxxx */ - - /* Do the basic fixed point polynomial evaluation */ - accumulator.msw = accumulator.midw = accumulator.lsw = 0; - polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), - oddplterms, HIPOWERop-1); - mul64_Xsig(&accumulator, &XSIG_LL(argSq)); - negate_Xsig(&accumulator); - polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), oddnegterms, HIPOWERon-1); - negate_Xsig(&accumulator); - add_two_Xsig(&accumulator, &fixedpterm, &dummy_exp); - - mul64_Xsig(&accumulatore, &denomterm); - shr_Xsig(&accumulatore, 1 + 2*(-1-exponent)); - accumulatore.msw |= 0x80000000; - - div_Xsig(&accumulator, &accumulatore, &accumulator); - - mul_Xsig_Xsig(&accumulator, &argSignif); - mul_Xsig_Xsig(&accumulator, &argSq); - - shr_Xsig(&accumulator, 3); - negate_Xsig(&accumulator); - add_Xsig_Xsig(&accumulator, &argSignif); - - if ( transformed ) - { - /* compute pi/4 - accumulator */ - shr_Xsig(&accumulator, -1-exponent); - negate_Xsig(&accumulator); - add_Xsig_Xsig(&accumulator, &pi_signif); - exponent = -1; - } - - if ( inverted ) - { - /* compute pi/2 - accumulator */ - shr_Xsig(&accumulator, -exponent); - negate_Xsig(&accumulator); - add_Xsig_Xsig(&accumulator, &pi_signif); - exponent = 0; - } - - if ( sign1 ) - { - /* compute pi - accumulator */ - shr_Xsig(&accumulator, 1 - exponent); - negate_Xsig(&accumulator); - add_Xsig_Xsig(&accumulator, &pi_signif); - exponent = 1; - } - - exponent += round_Xsig(&accumulator); - - significand(st1_ptr) = XSIG_LL(accumulator); - setexponent16(st1_ptr, exponent); - - tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign2); - FPU_settagi(1, tag); - - set_precision_flag_up(); /* We do not really know if up or down, - use this as the default. */ - -} diff --git a/arch/i386/math-emu/poly_l2.c b/arch/i386/math-emu/poly_l2.c deleted file mode 100644 index dd00e1d5b07..00000000000 --- a/arch/i386/math-emu/poly_l2.c +++ /dev/null @@ -1,272 +0,0 @@ -/*---------------------------------------------------------------------------+ - | poly_l2.c | - | | - | Compute the base 2 log of a FPU_REG, using a polynomial approximation. | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - - -#include "exception.h" -#include "reg_constant.h" -#include "fpu_emu.h" -#include "fpu_system.h" -#include "control_w.h" -#include "poly.h" - - -static void log2_kernel(FPU_REG const *arg, u_char argsign, - Xsig *accum_result, long int *expon); - - -/*--- poly_l2() -------------------------------------------------------------+ - | Base 2 logarithm by a polynomial approximation. | - +---------------------------------------------------------------------------*/ -void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign) -{ - long int exponent, expon, expon_expon; - Xsig accumulator, expon_accum, yaccum; - u_char sign, argsign; - FPU_REG x; - int tag; - - exponent = exponent16(st0_ptr); - - /* From st0_ptr, make a number > sqrt(2)/2 and < sqrt(2) */ - if ( st0_ptr->sigh > (unsigned)0xb504f334 ) - { - /* Treat as sqrt(2)/2 < st0_ptr < 1 */ - significand(&x) = - significand(st0_ptr); - setexponent16(&x, -1); - exponent++; - argsign = SIGN_NEG; - } - else - { - /* Treat as 1 <= st0_ptr < sqrt(2) */ - x.sigh = st0_ptr->sigh - 0x80000000; - x.sigl = st0_ptr->sigl; - setexponent16(&x, 0); - argsign = SIGN_POS; - } - tag = FPU_normalize_nuo(&x); - - if ( tag == TAG_Zero ) - { - expon = 0; - accumulator.msw = accumulator.midw = accumulator.lsw = 0; - } - else - { - log2_kernel(&x, argsign, &accumulator, &expon); - } - - if ( exponent < 0 ) - { - sign = SIGN_NEG; - exponent = -exponent; - } - else - sign = SIGN_POS; - expon_accum.msw = exponent; expon_accum.midw = expon_accum.lsw = 0; - if ( exponent ) - { - expon_expon = 31 + norm_Xsig(&expon_accum); - shr_Xsig(&accumulator, expon_expon - expon); - - if ( sign ^ argsign ) - negate_Xsig(&accumulator); - add_Xsig_Xsig(&accumulator, &expon_accum); - } - else - { - expon_expon = expon; - sign = argsign; - } - - yaccum.lsw = 0; XSIG_LL(yaccum) = significand(st1_ptr); - mul_Xsig_Xsig(&accumulator, &yaccum); - - expon_expon += round_Xsig(&accumulator); - - if ( accumulator.msw == 0 ) - { - FPU_copy_to_reg1(&CONST_Z, TAG_Zero); - return; - } - - significand(st1_ptr) = XSIG_LL(accumulator); - setexponent16(st1_ptr, expon_expon + exponent16(st1_ptr) + 1); - - tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign ^ st1_sign); - FPU_settagi(1, tag); - - set_precision_flag_up(); /* 80486 appears to always do this */ - - return; - -} - - -/*--- poly_l2p1() -----------------------------------------------------------+ - | Base 2 logarithm by a polynomial approximation. | - | log2(x+1) | - +---------------------------------------------------------------------------*/ -int poly_l2p1(u_char sign0, u_char sign1, - FPU_REG *st0_ptr, FPU_REG *st1_ptr, FPU_REG *dest) -{ - u_char tag; - long int exponent; - Xsig accumulator, yaccum; - - if ( exponent16(st0_ptr) < 0 ) - { - log2_kernel(st0_ptr, sign0, &accumulator, &exponent); - - yaccum.lsw = 0; - XSIG_LL(yaccum) = significand(st1_ptr); - mul_Xsig_Xsig(&accumulator, &yaccum); - - exponent += round_Xsig(&accumulator); - - exponent += exponent16(st1_ptr) + 1; - if ( exponent < EXP_WAY_UNDER ) exponent = EXP_WAY_UNDER; - - significand(dest) = XSIG_LL(accumulator); - setexponent16(dest, exponent); - - tag = FPU_round(dest, 1, 0, FULL_PRECISION, sign0 ^ sign1); - FPU_settagi(1, tag); - - if ( tag == TAG_Valid ) - set_precision_flag_up(); /* 80486 appears to always do this */ - } - else - { - /* The magnitude of st0_ptr is far too large. */ - - if ( sign0 != SIGN_POS ) - { - /* Trying to get the log of a negative number. */ -#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ - changesign(st1_ptr); -#else - if ( arith_invalid(1) < 0 ) - return 1; -#endif /* PECULIAR_486 */ - } - - /* 80486 appears to do this */ - if ( sign0 == SIGN_NEG ) - set_precision_flag_down(); - else - set_precision_flag_up(); - } - - if ( exponent(dest) <= EXP_UNDER ) - EXCEPTION(EX_Underflow); - - return 0; - -} - - - - -#undef HIPOWER -#define HIPOWER 10 -static const unsigned long long logterms[HIPOWER] = -{ - 0x2a8eca5705fc2ef0LL, - 0xf6384ee1d01febceLL, - 0x093bb62877cdf642LL, - 0x006985d8a9ec439bLL, - 0x0005212c4f55a9c8LL, - 0x00004326a16927f0LL, - 0x0000038d1d80a0e7LL, - 0x0000003141cc80c6LL, - 0x00000002b1668c9fLL, - 0x000000002c7a46aaLL -}; - -static const unsigned long leadterm = 0xb8000000; - - -/*--- log2_kernel() ---------------------------------------------------------+ - | Base 2 logarithm by a polynomial approximation. | - | log2(x+1) | - +---------------------------------------------------------------------------*/ -static void log2_kernel(FPU_REG const *arg, u_char argsign, Xsig *accum_result, - long int *expon) -{ - long int exponent, adj; - unsigned long long Xsq; - Xsig accumulator, Numer, Denom, argSignif, arg_signif; - - exponent = exponent16(arg); - Numer.lsw = Denom.lsw = 0; - XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg); - if ( argsign == SIGN_POS ) - { - shr_Xsig(&Denom, 2 - (1 + exponent)); - Denom.msw |= 0x80000000; - div_Xsig(&Numer, &Denom, &argSignif); - } - else - { - shr_Xsig(&Denom, 1 - (1 + exponent)); - negate_Xsig(&Denom); - if ( Denom.msw & 0x80000000 ) - { - div_Xsig(&Numer, &Denom, &argSignif); - exponent ++; - } - else - { - /* Denom must be 1.0 */ - argSignif.lsw = Numer.lsw; argSignif.midw = Numer.midw; - argSignif.msw = Numer.msw; - } - } - -#ifndef PECULIAR_486 - /* Should check here that |local_arg| is within the valid range */ - if ( exponent >= -2 ) - { - if ( (exponent > -2) || - (argSignif.msw > (unsigned)0xafb0ccc0) ) - { - /* The argument is too large */ - } - } -#endif /* PECULIAR_486 */ - - arg_signif.lsw = argSignif.lsw; XSIG_LL(arg_signif) = XSIG_LL(argSignif); - adj = norm_Xsig(&argSignif); - accumulator.lsw = argSignif.lsw; XSIG_LL(accumulator) = XSIG_LL(argSignif); - mul_Xsig_Xsig(&accumulator, &accumulator); - shr_Xsig(&accumulator, 2*(-1 - (1 + exponent + adj))); - Xsq = XSIG_LL(accumulator); - if ( accumulator.lsw & 0x80000000 ) - Xsq++; - - accumulator.msw = accumulator.midw = accumulator.lsw = 0; - /* Do the basic fixed point polynomial evaluation */ - polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER-1); - - mul_Xsig_Xsig(&accumulator, &argSignif); - shr_Xsig(&accumulator, 6 - adj); - - mul32_Xsig(&arg_signif, leadterm); - add_two_Xsig(&accumulator, &arg_signif, &exponent); - - *expon = exponent + 1; - accum_result->lsw = accumulator.lsw; - accum_result->midw = accumulator.midw; - accum_result->msw = accumulator.msw; - -} diff --git a/arch/i386/math-emu/poly_sin.c b/arch/i386/math-emu/poly_sin.c deleted file mode 100644 index a36313fb06f..00000000000 --- a/arch/i386/math-emu/poly_sin.c +++ /dev/null @@ -1,397 +0,0 @@ -/*---------------------------------------------------------------------------+ - | poly_sin.c | - | | - | Computation of an approximation of the sin function and the cosine | - | function by a polynomial. | - | | - | Copyright (C) 1992,1993,1994,1997,1999 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@melbpc.org.au | - | | - | | - +---------------------------------------------------------------------------*/ - - -#include "exception.h" -#include "reg_constant.h" -#include "fpu_emu.h" -#include "fpu_system.h" -#include "control_w.h" -#include "poly.h" - - -#define N_COEFF_P 4 -#define N_COEFF_N 4 - -static const unsigned long long pos_terms_l[N_COEFF_P] = -{ - 0xaaaaaaaaaaaaaaabLL, - 0x00d00d00d00cf906LL, - 0x000006b99159a8bbLL, - 0x000000000d7392e6LL -}; - -static const unsigned long long neg_terms_l[N_COEFF_N] = -{ - 0x2222222222222167LL, - 0x0002e3bc74aab624LL, - 0x0000000b09229062LL, - 0x00000000000c7973LL -}; - - - -#define N_COEFF_PH 4 -#define N_COEFF_NH 4 -static const unsigned long long pos_terms_h[N_COEFF_PH] = -{ - 0x0000000000000000LL, - 0x05b05b05b05b0406LL, - 0x000049f93edd91a9LL, - 0x00000000c9c9ed62LL -}; - -static const unsigned long long neg_terms_h[N_COEFF_NH] = -{ - 0xaaaaaaaaaaaaaa98LL, - 0x001a01a01a019064LL, - 0x0000008f76c68a77LL, - 0x0000000000d58f5eLL -}; - - -/*--- poly_sine() -----------------------------------------------------------+ - | | - +---------------------------------------------------------------------------*/ -void poly_sine(FPU_REG *st0_ptr) -{ - int exponent, echange; - Xsig accumulator, argSqrd, argTo4; - unsigned long fix_up, adj; - unsigned long long fixed_arg; - FPU_REG result; - - exponent = exponent(st0_ptr); - - accumulator.lsw = accumulator.midw = accumulator.msw = 0; - - /* Split into two ranges, for arguments below and above 1.0 */ - /* The boundary between upper and lower is approx 0.88309101259 */ - if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) ) - { - /* The argument is <= 0.88309101259 */ - - argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0; - mul64_Xsig(&argSqrd, &significand(st0_ptr)); - shr_Xsig(&argSqrd, 2*(-1-exponent)); - argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; - argTo4.lsw = argSqrd.lsw; - mul_Xsig_Xsig(&argTo4, &argTo4); - - polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, - N_COEFF_N-1); - mul_Xsig_Xsig(&accumulator, &argSqrd); - negate_Xsig(&accumulator); - - polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, - N_COEFF_P-1); - - shr_Xsig(&accumulator, 2); /* Divide by four */ - accumulator.msw |= 0x80000000; /* Add 1.0 */ - - mul64_Xsig(&accumulator, &significand(st0_ptr)); - mul64_Xsig(&accumulator, &significand(st0_ptr)); - mul64_Xsig(&accumulator, &significand(st0_ptr)); - - /* Divide by four, FPU_REG compatible, etc */ - exponent = 3*exponent; - - /* The minimum exponent difference is 3 */ - shr_Xsig(&accumulator, exponent(st0_ptr) - exponent); - - negate_Xsig(&accumulator); - XSIG_LL(accumulator) += significand(st0_ptr); - - echange = round_Xsig(&accumulator); - - setexponentpos(&result, exponent(st0_ptr) + echange); - } - else - { - /* The argument is > 0.88309101259 */ - /* We use sin(st(0)) = cos(pi/2-st(0)) */ - - fixed_arg = significand(st0_ptr); - - if ( exponent == 0 ) - { - /* The argument is >= 1.0 */ - - /* Put the binary point at the left. */ - fixed_arg <<= 1; - } - /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ - fixed_arg = 0x921fb54442d18469LL - fixed_arg; - /* There is a special case which arises due to rounding, to fix here. */ - if ( fixed_arg == 0xffffffffffffffffLL ) - fixed_arg = 0; - - XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; - mul64_Xsig(&argSqrd, &fixed_arg); - - XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw; - mul_Xsig_Xsig(&argTo4, &argTo4); - - polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, - N_COEFF_NH-1); - mul_Xsig_Xsig(&accumulator, &argSqrd); - negate_Xsig(&accumulator); - - polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, - N_COEFF_PH-1); - negate_Xsig(&accumulator); - - mul64_Xsig(&accumulator, &fixed_arg); - mul64_Xsig(&accumulator, &fixed_arg); - - shr_Xsig(&accumulator, 3); - negate_Xsig(&accumulator); - - add_Xsig_Xsig(&accumulator, &argSqrd); - - shr_Xsig(&accumulator, 1); - - accumulator.lsw |= 1; /* A zero accumulator here would cause problems */ - negate_Xsig(&accumulator); - - /* The basic computation is complete. Now fix the answer to - compensate for the error due to the approximation used for - pi/2 - */ - - /* This has an exponent of -65 */ - fix_up = 0x898cc517; - /* The fix-up needs to be improved for larger args */ - if ( argSqrd.msw & 0xffc00000 ) - { - /* Get about 32 bit precision in these: */ - fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6; - } - fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg)); - - adj = accumulator.lsw; /* temp save */ - accumulator.lsw -= fix_up; - if ( accumulator.lsw > adj ) - XSIG_LL(accumulator) --; - - echange = round_Xsig(&accumulator); - - setexponentpos(&result, echange - 1); - } - - significand(&result) = XSIG_LL(accumulator); - setsign(&result, getsign(st0_ptr)); - FPU_copy_to_reg0(&result, TAG_Valid); - -#ifdef PARANOID - if ( (exponent(&result) >= 0) - && (significand(&result) > 0x8000000000000000LL) ) - { - EXCEPTION(EX_INTERNAL|0x150); - } -#endif /* PARANOID */ - -} - - - -/*--- poly_cos() ------------------------------------------------------------+ - | | - +---------------------------------------------------------------------------*/ -void poly_cos(FPU_REG *st0_ptr) -{ - FPU_REG result; - long int exponent, exp2, echange; - Xsig accumulator, argSqrd, fix_up, argTo4; - unsigned long long fixed_arg; - -#ifdef PARANOID - if ( (exponent(st0_ptr) > 0) - || ((exponent(st0_ptr) == 0) - && (significand(st0_ptr) > 0xc90fdaa22168c234LL)) ) - { - EXCEPTION(EX_Invalid); - FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); - return; - } -#endif /* PARANOID */ - - exponent = exponent(st0_ptr); - - accumulator.lsw = accumulator.midw = accumulator.msw = 0; - - if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) ) - { - /* arg is < 0.687705 */ - - argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; - argSqrd.lsw = 0; - mul64_Xsig(&argSqrd, &significand(st0_ptr)); - - if ( exponent < -1 ) - { - /* shift the argument right by the required places */ - shr_Xsig(&argSqrd, 2*(-1-exponent)); - } - - argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; - argTo4.lsw = argSqrd.lsw; - mul_Xsig_Xsig(&argTo4, &argTo4); - - polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, - N_COEFF_NH-1); - mul_Xsig_Xsig(&accumulator, &argSqrd); - negate_Xsig(&accumulator); - - polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, - N_COEFF_PH-1); - negate_Xsig(&accumulator); - - mul64_Xsig(&accumulator, &significand(st0_ptr)); - mul64_Xsig(&accumulator, &significand(st0_ptr)); - shr_Xsig(&accumulator, -2*(1+exponent)); - - shr_Xsig(&accumulator, 3); - negate_Xsig(&accumulator); - - add_Xsig_Xsig(&accumulator, &argSqrd); - - shr_Xsig(&accumulator, 1); - - /* It doesn't matter if accumulator is all zero here, the - following code will work ok */ - negate_Xsig(&accumulator); - - if ( accumulator.lsw & 0x80000000 ) - XSIG_LL(accumulator) ++; - if ( accumulator.msw == 0 ) - { - /* The result is 1.0 */ - FPU_copy_to_reg0(&CONST_1, TAG_Valid); - return; - } - else - { - significand(&result) = XSIG_LL(accumulator); - - /* will be a valid positive nr with expon = -1 */ - setexponentpos(&result, -1); - } - } - else - { - fixed_arg = significand(st0_ptr); - - if ( exponent == 0 ) - { - /* The argument is >= 1.0 */ - - /* Put the binary point at the left. */ - fixed_arg <<= 1; - } - /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ - fixed_arg = 0x921fb54442d18469LL - fixed_arg; - /* There is a special case which arises due to rounding, to fix here. */ - if ( fixed_arg == 0xffffffffffffffffLL ) - fixed_arg = 0; - - exponent = -1; - exp2 = -1; - - /* A shift is needed here only for a narrow range of arguments, - i.e. for fixed_arg approx 2^-32, but we pick up more... */ - if ( !(LL_MSW(fixed_arg) & 0xffff0000) ) - { - fixed_arg <<= 16; - exponent -= 16; - exp2 -= 16; - } - - XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; - mul64_Xsig(&argSqrd, &fixed_arg); - - if ( exponent < -1 ) - { - /* shift the argument right by the required places */ - shr_Xsig(&argSqrd, 2*(-1-exponent)); - } - - argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; - argTo4.lsw = argSqrd.lsw; - mul_Xsig_Xsig(&argTo4, &argTo4); - - polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, - N_COEFF_N-1); - mul_Xsig_Xsig(&accumulator, &argSqrd); - negate_Xsig(&accumulator); - - polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, - N_COEFF_P-1); - - shr_Xsig(&accumulator, 2); /* Divide by four */ - accumulator.msw |= 0x80000000; /* Add 1.0 */ - - mul64_Xsig(&accumulator, &fixed_arg); - mul64_Xsig(&accumulator, &fixed_arg); - mul64_Xsig(&accumulator, &fixed_arg); - - /* Divide by four, FPU_REG compatible, etc */ - exponent = 3*exponent; - - /* The minimum exponent difference is 3 */ - shr_Xsig(&accumulator, exp2 - exponent); - - negate_Xsig(&accumulator); - XSIG_LL(accumulator) += fixed_arg; - - /* The basic computation is complete. Now fix the answer to - compensate for the error due to the approximation used for - pi/2 - */ - - /* This has an exponent of -65 */ - XSIG_LL(fix_up) = 0x898cc51701b839a2ll; - fix_up.lsw = 0; - - /* The fix-up needs to be improved for larger args */ - if ( argSqrd.msw & 0xffc00000 ) - { - /* Get about 32 bit precision in these: */ - fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2; - fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24; - } - - exp2 += norm_Xsig(&accumulator); - shr_Xsig(&accumulator, 1); /* Prevent overflow */ - exp2++; - shr_Xsig(&fix_up, 65 + exp2); - - add_Xsig_Xsig(&accumulator, &fix_up); - - echange = round_Xsig(&accumulator); - - setexponentpos(&result, exp2 + echange); - significand(&result) = XSIG_LL(accumulator); - } - - FPU_copy_to_reg0(&result, TAG_Valid); - -#ifdef PARANOID - if ( (exponent(&result) >= 0) - && (significand(&result) > 0x8000000000000000LL) ) - { - EXCEPTION(EX_INTERNAL|0x151); - } -#endif /* PARANOID */ - -} diff --git a/arch/i386/math-emu/poly_tan.c b/arch/i386/math-emu/poly_tan.c deleted file mode 100644 index 8df3e03b6e6..00000000000 --- a/arch/i386/math-emu/poly_tan.c +++ /dev/null @@ -1,222 +0,0 @@ -/*---------------------------------------------------------------------------+ - | poly_tan.c | - | | - | Compute the tan of a FPU_REG, using a polynomial approximation. | - | | - | Copyright (C) 1992,1993,1994,1997,1999 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@melbpc.org.au | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "reg_constant.h" -#include "fpu_emu.h" -#include "fpu_system.h" -#include "control_w.h" -#include "poly.h" - - -#define HiPOWERop 3 /* odd poly, positive terms */ -static const unsigned long long oddplterm[HiPOWERop] = -{ - 0x0000000000000000LL, - 0x0051a1cf08fca228LL, - 0x0000000071284ff7LL -}; - -#define HiPOWERon 2 /* odd poly, negative terms */ -static const unsigned long long oddnegterm[HiPOWERon] = -{ - 0x1291a9a184244e80LL, - 0x0000583245819c21LL -}; - -#define HiPOWERep 2 /* even poly, positive terms */ -static const unsigned long long evenplterm[HiPOWERep] = -{ - 0x0e848884b539e888LL, - 0x00003c7f18b887daLL -}; - -#define HiPOWERen 2 /* even poly, negative terms */ -static const unsigned long long evennegterm[HiPOWERen] = -{ - 0xf1f0200fd51569ccLL, - 0x003afb46105c4432LL -}; - -static const unsigned long long twothirds = 0xaaaaaaaaaaaaaaabLL; - - -/*--- poly_tan() ------------------------------------------------------------+ - | | - +---------------------------------------------------------------------------*/ -void poly_tan(FPU_REG *st0_ptr) -{ - long int exponent; - int invert; - Xsig argSq, argSqSq, accumulatoro, accumulatore, accum, - argSignif, fix_up; - unsigned long adj; - - exponent = exponent(st0_ptr); - -#ifdef PARANOID - if ( signnegative(st0_ptr) ) /* Can't hack a number < 0.0 */ - { arith_invalid(0); return; } /* Need a positive number */ -#endif /* PARANOID */ - - /* Split the problem into two domains, smaller and larger than pi/4 */ - if ( (exponent == 0) || ((exponent == -1) && (st0_ptr->sigh > 0xc90fdaa2)) ) - { - /* The argument is greater than (approx) pi/4 */ - invert = 1; - accum.lsw = 0; - XSIG_LL(accum) = significand(st0_ptr); - - if ( exponent == 0 ) - { - /* The argument is >= 1.0 */ - /* Put the binary point at the left. */ - XSIG_LL(accum) <<= 1; - } - /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ - XSIG_LL(accum) = 0x921fb54442d18469LL - XSIG_LL(accum); - /* This is a special case which arises due to rounding. */ - if ( XSIG_LL(accum) == 0xffffffffffffffffLL ) - { - FPU_settag0(TAG_Valid); - significand(st0_ptr) = 0x8a51e04daabda360LL; - setexponent16(st0_ptr, (0x41 + EXTENDED_Ebias) | SIGN_Negative); - return; - } - - argSignif.lsw = accum.lsw; - XSIG_LL(argSignif) = XSIG_LL(accum); - exponent = -1 + norm_Xsig(&argSignif); - } - else - { - invert = 0; - argSignif.lsw = 0; - XSIG_LL(accum) = XSIG_LL(argSignif) = significand(st0_ptr); - - if ( exponent < -1 ) - { - /* shift the argument right by the required places */ - if ( FPU_shrx(&XSIG_LL(accum), -1-exponent) >= 0x80000000U ) - XSIG_LL(accum) ++; /* round up */ - } - } - - XSIG_LL(argSq) = XSIG_LL(accum); argSq.lsw = accum.lsw; - mul_Xsig_Xsig(&argSq, &argSq); - XSIG_LL(argSqSq) = XSIG_LL(argSq); argSqSq.lsw = argSq.lsw; - mul_Xsig_Xsig(&argSqSq, &argSqSq); - - /* Compute the negative terms for the numerator polynomial */ - accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0; - polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm, HiPOWERon-1); - mul_Xsig_Xsig(&accumulatoro, &argSq); - negate_Xsig(&accumulatoro); - /* Add the positive terms */ - polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm, HiPOWERop-1); - - - /* Compute the positive terms for the denominator polynomial */ - accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0; - polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm, HiPOWERep-1); - mul_Xsig_Xsig(&accumulatore, &argSq); - negate_Xsig(&accumulatore); - /* Add the negative terms */ - polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm, HiPOWERen-1); - /* Multiply by arg^2 */ - mul64_Xsig(&accumulatore, &XSIG_LL(argSignif)); - mul64_Xsig(&accumulatore, &XSIG_LL(argSignif)); - /* de-normalize and divide by 2 */ - shr_Xsig(&accumulatore, -2*(1+exponent) + 1); - negate_Xsig(&accumulatore); /* This does 1 - accumulator */ - - /* Now find the ratio. */ - if ( accumulatore.msw == 0 ) - { - /* accumulatoro must contain 1.0 here, (actually, 0) but it - really doesn't matter what value we use because it will - have negligible effect in later calculations - */ - XSIG_LL(accum) = 0x8000000000000000LL; - accum.lsw = 0; - } - else - { - div_Xsig(&accumulatoro, &accumulatore, &accum); - } - - /* Multiply by 1/3 * arg^3 */ - mul64_Xsig(&accum, &XSIG_LL(argSignif)); - mul64_Xsig(&accum, &XSIG_LL(argSignif)); - mul64_Xsig(&accum, &XSIG_LL(argSignif)); - mul64_Xsig(&accum, &twothirds); - shr_Xsig(&accum, -2*(exponent+1)); - - /* tan(arg) = arg + accum */ - add_two_Xsig(&accum, &argSignif, &exponent); - - if ( invert ) - { - /* We now have the value of tan(pi_2 - arg) where pi_2 is an - approximation for pi/2 - */ - /* The next step is to fix the answer to compensate for the - error due to the approximation used for pi/2 - */ - - /* This is (approx) delta, the error in our approx for pi/2 - (see above). It has an exponent of -65 - */ - XSIG_LL(fix_up) = 0x898cc51701b839a2LL; - fix_up.lsw = 0; - - if ( exponent == 0 ) - adj = 0xffffffff; /* We want approx 1.0 here, but - this is close enough. */ - else if ( exponent > -30 ) - { - adj = accum.msw >> -(exponent+1); /* tan */ - adj = mul_32_32(adj, adj); /* tan^2 */ - } - else - adj = 0; - adj = mul_32_32(0x898cc517, adj); /* delta * tan^2 */ - - fix_up.msw += adj; - if ( !(fix_up.msw & 0x80000000) ) /* did fix_up overflow ? */ - { - /* Yes, we need to add an msb */ - shr_Xsig(&fix_up, 1); - fix_up.msw |= 0x80000000; - shr_Xsig(&fix_up, 64 + exponent); - } - else - shr_Xsig(&fix_up, 65 + exponent); - - add_two_Xsig(&accum, &fix_up, &exponent); - - /* accum now contains tan(pi/2 - arg). - Use tan(arg) = 1.0 / tan(pi/2 - arg) - */ - accumulatoro.lsw = accumulatoro.midw = 0; - accumulatoro.msw = 0x80000000; - div_Xsig(&accumulatoro, &accum, &accum); - exponent = - exponent - 1; - } - - /* Transfer the result */ - round_Xsig(&accum); - FPU_settag0(TAG_Valid); - significand(st0_ptr) = XSIG_LL(accum); - setexponent16(st0_ptr, exponent + EXTENDED_Ebias); /* Result is positive. */ - -} diff --git a/arch/i386/math-emu/polynom_Xsig.S b/arch/i386/math-emu/polynom_Xsig.S deleted file mode 100644 index 17315c89ff3..00000000000 --- a/arch/i386/math-emu/polynom_Xsig.S +++ /dev/null @@ -1,135 +0,0 @@ -/*---------------------------------------------------------------------------+ - | polynomial_Xsig.S | - | | - | Fixed point arithmetic polynomial evaluation. | - | | - | Copyright (C) 1992,1993,1994,1995 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@jacobi.maths.monash.edu.au | - | | - | Call from C as: | - | void polynomial_Xsig(Xsig *accum, unsigned long long x, | - | unsigned long long terms[], int n) | - | | - | Computes: | - | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x | - | and adds the result to the 12 byte Xsig. | - | The terms[] are each 8 bytes, but all computation is performed to 12 byte | - | precision. | - | | - | This function must be used carefully: most overflow of intermediate | - | results is controlled, but overflow of the result is not. | - | | - +---------------------------------------------------------------------------*/ - .file "polynomial_Xsig.S" - -#include "fpu_emu.h" - - -#define TERM_SIZE $8 -#define SUM_MS -20(%ebp) /* sum ms long */ -#define SUM_MIDDLE -24(%ebp) /* sum middle long */ -#define SUM_LS -28(%ebp) /* sum ls long */ -#define ACCUM_MS -4(%ebp) /* accum ms long */ -#define ACCUM_MIDDLE -8(%ebp) /* accum middle long */ -#define ACCUM_LS -12(%ebp) /* accum ls long */ -#define OVERFLOWED -16(%ebp) /* addition overflow flag */ - -.text -ENTRY(polynomial_Xsig) - pushl %ebp - movl %esp,%ebp - subl $32,%esp - pushl %esi - pushl %edi - pushl %ebx - - movl PARAM2,%esi /* x */ - movl PARAM3,%edi /* terms */ - - movl TERM_SIZE,%eax - mull PARAM4 /* n */ - addl %eax,%edi - - movl 4(%edi),%edx /* terms[n] */ - movl %edx,SUM_MS - movl (%edi),%edx /* terms[n] */ - movl %edx,SUM_MIDDLE - xor %eax,%eax - movl %eax,SUM_LS - movb %al,OVERFLOWED - - subl TERM_SIZE,%edi - decl PARAM4 - js L_accum_done - -L_accum_loop: - xor %eax,%eax - movl %eax,ACCUM_MS - movl %eax,ACCUM_MIDDLE - - movl SUM_MIDDLE,%eax - mull (%esi) /* x ls long */ - movl %edx,ACCUM_LS - - movl SUM_MIDDLE,%eax - mull 4(%esi) /* x ms long */ - addl %eax,ACCUM_LS - adcl %edx,ACCUM_MIDDLE - adcl $0,ACCUM_MS - - movl SUM_MS,%eax - mull (%esi) /* x ls long */ - addl %eax,ACCUM_LS - adcl %edx,ACCUM_MIDDLE - adcl $0,ACCUM_MS - - movl SUM_MS,%eax - mull 4(%esi) /* x ms long */ - addl %eax,ACCUM_MIDDLE - adcl %edx,ACCUM_MS - - testb $0xff,OVERFLOWED - jz L_no_overflow - - movl (%esi),%eax - addl %eax,ACCUM_MIDDLE - movl 4(%esi),%eax - adcl %eax,ACCUM_MS /* This could overflow too */ - -L_no_overflow: - -/* - * Now put the sum of next term and the accumulator - * into the sum register - */ - movl ACCUM_LS,%eax - addl (%edi),%eax /* term ls long */ - movl %eax,SUM_LS - movl ACCUM_MIDDLE,%eax - adcl (%edi),%eax /* term ls long */ - movl %eax,SUM_MIDDLE - movl ACCUM_MS,%eax - adcl 4(%edi),%eax /* term ms long */ - movl %eax,SUM_MS - sbbb %al,%al - movb %al,OVERFLOWED /* Used in the next iteration */ - - subl TERM_SIZE,%edi - decl PARAM4 - jns L_accum_loop - -L_accum_done: - movl PARAM1,%edi /* accum */ - movl SUM_LS,%eax - addl %eax,(%edi) - movl SUM_MIDDLE,%eax - adcl %eax,4(%edi) - movl SUM_MS,%eax - adcl %eax,8(%edi) - - popl %ebx - popl %edi - popl %esi - leave - ret diff --git a/arch/i386/math-emu/reg_add_sub.c b/arch/i386/math-emu/reg_add_sub.c deleted file mode 100644 index 7cd3b37ac08..00000000000 --- a/arch/i386/math-emu/reg_add_sub.c +++ /dev/null @@ -1,374 +0,0 @@ -/*---------------------------------------------------------------------------+ - | reg_add_sub.c | - | | - | Functions to add or subtract two registers and put the result in a third. | - | | - | Copyright (C) 1992,1993,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | For each function, the destination may be any FPU_REG, including one of | - | the source FPU_REGs. | - | Each function returns 0 if the answer is o.k., otherwise a non-zero | - | value is returned, indicating either an exception condition or an | - | internal error. | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "reg_constant.h" -#include "fpu_emu.h" -#include "control_w.h" -#include "fpu_system.h" - -static -int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa, - FPU_REG const *b, u_char tagb, u_char signb, - FPU_REG *dest, int deststnr, int control_w); - -/* - Operates on st(0) and st(n), or on st(0) and temporary data. - The destination must be one of the source st(x). - */ -int FPU_add(FPU_REG const *b, u_char tagb, int deststnr, int control_w) -{ - FPU_REG *a = &st(0); - FPU_REG *dest = &st(deststnr); - u_char signb = getsign(b); - u_char taga = FPU_gettag0(); - u_char signa = getsign(a); - u_char saved_sign = getsign(dest); - int diff, tag, expa, expb; - - if ( !(taga | tagb) ) - { - expa = exponent(a); - expb = exponent(b); - - valid_add: - /* Both registers are valid */ - if (!(signa ^ signb)) - { - /* signs are the same */ - tag = FPU_u_add(a, b, dest, control_w, signa, expa, expb); - } - else - { - /* The signs are different, so do a subtraction */ - diff = expa - expb; - if (!diff) - { - diff = a->sigh - b->sigh; /* This works only if the ms bits - are identical. */ - if (!diff) - { - diff = a->sigl > b->sigl; - if (!diff) - diff = -(a->sigl < b->sigl); - } - } - - if (diff > 0) - { - tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb); - } - else if ( diff < 0 ) - { - tag = FPU_u_sub(b, a, dest, control_w, signb, expb, expa); - } - else - { - FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); - /* sign depends upon rounding mode */ - setsign(dest, ((control_w & CW_RC) != RC_DOWN) - ? SIGN_POS : SIGN_NEG); - return TAG_Zero; - } - } - - if ( tag < 0 ) - { - setsign(dest, saved_sign); - return tag; - } - FPU_settagi(deststnr, tag); - return tag; - } - - if ( taga == TAG_Special ) - taga = FPU_Special(a); - if ( tagb == TAG_Special ) - tagb = FPU_Special(b); - - if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) - || ((taga == TW_Denormal) && (tagb == TAG_Valid)) - || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) - { - FPU_REG x, y; - - if ( denormal_operand() < 0 ) - return FPU_Exception; - - FPU_to_exp16(a, &x); - FPU_to_exp16(b, &y); - a = &x; - b = &y; - expa = exponent16(a); - expb = exponent16(b); - goto valid_add; - } - - if ( (taga == TW_NaN) || (tagb == TW_NaN) ) - { - if ( deststnr == 0 ) - return real_2op_NaN(b, tagb, deststnr, a); - else - return real_2op_NaN(a, taga, deststnr, a); - } - - return add_sub_specials(a, taga, signa, b, tagb, signb, - dest, deststnr, control_w); -} - - -/* Subtract b from a. (a-b) -> dest */ -int FPU_sub(int flags, int rm, int control_w) -{ - FPU_REG const *a, *b; - FPU_REG *dest; - u_char taga, tagb, signa, signb, saved_sign, sign; - int diff, tag = 0, expa, expb, deststnr; - - a = &st(0); - taga = FPU_gettag0(); - - deststnr = 0; - if ( flags & LOADED ) - { - b = (FPU_REG *)rm; - tagb = flags & 0x0f; - } - else - { - b = &st(rm); - tagb = FPU_gettagi(rm); - - if ( flags & DEST_RM ) - deststnr = rm; - } - - signa = getsign(a); - signb = getsign(b); - - if ( flags & REV ) - { - signa ^= SIGN_NEG; - signb ^= SIGN_NEG; - } - - dest = &st(deststnr); - saved_sign = getsign(dest); - - if ( !(taga | tagb) ) - { - expa = exponent(a); - expb = exponent(b); - - valid_subtract: - /* Both registers are valid */ - - diff = expa - expb; - - if (!diff) - { - diff = a->sigh - b->sigh; /* Works only if ms bits are identical */ - if (!diff) - { - diff = a->sigl > b->sigl; - if (!diff) - diff = -(a->sigl < b->sigl); - } - } - - switch ( (((int)signa)*2 + signb) / SIGN_NEG ) - { - case 0: /* P - P */ - case 3: /* N - N */ - if (diff > 0) - { - /* |a| > |b| */ - tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb); - } - else if ( diff == 0 ) - { - FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); - - /* sign depends upon rounding mode */ - setsign(dest, ((control_w & CW_RC) != RC_DOWN) - ? SIGN_POS : SIGN_NEG); - return TAG_Zero; - } - else - { - sign = signa ^ SIGN_NEG; - tag = FPU_u_sub(b, a, dest, control_w, sign, expb, expa); - } - break; - case 1: /* P - N */ - tag = FPU_u_add(a, b, dest, control_w, SIGN_POS, expa, expb); - break; - case 2: /* N - P */ - tag = FPU_u_add(a, b, dest, control_w, SIGN_NEG, expa, expb); - break; -#ifdef PARANOID - default: - EXCEPTION(EX_INTERNAL|0x111); - return -1; -#endif - } - if ( tag < 0 ) - { - setsign(dest, saved_sign); - return tag; - } - FPU_settagi(deststnr, tag); - return tag; - } - - if ( taga == TAG_Special ) - taga = FPU_Special(a); - if ( tagb == TAG_Special ) - tagb = FPU_Special(b); - - if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) - || ((taga == TW_Denormal) && (tagb == TAG_Valid)) - || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) - { - FPU_REG x, y; - - if ( denormal_operand() < 0 ) - return FPU_Exception; - - FPU_to_exp16(a, &x); - FPU_to_exp16(b, &y); - a = &x; - b = &y; - expa = exponent16(a); - expb = exponent16(b); - - goto valid_subtract; - } - - if ( (taga == TW_NaN) || (tagb == TW_NaN) ) - { - FPU_REG const *d1, *d2; - if ( flags & REV ) - { - d1 = b; - d2 = a; - } - else - { - d1 = a; - d2 = b; - } - if ( flags & LOADED ) - return real_2op_NaN(b, tagb, deststnr, d1); - if ( flags & DEST_RM ) - return real_2op_NaN(a, taga, deststnr, d2); - else - return real_2op_NaN(b, tagb, deststnr, d2); - } - - return add_sub_specials(a, taga, signa, b, tagb, signb ^ SIGN_NEG, - dest, deststnr, control_w); -} - - -static -int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa, - FPU_REG const *b, u_char tagb, u_char signb, - FPU_REG *dest, int deststnr, int control_w) -{ - if ( ((taga == TW_Denormal) || (tagb == TW_Denormal)) - && (denormal_operand() < 0) ) - return FPU_Exception; - - if (taga == TAG_Zero) - { - if (tagb == TAG_Zero) - { - /* Both are zero, result will be zero. */ - u_char different_signs = signa ^ signb; - - FPU_copy_to_regi(a, TAG_Zero, deststnr); - if ( different_signs ) - { - /* Signs are different. */ - /* Sign of answer depends upon rounding mode. */ - setsign(dest, ((control_w & CW_RC) != RC_DOWN) - ? SIGN_POS : SIGN_NEG); - } - else - setsign(dest, signa); /* signa may differ from the sign of a. */ - return TAG_Zero; - } - else - { - reg_copy(b, dest); - if ( (tagb == TW_Denormal) && (b->sigh & 0x80000000) ) - { - /* A pseudoDenormal, convert it. */ - addexponent(dest, 1); - tagb = TAG_Valid; - } - else if ( tagb > TAG_Empty ) - tagb = TAG_Special; - setsign(dest, signb); /* signb may differ from the sign of b. */ - FPU_settagi(deststnr, tagb); - return tagb; - } - } - else if (tagb == TAG_Zero) - { - reg_copy(a, dest); - if ( (taga == TW_Denormal) && (a->sigh & 0x80000000) ) - { - /* A pseudoDenormal */ - addexponent(dest, 1); - taga = TAG_Valid; - } - else if ( taga > TAG_Empty ) - taga = TAG_Special; - setsign(dest, signa); /* signa may differ from the sign of a. */ - FPU_settagi(deststnr, taga); - return taga; - } - else if (taga == TW_Infinity) - { - if ( (tagb != TW_Infinity) || (signa == signb) ) - { - FPU_copy_to_regi(a, TAG_Special, deststnr); - setsign(dest, signa); /* signa may differ from the sign of a. */ - return taga; - } - /* Infinity-Infinity is undefined. */ - return arith_invalid(deststnr); - } - else if (tagb == TW_Infinity) - { - FPU_copy_to_regi(b, TAG_Special, deststnr); - setsign(dest, signb); /* signb may differ from the sign of b. */ - return tagb; - } - -#ifdef PARANOID - EXCEPTION(EX_INTERNAL|0x101); -#endif - - return FPU_Exception; -} - diff --git a/arch/i386/math-emu/reg_compare.c b/arch/i386/math-emu/reg_compare.c deleted file mode 100644 index f37c5b5a35a..00000000000 --- a/arch/i386/math-emu/reg_compare.c +++ /dev/null @@ -1,381 +0,0 @@ -/*---------------------------------------------------------------------------+ - | reg_compare.c | - | | - | Compare two floating point registers | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | compare() is the core FPU_REG comparison function | - +---------------------------------------------------------------------------*/ - -#include "fpu_system.h" -#include "exception.h" -#include "fpu_emu.h" -#include "control_w.h" -#include "status_w.h" - - -static int compare(FPU_REG const *b, int tagb) -{ - int diff, exp0, expb; - u_char st0_tag; - FPU_REG *st0_ptr; - FPU_REG x, y; - u_char st0_sign, signb = getsign(b); - - st0_ptr = &st(0); - st0_tag = FPU_gettag0(); - st0_sign = getsign(st0_ptr); - - if ( tagb == TAG_Special ) - tagb = FPU_Special(b); - if ( st0_tag == TAG_Special ) - st0_tag = FPU_Special(st0_ptr); - - if ( ((st0_tag != TAG_Valid) && (st0_tag != TW_Denormal)) - || ((tagb != TAG_Valid) && (tagb != TW_Denormal)) ) - { - if ( st0_tag == TAG_Zero ) - { - if ( tagb == TAG_Zero ) return COMP_A_eq_B; - if ( tagb == TAG_Valid ) - return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B); - if ( tagb == TW_Denormal ) - return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) - | COMP_Denormal; - } - else if ( tagb == TAG_Zero ) - { - if ( st0_tag == TAG_Valid ) - return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B); - if ( st0_tag == TW_Denormal ) - return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) - | COMP_Denormal; - } - - if ( st0_tag == TW_Infinity ) - { - if ( (tagb == TAG_Valid) || (tagb == TAG_Zero) ) - return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B); - else if ( tagb == TW_Denormal ) - return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) - | COMP_Denormal; - else if ( tagb == TW_Infinity ) - { - /* The 80486 book says that infinities can be equal! */ - return (st0_sign == signb) ? COMP_A_eq_B : - ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B); - } - /* Fall through to the NaN code */ - } - else if ( tagb == TW_Infinity ) - { - if ( (st0_tag == TAG_Valid) || (st0_tag == TAG_Zero) ) - return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B); - if ( st0_tag == TW_Denormal ) - return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) - | COMP_Denormal; - /* Fall through to the NaN code */ - } - - /* The only possibility now should be that one of the arguments - is a NaN */ - if ( (st0_tag == TW_NaN) || (tagb == TW_NaN) ) - { - int signalling = 0, unsupported = 0; - if ( st0_tag == TW_NaN ) - { - signalling = (st0_ptr->sigh & 0xc0000000) == 0x80000000; - unsupported = !((exponent(st0_ptr) == EXP_OVER) - && (st0_ptr->sigh & 0x80000000)); - } - if ( tagb == TW_NaN ) - { - signalling |= (b->sigh & 0xc0000000) == 0x80000000; - unsupported |= !((exponent(b) == EXP_OVER) - && (b->sigh & 0x80000000)); - } - if ( signalling || unsupported ) - return COMP_No_Comp | COMP_SNaN | COMP_NaN; - else - /* Neither is a signaling NaN */ - return COMP_No_Comp | COMP_NaN; - } - - EXCEPTION(EX_Invalid); - } - - if (st0_sign != signb) - { - return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) - | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? - COMP_Denormal : 0); - } - - if ( (st0_tag == TW_Denormal) || (tagb == TW_Denormal) ) - { - FPU_to_exp16(st0_ptr, &x); - FPU_to_exp16(b, &y); - st0_ptr = &x; - b = &y; - exp0 = exponent16(st0_ptr); - expb = exponent16(b); - } - else - { - exp0 = exponent(st0_ptr); - expb = exponent(b); - } - -#ifdef PARANOID - if (!(st0_ptr->sigh & 0x80000000)) EXCEPTION(EX_Invalid); - if (!(b->sigh & 0x80000000)) EXCEPTION(EX_Invalid); -#endif /* PARANOID */ - - diff = exp0 - expb; - if ( diff == 0 ) - { - diff = st0_ptr->sigh - b->sigh; /* Works only if ms bits are - identical */ - if ( diff == 0 ) - { - diff = st0_ptr->sigl > b->sigl; - if ( diff == 0 ) - diff = -(st0_ptr->sigl < b->sigl); - } - } - - if ( diff > 0 ) - { - return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) - | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? - COMP_Denormal : 0); - } - if ( diff < 0 ) - { - return ((st0_sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) - | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? - COMP_Denormal : 0); - } - - return COMP_A_eq_B - | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? - COMP_Denormal : 0); - -} - - -/* This function requires that st(0) is not empty */ -int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag) -{ - int f = 0, c; - - c = compare(loaded_data, loaded_tag); - - if (c & COMP_NaN) - { - EXCEPTION(EX_Invalid); - f = SW_C3 | SW_C2 | SW_C0; - } - else - switch (c & 7) - { - case COMP_A_lt_B: - f = SW_C0; - break; - case COMP_A_eq_B: - f = SW_C3; - break; - case COMP_A_gt_B: - f = 0; - break; - case COMP_No_Comp: - f = SW_C3 | SW_C2 | SW_C0; - break; -#ifdef PARANOID - default: - EXCEPTION(EX_INTERNAL|0x121); - f = SW_C3 | SW_C2 | SW_C0; - break; -#endif /* PARANOID */ - } - setcc(f); - if (c & COMP_Denormal) - { - return denormal_operand() < 0; - } - return 0; -} - - -static int compare_st_st(int nr) -{ - int f = 0, c; - FPU_REG *st_ptr; - - if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) ) - { - setcc(SW_C3 | SW_C2 | SW_C0); - /* Stack fault */ - EXCEPTION(EX_StackUnder); - return !(control_word & CW_Invalid); - } - - st_ptr = &st(nr); - c = compare(st_ptr, FPU_gettagi(nr)); - if (c & COMP_NaN) - { - setcc(SW_C3 | SW_C2 | SW_C0); - EXCEPTION(EX_Invalid); - return !(control_word & CW_Invalid); - } - else - switch (c & 7) - { - case COMP_A_lt_B: - f = SW_C0; - break; - case COMP_A_eq_B: - f = SW_C3; - break; - case COMP_A_gt_B: - f = 0; - break; - case COMP_No_Comp: - f = SW_C3 | SW_C2 | SW_C0; - break; -#ifdef PARANOID - default: - EXCEPTION(EX_INTERNAL|0x122); - f = SW_C3 | SW_C2 | SW_C0; - break; -#endif /* PARANOID */ - } - setcc(f); - if (c & COMP_Denormal) - { - return denormal_operand() < 0; - } - return 0; -} - - -static int compare_u_st_st(int nr) -{ - int f = 0, c; - FPU_REG *st_ptr; - - if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) ) - { - setcc(SW_C3 | SW_C2 | SW_C0); - /* Stack fault */ - EXCEPTION(EX_StackUnder); - return !(control_word & CW_Invalid); - } - - st_ptr = &st(nr); - c = compare(st_ptr, FPU_gettagi(nr)); - if (c & COMP_NaN) - { - setcc(SW_C3 | SW_C2 | SW_C0); - if (c & COMP_SNaN) /* This is the only difference between - un-ordered and ordinary comparisons */ - { - EXCEPTION(EX_Invalid); - return !(control_word & CW_Invalid); - } - return 0; - } - else - switch (c & 7) - { - case COMP_A_lt_B: - f = SW_C0; - break; - case COMP_A_eq_B: - f = SW_C3; - break; - case COMP_A_gt_B: - f = 0; - break; - case COMP_No_Comp: - f = SW_C3 | SW_C2 | SW_C0; - break; -#ifdef PARANOID - default: - EXCEPTION(EX_INTERNAL|0x123); - f = SW_C3 | SW_C2 | SW_C0; - break; -#endif /* PARANOID */ - } - setcc(f); - if (c & COMP_Denormal) - { - return denormal_operand() < 0; - } - return 0; -} - -/*---------------------------------------------------------------------------*/ - -void fcom_st(void) -{ - /* fcom st(i) */ - compare_st_st(FPU_rm); -} - - -void fcompst(void) -{ - /* fcomp st(i) */ - if ( !compare_st_st(FPU_rm) ) - FPU_pop(); -} - - -void fcompp(void) -{ - /* fcompp */ - if (FPU_rm != 1) - { - FPU_illegal(); - return; - } - if ( !compare_st_st(1) ) - poppop(); -} - - -void fucom_(void) -{ - /* fucom st(i) */ - compare_u_st_st(FPU_rm); - -} - - -void fucomp(void) -{ - /* fucomp st(i) */ - if ( !compare_u_st_st(FPU_rm) ) - FPU_pop(); -} - - -void fucompp(void) -{ - /* fucompp */ - if (FPU_rm == 1) - { - if ( !compare_u_st_st(1) ) - poppop(); - } - else - FPU_illegal(); -} diff --git a/arch/i386/math-emu/reg_constant.c b/arch/i386/math-emu/reg_constant.c deleted file mode 100644 index a8501580196..00000000000 --- a/arch/i386/math-emu/reg_constant.c +++ /dev/null @@ -1,120 +0,0 @@ -/*---------------------------------------------------------------------------+ - | reg_constant.c | - | | - | All of the constant FPU_REGs | - | | - | Copyright (C) 1992,1993,1994,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "fpu_system.h" -#include "fpu_emu.h" -#include "status_w.h" -#include "reg_constant.h" -#include "control_w.h" - - -#define MAKE_REG(s,e,l,h) { l, h, \ - ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) } - -FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000); -#if 0 -FPU_REG const CONST_2 = MAKE_REG(POS, 1, 0x00000000, 0x80000000); -FPU_REG const CONST_HALF = MAKE_REG(POS, -1, 0x00000000, 0x80000000); -#endif /* 0 */ -static FPU_REG const CONST_L2T = MAKE_REG(POS, 1, 0xcd1b8afe, 0xd49a784b); -static FPU_REG const CONST_L2E = MAKE_REG(POS, 0, 0x5c17f0bc, 0xb8aa3b29); -FPU_REG const CONST_PI = MAKE_REG(POS, 1, 0x2168c235, 0xc90fdaa2); -FPU_REG const CONST_PI2 = MAKE_REG(POS, 0, 0x2168c235, 0xc90fdaa2); -FPU_REG const CONST_PI4 = MAKE_REG(POS, -1, 0x2168c235, 0xc90fdaa2); -static FPU_REG const CONST_LG2 = MAKE_REG(POS, -2, 0xfbcff799, 0x9a209a84); -static FPU_REG const CONST_LN2 = MAKE_REG(POS, -1, 0xd1cf79ac, 0xb17217f7); - -/* Extra bits to take pi/2 to more than 128 bits precision. */ -FPU_REG const CONST_PI2extra = MAKE_REG(NEG, -66, - 0xfc8f8cbb, 0xece675d1); - -/* Only the sign (and tag) is used in internal zeroes */ -FPU_REG const CONST_Z = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0); - -/* Only the sign and significand (and tag) are used in internal NaNs */ -/* The 80486 never generates one of these -FPU_REG const CONST_SNAN = MAKE_REG(POS, EXP_OVER, 0x00000001, 0x80000000); - */ -/* This is the real indefinite QNaN */ -FPU_REG const CONST_QNaN = MAKE_REG(NEG, EXP_OVER, 0x00000000, 0xC0000000); - -/* Only the sign (and tag) is used in internal infinities */ -FPU_REG const CONST_INF = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000); - - -static void fld_const(FPU_REG const *c, int adj, u_char tag) -{ - FPU_REG *st_new_ptr; - - if ( STACK_OVERFLOW ) - { - FPU_stack_overflow(); - return; - } - push(); - reg_copy(c, st_new_ptr); - st_new_ptr->sigl += adj; /* For all our fldxxx constants, we don't need to - borrow or carry. */ - FPU_settag0(tag); - clear_C1(); -} - -/* A fast way to find out whether x is one of RC_DOWN or RC_CHOP - (and not one of RC_RND or RC_UP). - */ -#define DOWN_OR_CHOP(x) (x & RC_DOWN) - -static void fld1(int rc) -{ - fld_const(&CONST_1, 0, TAG_Valid); -} - -static void fldl2t(int rc) -{ - fld_const(&CONST_L2T, (rc == RC_UP) ? 1 : 0, TAG_Valid); -} - -static void fldl2e(int rc) -{ - fld_const(&CONST_L2E, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); -} - -static void fldpi(int rc) -{ - fld_const(&CONST_PI, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); -} - -static void fldlg2(int rc) -{ - fld_const(&CONST_LG2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); -} - -static void fldln2(int rc) -{ - fld_const(&CONST_LN2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); -} - -static void fldz(int rc) -{ - fld_const(&CONST_Z, 0, TAG_Zero); -} - -typedef void (*FUNC_RC)(int); - -static FUNC_RC constants_table[] = { - fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2, fldz, (FUNC_RC)FPU_illegal -}; - -void fconst(void) -{ - (constants_table[FPU_rm])(control_word & CW_RC); -} diff --git a/arch/i386/math-emu/reg_constant.h b/arch/i386/math-emu/reg_constant.h deleted file mode 100644 index 1bffaec3a13..00000000000 --- a/arch/i386/math-emu/reg_constant.h +++ /dev/null @@ -1,25 +0,0 @@ -/*---------------------------------------------------------------------------+ - | reg_constant.h | - | | - | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@vaxc.cc.monash.edu.au | - | | - +---------------------------------------------------------------------------*/ - -#ifndef _REG_CONSTANT_H_ -#define _REG_CONSTANT_H_ - -#include "fpu_emu.h" - -extern FPU_REG const CONST_1; -extern FPU_REG const CONST_PI; -extern FPU_REG const CONST_PI2; -extern FPU_REG const CONST_PI2extra; -extern FPU_REG const CONST_PI4; -extern FPU_REG const CONST_Z; -extern FPU_REG const CONST_PINF; -extern FPU_REG const CONST_INF; -extern FPU_REG const CONST_MINF; -extern FPU_REG const CONST_QNaN; - -#endif /* _REG_CONSTANT_H_ */ diff --git a/arch/i386/math-emu/reg_convert.c b/arch/i386/math-emu/reg_convert.c deleted file mode 100644 index 45a25875270..00000000000 --- a/arch/i386/math-emu/reg_convert.c +++ /dev/null @@ -1,53 +0,0 @@ -/*---------------------------------------------------------------------------+ - | reg_convert.c | - | | - | Convert register representation. | - | | - | Copyright (C) 1992,1993,1994,1996,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "fpu_emu.h" - - -int FPU_to_exp16(FPU_REG const *a, FPU_REG *x) -{ - int sign = getsign(a); - - *(long long *)&(x->sigl) = *(const long long *)&(a->sigl); - - /* Set up the exponent as a 16 bit quantity. */ - setexponent16(x, exponent(a)); - - if ( exponent16(x) == EXP_UNDER ) - { - /* The number is a de-normal or pseudodenormal. */ - /* We only deal with the significand and exponent. */ - - if (x->sigh & 0x80000000) - { - /* Is a pseudodenormal. */ - /* This is non-80486 behaviour because the number - loses its 'denormal' identity. */ - addexponent(x, 1); - } - else - { - /* Is a denormal. */ - addexponent(x, 1); - FPU_normalize_nuo(x); - } - } - - if ( !(x->sigh & 0x80000000) ) - { - EXCEPTION(EX_INTERNAL | 0x180); - } - - return sign; -} - diff --git a/arch/i386/math-emu/reg_divide.c b/arch/i386/math-emu/reg_divide.c deleted file mode 100644 index 5cee7ff920d..00000000000 --- a/arch/i386/math-emu/reg_divide.c +++ /dev/null @@ -1,207 +0,0 @@ -/*---------------------------------------------------------------------------+ - | reg_divide.c | - | | - | Divide one FPU_REG by another and put the result in a destination FPU_REG.| - | | - | Copyright (C) 1996 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@jacobi.maths.monash.edu.au | - | | - | Return value is the tag of the answer, or-ed with FPU_Exception if | - | one was raised, or -1 on internal error. | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | The destination may be any FPU_REG, including one of the source FPU_REGs. | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "reg_constant.h" -#include "fpu_emu.h" -#include "fpu_system.h" - -/* - Divide one register by another and put the result into a third register. - */ -int FPU_div(int flags, int rm, int control_w) -{ - FPU_REG x, y; - FPU_REG const *a, *b, *st0_ptr, *st_ptr; - FPU_REG *dest; - u_char taga, tagb, signa, signb, sign, saved_sign; - int tag, deststnr; - - if ( flags & DEST_RM ) - deststnr = rm; - else - deststnr = 0; - - if ( flags & REV ) - { - b = &st(0); - st0_ptr = b; - tagb = FPU_gettag0(); - if ( flags & LOADED ) - { - a = (FPU_REG *)rm; - taga = flags & 0x0f; - } - else - { - a = &st(rm); - st_ptr = a; - taga = FPU_gettagi(rm); - } - } - else - { - a = &st(0); - st0_ptr = a; - taga = FPU_gettag0(); - if ( flags & LOADED ) - { - b = (FPU_REG *)rm; - tagb = flags & 0x0f; - } - else - { - b = &st(rm); - st_ptr = b; - tagb = FPU_gettagi(rm); - } - } - - signa = getsign(a); - signb = getsign(b); - - sign = signa ^ signb; - - dest = &st(deststnr); - saved_sign = getsign(dest); - - if ( !(taga | tagb) ) - { - /* Both regs Valid, this should be the most common case. */ - reg_copy(a, &x); - reg_copy(b, &y); - setpositive(&x); - setpositive(&y); - tag = FPU_u_div(&x, &y, dest, control_w, sign); - - if ( tag < 0 ) - return tag; - - FPU_settagi(deststnr, tag); - return tag; - } - - if ( taga == TAG_Special ) - taga = FPU_Special(a); - if ( tagb == TAG_Special ) - tagb = FPU_Special(b); - - if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) - || ((taga == TW_Denormal) && (tagb == TAG_Valid)) - || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) - { - if ( denormal_operand() < 0 ) - return FPU_Exception; - - FPU_to_exp16(a, &x); - FPU_to_exp16(b, &y); - tag = FPU_u_div(&x, &y, dest, control_w, sign); - if ( tag < 0 ) - return tag; - - FPU_settagi(deststnr, tag); - return tag; - } - else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) ) - { - if ( tagb != TAG_Zero ) - { - /* Want to find Zero/Valid */ - if ( tagb == TW_Denormal ) - { - if ( denormal_operand() < 0 ) - return FPU_Exception; - } - - /* The result is zero. */ - FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); - setsign(dest, sign); - return TAG_Zero; - } - /* We have an exception condition, either 0/0 or Valid/Zero. */ - if ( taga == TAG_Zero ) - { - /* 0/0 */ - return arith_invalid(deststnr); - } - /* Valid/Zero */ - return FPU_divide_by_zero(deststnr, sign); - } - /* Must have infinities, NaNs, etc */ - else if ( (taga == TW_NaN) || (tagb == TW_NaN) ) - { - if ( flags & LOADED ) - return real_2op_NaN((FPU_REG *)rm, flags & 0x0f, 0, st0_ptr); - - if ( flags & DEST_RM ) - { - int tag; - tag = FPU_gettag0(); - if ( tag == TAG_Special ) - tag = FPU_Special(st0_ptr); - return real_2op_NaN(st0_ptr, tag, rm, (flags & REV) ? st0_ptr : &st(rm)); - } - else - { - int tag; - tag = FPU_gettagi(rm); - if ( tag == TAG_Special ) - tag = FPU_Special(&st(rm)); - return real_2op_NaN(&st(rm), tag, 0, (flags & REV) ? st0_ptr : &st(rm)); - } - } - else if (taga == TW_Infinity) - { - if (tagb == TW_Infinity) - { - /* infinity/infinity */ - return arith_invalid(deststnr); - } - else - { - /* tagb must be Valid or Zero */ - if ( (tagb == TW_Denormal) && (denormal_operand() < 0) ) - return FPU_Exception; - - /* Infinity divided by Zero or Valid does - not raise and exception, but returns Infinity */ - FPU_copy_to_regi(a, TAG_Special, deststnr); - setsign(dest, sign); - return taga; - } - } - else if (tagb == TW_Infinity) - { - if ( (taga == TW_Denormal) && (denormal_operand() < 0) ) - return FPU_Exception; - - /* The result is zero. */ - FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); - setsign(dest, sign); - return TAG_Zero; - } -#ifdef PARANOID - else - { - EXCEPTION(EX_INTERNAL|0x102); - return FPU_Exception; - } -#endif /* PARANOID */ - - return 0; -} diff --git a/arch/i386/math-emu/reg_ld_str.c b/arch/i386/math-emu/reg_ld_str.c deleted file mode 100644 index e976caef649..00000000000 --- a/arch/i386/math-emu/reg_ld_str.c +++ /dev/null @@ -1,1375 +0,0 @@ -/*---------------------------------------------------------------------------+ - | reg_ld_str.c | - | | - | All of the functions which transfer data between user memory and FPU_REGs.| - | | - | Copyright (C) 1992,1993,1994,1996,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | Note: | - | The file contains code which accesses user memory. | - | Emulator static data may change when user memory is accessed, due to | - | other processes using the emulator while swapping is in progress. | - +---------------------------------------------------------------------------*/ - -#include "fpu_emu.h" - -#include - -#include "fpu_system.h" -#include "exception.h" -#include "reg_constant.h" -#include "control_w.h" -#include "status_w.h" - - -#define DOUBLE_Emax 1023 /* largest valid exponent */ -#define DOUBLE_Ebias 1023 -#define DOUBLE_Emin (-1022) /* smallest valid exponent */ - -#define SINGLE_Emax 127 /* largest valid exponent */ -#define SINGLE_Ebias 127 -#define SINGLE_Emin (-126) /* smallest valid exponent */ - - -static u_char normalize_no_excep(FPU_REG *r, int exp, int sign) -{ - u_char tag; - - setexponent16(r, exp); - - tag = FPU_normalize_nuo(r); - stdexp(r); - if ( sign ) - setnegative(r); - - return tag; -} - - -int FPU_tagof(FPU_REG *ptr) -{ - int exp; - - exp = exponent16(ptr) & 0x7fff; - if ( exp == 0 ) - { - if ( !(ptr->sigh | ptr->sigl) ) - { - return TAG_Zero; - } - /* The number is a de-normal or pseudodenormal. */ - return TAG_Special; - } - - if ( exp == 0x7fff ) - { - /* Is an Infinity, a NaN, or an unsupported data type. */ - return TAG_Special; - } - - if ( !(ptr->sigh & 0x80000000) ) - { - /* Unsupported data type. */ - /* Valid numbers have the ms bit set to 1. */ - /* Unnormal. */ - return TAG_Special; - } - - return TAG_Valid; -} - - -/* Get a long double from user memory */ -int FPU_load_extended(long double __user *s, int stnr) -{ - FPU_REG *sti_ptr = &st(stnr); - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, s, 10); - __copy_from_user(sti_ptr, s, 10); - RE_ENTRANT_CHECK_ON; - - return FPU_tagof(sti_ptr); -} - - -/* Get a double from user memory */ -int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data) -{ - int exp, tag, negative; - unsigned m64, l64; - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, dfloat, 8); - FPU_get_user(m64, 1 + (unsigned long __user *) dfloat); - FPU_get_user(l64, (unsigned long __user *) dfloat); - RE_ENTRANT_CHECK_ON; - - negative = (m64 & 0x80000000) ? SIGN_Negative : SIGN_Positive; - exp = ((m64 & 0x7ff00000) >> 20) - DOUBLE_Ebias + EXTENDED_Ebias; - m64 &= 0xfffff; - if ( exp > DOUBLE_Emax + EXTENDED_Ebias ) - { - /* Infinity or NaN */ - if ((m64 == 0) && (l64 == 0)) - { - /* +- infinity */ - loaded_data->sigh = 0x80000000; - loaded_data->sigl = 0x00000000; - exp = EXP_Infinity + EXTENDED_Ebias; - tag = TAG_Special; - } - else - { - /* Must be a signaling or quiet NaN */ - exp = EXP_NaN + EXTENDED_Ebias; - loaded_data->sigh = (m64 << 11) | 0x80000000; - loaded_data->sigh |= l64 >> 21; - loaded_data->sigl = l64 << 11; - tag = TAG_Special; /* The calling function must look for NaNs */ - } - } - else if ( exp < DOUBLE_Emin + EXTENDED_Ebias ) - { - /* Zero or de-normal */ - if ((m64 == 0) && (l64 == 0)) - { - /* Zero */ - reg_copy(&CONST_Z, loaded_data); - exp = 0; - tag = TAG_Zero; - } - else - { - /* De-normal */ - loaded_data->sigh = m64 << 11; - loaded_data->sigh |= l64 >> 21; - loaded_data->sigl = l64 << 11; - - return normalize_no_excep(loaded_data, DOUBLE_Emin, negative) - | (denormal_operand() < 0 ? FPU_Exception : 0); - } - } - else - { - loaded_data->sigh = (m64 << 11) | 0x80000000; - loaded_data->sigh |= l64 >> 21; - loaded_data->sigl = l64 << 11; - - tag = TAG_Valid; - } - - setexponent16(loaded_data, exp | negative); - - return tag; -} - - -/* Get a float from user memory */ -int FPU_load_single(float __user *single, FPU_REG *loaded_data) -{ - unsigned m32; - int exp, tag, negative; - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, single, 4); - FPU_get_user(m32, (unsigned long __user *) single); - RE_ENTRANT_CHECK_ON; - - negative = (m32 & 0x80000000) ? SIGN_Negative : SIGN_Positive; - - if (!(m32 & 0x7fffffff)) - { - /* Zero */ - reg_copy(&CONST_Z, loaded_data); - addexponent(loaded_data, negative); - return TAG_Zero; - } - exp = ((m32 & 0x7f800000) >> 23) - SINGLE_Ebias + EXTENDED_Ebias; - m32 = (m32 & 0x7fffff) << 8; - if ( exp < SINGLE_Emin + EXTENDED_Ebias ) - { - /* De-normals */ - loaded_data->sigh = m32; - loaded_data->sigl = 0; - - return normalize_no_excep(loaded_data, SINGLE_Emin, negative) - | (denormal_operand() < 0 ? FPU_Exception : 0); - } - else if ( exp > SINGLE_Emax + EXTENDED_Ebias ) - { - /* Infinity or NaN */ - if ( m32 == 0 ) - { - /* +- infinity */ - loaded_data->sigh = 0x80000000; - loaded_data->sigl = 0x00000000; - exp = EXP_Infinity + EXTENDED_Ebias; - tag = TAG_Special; - } - else - { - /* Must be a signaling or quiet NaN */ - exp = EXP_NaN + EXTENDED_Ebias; - loaded_data->sigh = m32 | 0x80000000; - loaded_data->sigl = 0; - tag = TAG_Special; /* The calling function must look for NaNs */ - } - } - else - { - loaded_data->sigh = m32 | 0x80000000; - loaded_data->sigl = 0; - tag = TAG_Valid; - } - - setexponent16(loaded_data, exp | negative); /* Set the sign. */ - - return tag; -} - - -/* Get a long long from user memory */ -int FPU_load_int64(long long __user *_s) -{ - long long s; - int sign; - FPU_REG *st0_ptr = &st(0); - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, _s, 8); - if (copy_from_user(&s,_s,8)) - FPU_abort; - RE_ENTRANT_CHECK_ON; - - if (s == 0) - { - reg_copy(&CONST_Z, st0_ptr); - return TAG_Zero; - } - - if (s > 0) - sign = SIGN_Positive; - else - { - s = -s; - sign = SIGN_Negative; - } - - significand(st0_ptr) = s; - - return normalize_no_excep(st0_ptr, 63, sign); -} - - -/* Get a long from user memory */ -int FPU_load_int32(long __user *_s, FPU_REG *loaded_data) -{ - long s; - int negative; - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, _s, 4); - FPU_get_user(s, _s); - RE_ENTRANT_CHECK_ON; - - if (s == 0) - { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; } - - if (s > 0) - negative = SIGN_Positive; - else - { - s = -s; - negative = SIGN_Negative; - } - - loaded_data->sigh = s; - loaded_data->sigl = 0; - - return normalize_no_excep(loaded_data, 31, negative); -} - - -/* Get a short from user memory */ -int FPU_load_int16(short __user *_s, FPU_REG *loaded_data) -{ - int s, negative; - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, _s, 2); - /* Cast as short to get the sign extended. */ - FPU_get_user(s, _s); - RE_ENTRANT_CHECK_ON; - - if (s == 0) - { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; } - - if (s > 0) - negative = SIGN_Positive; - else - { - s = -s; - negative = SIGN_Negative; - } - - loaded_data->sigh = s << 16; - loaded_data->sigl = 0; - - return normalize_no_excep(loaded_data, 15, negative); -} - - -/* Get a packed bcd array from user memory */ -int FPU_load_bcd(u_char __user *s) -{ - FPU_REG *st0_ptr = &st(0); - int pos; - u_char bcd; - long long l=0; - int sign; - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, s, 10); - RE_ENTRANT_CHECK_ON; - for ( pos = 8; pos >= 0; pos--) - { - l *= 10; - RE_ENTRANT_CHECK_OFF; - FPU_get_user(bcd, s+pos); - RE_ENTRANT_CHECK_ON; - l += bcd >> 4; - l *= 10; - l += bcd & 0x0f; - } - - RE_ENTRANT_CHECK_OFF; - FPU_get_user(sign, s+9); - sign = sign & 0x80 ? SIGN_Negative : SIGN_Positive; - RE_ENTRANT_CHECK_ON; - - if ( l == 0 ) - { - reg_copy(&CONST_Z, st0_ptr); - addexponent(st0_ptr, sign); /* Set the sign. */ - return TAG_Zero; - } - else - { - significand(st0_ptr) = l; - return normalize_no_excep(st0_ptr, 63, sign); - } -} - -/*===========================================================================*/ - -/* Put a long double into user memory */ -int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag, long double __user *d) -{ - /* - The only exception raised by an attempt to store to an - extended format is the Invalid Stack exception, i.e. - attempting to store from an empty register. - */ - - if ( st0_tag != TAG_Empty ) - { - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE, d, 10); - - FPU_put_user(st0_ptr->sigl, (unsigned long __user *) d); - FPU_put_user(st0_ptr->sigh, (unsigned long __user *) ((u_char __user *)d + 4)); - FPU_put_user(exponent16(st0_ptr), (unsigned short __user *) ((u_char __user *)d + 8)); - RE_ENTRANT_CHECK_ON; - - return 1; - } - - /* Empty register (stack underflow) */ - EXCEPTION(EX_StackUnder); - if ( control_word & CW_Invalid ) - { - /* The masked response */ - /* Put out the QNaN indefinite */ - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,d,10); - FPU_put_user(0, (unsigned long __user *) d); - FPU_put_user(0xc0000000, 1 + (unsigned long __user *) d); - FPU_put_user(0xffff, 4 + (short __user *) d); - RE_ENTRANT_CHECK_ON; - return 1; - } - else - return 0; - -} - - -/* Put a double into user memory */ -int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat) -{ - unsigned long l[2]; - unsigned long increment = 0; /* avoid gcc warnings */ - int precision_loss; - int exp; - FPU_REG tmp; - - if ( st0_tag == TAG_Valid ) - { - reg_copy(st0_ptr, &tmp); - exp = exponent(&tmp); - - if ( exp < DOUBLE_Emin ) /* It may be a denormal */ - { - addexponent(&tmp, -DOUBLE_Emin + 52); /* largest exp to be 51 */ - - denormal_arg: - - if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) ) - { -#ifdef PECULIAR_486 - /* Did it round to a non-denormal ? */ - /* This behaviour might be regarded as peculiar, it appears - that the 80486 rounds to the dest precision, then - converts to decide underflow. */ - if ( !((tmp.sigh == 0x00100000) && (tmp.sigl == 0) && - (st0_ptr->sigl & 0x000007ff)) ) -#endif /* PECULIAR_486 */ - { - EXCEPTION(EX_Underflow); - /* This is a special case: see sec 16.2.5.1 of - the 80486 book */ - if ( !(control_word & CW_Underflow) ) - return 0; - } - EXCEPTION(precision_loss); - if ( !(control_word & CW_Precision) ) - return 0; - } - l[0] = tmp.sigl; - l[1] = tmp.sigh; - } - else - { - if ( tmp.sigl & 0x000007ff ) - { - precision_loss = 1; - switch (control_word & CW_RC) - { - case RC_RND: - /* Rounding can get a little messy.. */ - increment = ((tmp.sigl & 0x7ff) > 0x400) | /* nearest */ - ((tmp.sigl & 0xc00) == 0xc00); /* odd -> even */ - break; - case RC_DOWN: /* towards -infinity */ - increment = signpositive(&tmp) ? 0 : tmp.sigl & 0x7ff; - break; - case RC_UP: /* towards +infinity */ - increment = signpositive(&tmp) ? tmp.sigl & 0x7ff : 0; - break; - case RC_CHOP: - increment = 0; - break; - } - - /* Truncate the mantissa */ - tmp.sigl &= 0xfffff800; - - if ( increment ) - { - if ( tmp.sigl >= 0xfffff800 ) - { - /* the sigl part overflows */ - if ( tmp.sigh == 0xffffffff ) - { - /* The sigh part overflows */ - tmp.sigh = 0x80000000; - exp++; - if (exp >= EXP_OVER) - goto overflow; - } - else - { - tmp.sigh ++; - } - tmp.sigl = 0x00000000; - } - else - { - /* We only need to increment sigl */ - tmp.sigl += 0x00000800; - } - } - } - else - precision_loss = 0; - - l[0] = (tmp.sigl >> 11) | (tmp.sigh << 21); - l[1] = ((tmp.sigh >> 11) & 0xfffff); - - if ( exp > DOUBLE_Emax ) - { - overflow: - EXCEPTION(EX_Overflow); - if ( !(control_word & CW_Overflow) ) - return 0; - set_precision_flag_up(); - if ( !(control_word & CW_Precision) ) - return 0; - - /* This is a special case: see sec 16.2.5.1 of the 80486 book */ - /* Overflow to infinity */ - l[0] = 0x00000000; /* Set to */ - l[1] = 0x7ff00000; /* + INF */ - } - else - { - if ( precision_loss ) - { - if ( increment ) - set_precision_flag_up(); - else - set_precision_flag_down(); - } - /* Add the exponent */ - l[1] |= (((exp+DOUBLE_Ebias) & 0x7ff) << 20); - } - } - } - else if (st0_tag == TAG_Zero) - { - /* Number is zero */ - l[0] = 0; - l[1] = 0; - } - else if ( st0_tag == TAG_Special ) - { - st0_tag = FPU_Special(st0_ptr); - if ( st0_tag == TW_Denormal ) - { - /* A denormal will always underflow. */ -#ifndef PECULIAR_486 - /* An 80486 is supposed to be able to generate - a denormal exception here, but... */ - /* Underflow has priority. */ - if ( control_word & CW_Underflow ) - denormal_operand(); -#endif /* PECULIAR_486 */ - reg_copy(st0_ptr, &tmp); - goto denormal_arg; - } - else if (st0_tag == TW_Infinity) - { - l[0] = 0; - l[1] = 0x7ff00000; - } - else if (st0_tag == TW_NaN) - { - /* Is it really a NaN ? */ - if ( (exponent(st0_ptr) == EXP_OVER) - && (st0_ptr->sigh & 0x80000000) ) - { - /* See if we can get a valid NaN from the FPU_REG */ - l[0] = (st0_ptr->sigl >> 11) | (st0_ptr->sigh << 21); - l[1] = ((st0_ptr->sigh >> 11) & 0xfffff); - if ( !(st0_ptr->sigh & 0x40000000) ) - { - /* It is a signalling NaN */ - EXCEPTION(EX_Invalid); - if ( !(control_word & CW_Invalid) ) - return 0; - l[1] |= (0x40000000 >> 11); - } - l[1] |= 0x7ff00000; - } - else - { - /* It is an unsupported data type */ - EXCEPTION(EX_Invalid); - if ( !(control_word & CW_Invalid) ) - return 0; - l[0] = 0; - l[1] = 0xfff80000; - } - } - } - else if ( st0_tag == TAG_Empty ) - { - /* Empty register (stack underflow) */ - EXCEPTION(EX_StackUnder); - if ( control_word & CW_Invalid ) - { - /* The masked response */ - /* Put out the QNaN indefinite */ - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,dfloat,8); - FPU_put_user(0, (unsigned long __user *) dfloat); - FPU_put_user(0xfff80000, 1 + (unsigned long __user *) dfloat); - RE_ENTRANT_CHECK_ON; - return 1; - } - else - return 0; - } - if ( getsign(st0_ptr) ) - l[1] |= 0x80000000; - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,dfloat,8); - FPU_put_user(l[0], (unsigned long __user *)dfloat); - FPU_put_user(l[1], 1 + (unsigned long __user *)dfloat); - RE_ENTRANT_CHECK_ON; - - return 1; -} - - -/* Put a float into user memory */ -int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single) -{ - long templ = 0; - unsigned long increment = 0; /* avoid gcc warnings */ - int precision_loss; - int exp; - FPU_REG tmp; - - if ( st0_tag == TAG_Valid ) - { - - reg_copy(st0_ptr, &tmp); - exp = exponent(&tmp); - - if ( exp < SINGLE_Emin ) - { - addexponent(&tmp, -SINGLE_Emin + 23); /* largest exp to be 22 */ - - denormal_arg: - - if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) ) - { -#ifdef PECULIAR_486 - /* Did it round to a non-denormal ? */ - /* This behaviour might be regarded as peculiar, it appears - that the 80486 rounds to the dest precision, then - converts to decide underflow. */ - if ( !((tmp.sigl == 0x00800000) && - ((st0_ptr->sigh & 0x000000ff) || st0_ptr->sigl)) ) -#endif /* PECULIAR_486 */ - { - EXCEPTION(EX_Underflow); - /* This is a special case: see sec 16.2.5.1 of - the 80486 book */ - if ( !(control_word & CW_Underflow) ) - return 0; - } - EXCEPTION(precision_loss); - if ( !(control_word & CW_Precision) ) - return 0; - } - templ = tmp.sigl; - } - else - { - if ( tmp.sigl | (tmp.sigh & 0x000000ff) ) - { - unsigned long sigh = tmp.sigh; - unsigned long sigl = tmp.sigl; - - precision_loss = 1; - switch (control_word & CW_RC) - { - case RC_RND: - increment = ((sigh & 0xff) > 0x80) /* more than half */ - || (((sigh & 0xff) == 0x80) && sigl) /* more than half */ - || ((sigh & 0x180) == 0x180); /* round to even */ - break; - case RC_DOWN: /* towards -infinity */ - increment = signpositive(&tmp) - ? 0 : (sigl | (sigh & 0xff)); - break; - case RC_UP: /* towards +infinity */ - increment = signpositive(&tmp) - ? (sigl | (sigh & 0xff)) : 0; - break; - case RC_CHOP: - increment = 0; - break; - } - - /* Truncate part of the mantissa */ - tmp.sigl = 0; - - if (increment) - { - if ( sigh >= 0xffffff00 ) - { - /* The sigh part overflows */ - tmp.sigh = 0x80000000; - exp++; - if ( exp >= EXP_OVER ) - goto overflow; - } - else - { - tmp.sigh &= 0xffffff00; - tmp.sigh += 0x100; - } - } - else - { - tmp.sigh &= 0xffffff00; /* Finish the truncation */ - } - } - else - precision_loss = 0; - - templ = (tmp.sigh >> 8) & 0x007fffff; - - if ( exp > SINGLE_Emax ) - { - overflow: - EXCEPTION(EX_Overflow); - if ( !(control_word & CW_Overflow) ) - return 0; - set_precision_flag_up(); - if ( !(control_word & CW_Precision) ) - return 0; - - /* This is a special case: see sec 16.2.5.1 of the 80486 book. */ - /* Masked response is overflow to infinity. */ - templ = 0x7f800000; - } - else - { - if ( precision_loss ) - { - if ( increment ) - set_precision_flag_up(); - else - set_precision_flag_down(); - } - /* Add the exponent */ - templ |= ((exp+SINGLE_Ebias) & 0xff) << 23; - } - } - } - else if (st0_tag == TAG_Zero) - { - templ = 0; - } - else if ( st0_tag == TAG_Special ) - { - st0_tag = FPU_Special(st0_ptr); - if (st0_tag == TW_Denormal) - { - reg_copy(st0_ptr, &tmp); - - /* A denormal will always underflow. */ -#ifndef PECULIAR_486 - /* An 80486 is supposed to be able to generate - a denormal exception here, but... */ - /* Underflow has priority. */ - if ( control_word & CW_Underflow ) - denormal_operand(); -#endif /* PECULIAR_486 */ - goto denormal_arg; - } - else if (st0_tag == TW_Infinity) - { - templ = 0x7f800000; - } - else if (st0_tag == TW_NaN) - { - /* Is it really a NaN ? */ - if ( (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000) ) - { - /* See if we can get a valid NaN from the FPU_REG */ - templ = st0_ptr->sigh >> 8; - if ( !(st0_ptr->sigh & 0x40000000) ) - { - /* It is a signalling NaN */ - EXCEPTION(EX_Invalid); - if ( !(control_word & CW_Invalid) ) - return 0; - templ |= (0x40000000 >> 8); - } - templ |= 0x7f800000; - } - else - { - /* It is an unsupported data type */ - EXCEPTION(EX_Invalid); - if ( !(control_word & CW_Invalid) ) - return 0; - templ = 0xffc00000; - } - } -#ifdef PARANOID - else - { - EXCEPTION(EX_INTERNAL|0x164); - return 0; - } -#endif - } - else if ( st0_tag == TAG_Empty ) - { - /* Empty register (stack underflow) */ - EXCEPTION(EX_StackUnder); - if ( control_word & EX_Invalid ) - { - /* The masked response */ - /* Put out the QNaN indefinite */ - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,single,4); - FPU_put_user(0xffc00000, (unsigned long __user *) single); - RE_ENTRANT_CHECK_ON; - return 1; - } - else - return 0; - } -#ifdef PARANOID - else - { - EXCEPTION(EX_INTERNAL|0x163); - return 0; - } -#endif - if ( getsign(st0_ptr) ) - templ |= 0x80000000; - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,single,4); - FPU_put_user(templ,(unsigned long __user *) single); - RE_ENTRANT_CHECK_ON; - - return 1; -} - - -/* Put a long long into user memory */ -int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d) -{ - FPU_REG t; - long long tll; - int precision_loss; - - if ( st0_tag == TAG_Empty ) - { - /* Empty register (stack underflow) */ - EXCEPTION(EX_StackUnder); - goto invalid_operand; - } - else if ( st0_tag == TAG_Special ) - { - st0_tag = FPU_Special(st0_ptr); - if ( (st0_tag == TW_Infinity) || - (st0_tag == TW_NaN) ) - { - EXCEPTION(EX_Invalid); - goto invalid_operand; - } - } - - reg_copy(st0_ptr, &t); - precision_loss = FPU_round_to_int(&t, st0_tag); - ((long *)&tll)[0] = t.sigl; - ((long *)&tll)[1] = t.sigh; - if ( (precision_loss == 1) || - ((t.sigh & 0x80000000) && - !((t.sigh == 0x80000000) && (t.sigl == 0) && - signnegative(&t))) ) - { - EXCEPTION(EX_Invalid); - /* This is a special case: see sec 16.2.5.1 of the 80486 book */ - invalid_operand: - if ( control_word & EX_Invalid ) - { - /* Produce something like QNaN "indefinite" */ - tll = 0x8000000000000000LL; - } - else - return 0; - } - else - { - if ( precision_loss ) - set_precision_flag(precision_loss); - if ( signnegative(&t) ) - tll = - tll; - } - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,d,8); - if (copy_to_user(d, &tll, 8)) - FPU_abort; - RE_ENTRANT_CHECK_ON; - - return 1; -} - - -/* Put a long into user memory */ -int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d) -{ - FPU_REG t; - int precision_loss; - - if ( st0_tag == TAG_Empty ) - { - /* Empty register (stack underflow) */ - EXCEPTION(EX_StackUnder); - goto invalid_operand; - } - else if ( st0_tag == TAG_Special ) - { - st0_tag = FPU_Special(st0_ptr); - if ( (st0_tag == TW_Infinity) || - (st0_tag == TW_NaN) ) - { - EXCEPTION(EX_Invalid); - goto invalid_operand; - } - } - - reg_copy(st0_ptr, &t); - precision_loss = FPU_round_to_int(&t, st0_tag); - if (t.sigh || - ((t.sigl & 0x80000000) && - !((t.sigl == 0x80000000) && signnegative(&t))) ) - { - EXCEPTION(EX_Invalid); - /* This is a special case: see sec 16.2.5.1 of the 80486 book */ - invalid_operand: - if ( control_word & EX_Invalid ) - { - /* Produce something like QNaN "indefinite" */ - t.sigl = 0x80000000; - } - else - return 0; - } - else - { - if ( precision_loss ) - set_precision_flag(precision_loss); - if ( signnegative(&t) ) - t.sigl = -(long)t.sigl; - } - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,d,4); - FPU_put_user(t.sigl, (unsigned long __user *) d); - RE_ENTRANT_CHECK_ON; - - return 1; -} - - -/* Put a short into user memory */ -int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d) -{ - FPU_REG t; - int precision_loss; - - if ( st0_tag == TAG_Empty ) - { - /* Empty register (stack underflow) */ - EXCEPTION(EX_StackUnder); - goto invalid_operand; - } - else if ( st0_tag == TAG_Special ) - { - st0_tag = FPU_Special(st0_ptr); - if ( (st0_tag == TW_Infinity) || - (st0_tag == TW_NaN) ) - { - EXCEPTION(EX_Invalid); - goto invalid_operand; - } - } - - reg_copy(st0_ptr, &t); - precision_loss = FPU_round_to_int(&t, st0_tag); - if (t.sigh || - ((t.sigl & 0xffff8000) && - !((t.sigl == 0x8000) && signnegative(&t))) ) - { - EXCEPTION(EX_Invalid); - /* This is a special case: see sec 16.2.5.1 of the 80486 book */ - invalid_operand: - if ( control_word & EX_Invalid ) - { - /* Produce something like QNaN "indefinite" */ - t.sigl = 0x8000; - } - else - return 0; - } - else - { - if ( precision_loss ) - set_precision_flag(precision_loss); - if ( signnegative(&t) ) - t.sigl = -t.sigl; - } - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,d,2); - FPU_put_user((short)t.sigl, d); - RE_ENTRANT_CHECK_ON; - - return 1; -} - - -/* Put a packed bcd array into user memory */ -int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d) -{ - FPU_REG t; - unsigned long long ll; - u_char b; - int i, precision_loss; - u_char sign = (getsign(st0_ptr) == SIGN_NEG) ? 0x80 : 0; - - if ( st0_tag == TAG_Empty ) - { - /* Empty register (stack underflow) */ - EXCEPTION(EX_StackUnder); - goto invalid_operand; - } - else if ( st0_tag == TAG_Special ) - { - st0_tag = FPU_Special(st0_ptr); - if ( (st0_tag == TW_Infinity) || - (st0_tag == TW_NaN) ) - { - EXCEPTION(EX_Invalid); - goto invalid_operand; - } - } - - reg_copy(st0_ptr, &t); - precision_loss = FPU_round_to_int(&t, st0_tag); - ll = significand(&t); - - /* Check for overflow, by comparing with 999999999999999999 decimal. */ - if ( (t.sigh > 0x0de0b6b3) || - ((t.sigh == 0x0de0b6b3) && (t.sigl > 0xa763ffff)) ) - { - EXCEPTION(EX_Invalid); - /* This is a special case: see sec 16.2.5.1 of the 80486 book */ - invalid_operand: - if ( control_word & CW_Invalid ) - { - /* Produce the QNaN "indefinite" */ - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,d,10); - for ( i = 0; i < 7; i++) - FPU_put_user(0, d+i); /* These bytes "undefined" */ - FPU_put_user(0xc0, d+7); /* This byte "undefined" */ - FPU_put_user(0xff, d+8); - FPU_put_user(0xff, d+9); - RE_ENTRANT_CHECK_ON; - return 1; - } - else - return 0; - } - else if ( precision_loss ) - { - /* Precision loss doesn't stop the data transfer */ - set_precision_flag(precision_loss); - } - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,d,10); - RE_ENTRANT_CHECK_ON; - for ( i = 0; i < 9; i++) - { - b = FPU_div_small(&ll, 10); - b |= (FPU_div_small(&ll, 10)) << 4; - RE_ENTRANT_CHECK_OFF; - FPU_put_user(b, d+i); - RE_ENTRANT_CHECK_ON; - } - RE_ENTRANT_CHECK_OFF; - FPU_put_user(sign, d+9); - RE_ENTRANT_CHECK_ON; - - return 1; -} - -/*===========================================================================*/ - -/* r gets mangled such that sig is int, sign: - it is NOT normalized */ -/* The return value (in eax) is zero if the result is exact, - if bits are changed due to rounding, truncation, etc, then - a non-zero value is returned */ -/* Overflow is signalled by a non-zero return value (in eax). - In the case of overflow, the returned significand always has the - largest possible value */ -int FPU_round_to_int(FPU_REG *r, u_char tag) -{ - u_char very_big; - unsigned eax; - - if (tag == TAG_Zero) - { - /* Make sure that zero is returned */ - significand(r) = 0; - return 0; /* o.k. */ - } - - if (exponent(r) > 63) - { - r->sigl = r->sigh = ~0; /* The largest representable number */ - return 1; /* overflow */ - } - - eax = FPU_shrxs(&r->sigl, 63 - exponent(r)); - very_big = !(~(r->sigh) | ~(r->sigl)); /* test for 0xfff...fff */ -#define half_or_more (eax & 0x80000000) -#define frac_part (eax) -#define more_than_half ((eax & 0x80000001) == 0x80000001) - switch (control_word & CW_RC) - { - case RC_RND: - if ( more_than_half /* nearest */ - || (half_or_more && (r->sigl & 1)) ) /* odd -> even */ - { - if ( very_big ) return 1; /* overflow */ - significand(r) ++; - return PRECISION_LOST_UP; - } - break; - case RC_DOWN: - if (frac_part && getsign(r)) - { - if ( very_big ) return 1; /* overflow */ - significand(r) ++; - return PRECISION_LOST_UP; - } - break; - case RC_UP: - if (frac_part && !getsign(r)) - { - if ( very_big ) return 1; /* overflow */ - significand(r) ++; - return PRECISION_LOST_UP; - } - break; - case RC_CHOP: - break; - } - - return eax ? PRECISION_LOST_DOWN : 0; - -} - -/*===========================================================================*/ - -u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s) -{ - unsigned short tag_word = 0; - u_char tag; - int i; - - if ( (addr_modes.default_mode == VM86) || - ((addr_modes.default_mode == PM16) - ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) ) - { - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, s, 0x0e); - FPU_get_user(control_word, (unsigned short __user *) s); - FPU_get_user(partial_status, (unsigned short __user *) (s+2)); - FPU_get_user(tag_word, (unsigned short __user *) (s+4)); - FPU_get_user(instruction_address.offset, (unsigned short __user *) (s+6)); - FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+8)); - FPU_get_user(operand_address.offset, (unsigned short __user *) (s+0x0a)); - FPU_get_user(operand_address.selector, (unsigned short __user *) (s+0x0c)); - RE_ENTRANT_CHECK_ON; - s += 0x0e; - if ( addr_modes.default_mode == VM86 ) - { - instruction_address.offset - += (instruction_address.selector & 0xf000) << 4; - operand_address.offset += (operand_address.selector & 0xf000) << 4; - } - } - else - { - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ, s, 0x1c); - FPU_get_user(control_word, (unsigned short __user *) s); - FPU_get_user(partial_status, (unsigned short __user *) (s+4)); - FPU_get_user(tag_word, (unsigned short __user *) (s+8)); - FPU_get_user(instruction_address.offset, (unsigned long __user *) (s+0x0c)); - FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+0x10)); - FPU_get_user(instruction_address.opcode, (unsigned short __user *) (s+0x12)); - FPU_get_user(operand_address.offset, (unsigned long __user *) (s+0x14)); - FPU_get_user(operand_address.selector, (unsigned long __user *) (s+0x18)); - RE_ENTRANT_CHECK_ON; - s += 0x1c; - } - -#ifdef PECULIAR_486 - control_word &= ~0xe080; -#endif /* PECULIAR_486 */ - - top = (partial_status >> SW_Top_Shift) & 7; - - if ( partial_status & ~control_word & CW_Exceptions ) - partial_status |= (SW_Summary | SW_Backward); - else - partial_status &= ~(SW_Summary | SW_Backward); - - for ( i = 0; i < 8; i++ ) - { - tag = tag_word & 3; - tag_word >>= 2; - - if ( tag == TAG_Empty ) - /* New tag is empty. Accept it */ - FPU_settag(i, TAG_Empty); - else if ( FPU_gettag(i) == TAG_Empty ) - { - /* Old tag is empty and new tag is not empty. New tag is determined - by old reg contents */ - if ( exponent(&fpu_register(i)) == - EXTENDED_Ebias ) - { - if ( !(fpu_register(i).sigl | fpu_register(i).sigh) ) - FPU_settag(i, TAG_Zero); - else - FPU_settag(i, TAG_Special); - } - else if ( exponent(&fpu_register(i)) == 0x7fff - EXTENDED_Ebias ) - { - FPU_settag(i, TAG_Special); - } - else if ( fpu_register(i).sigh & 0x80000000 ) - FPU_settag(i, TAG_Valid); - else - FPU_settag(i, TAG_Special); /* An Un-normal */ - } - /* Else old tag is not empty and new tag is not empty. Old tag - remains correct */ - } - - return s; -} - - -void frstor(fpu_addr_modes addr_modes, u_char __user *data_address) -{ - int i, regnr; - u_char __user *s = fldenv(addr_modes, data_address); - int offset = (top & 7) * 10, other = 80 - offset; - - /* Copy all registers in stack order. */ - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_READ,s,80); - __copy_from_user(register_base+offset, s, other); - if ( offset ) - __copy_from_user(register_base, s+other, offset); - RE_ENTRANT_CHECK_ON; - - for ( i = 0; i < 8; i++ ) - { - regnr = (i+top) & 7; - if ( FPU_gettag(regnr) != TAG_Empty ) - /* The loaded data over-rides all other cases. */ - FPU_settag(regnr, FPU_tagof(&st(i))); - } - -} - - -u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d) -{ - if ( (addr_modes.default_mode == VM86) || - ((addr_modes.default_mode == PM16) - ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) ) - { - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,d,14); -#ifdef PECULIAR_486 - FPU_put_user(control_word & ~0xe080, (unsigned long __user *) d); -#else - FPU_put_user(control_word, (unsigned short __user *) d); -#endif /* PECULIAR_486 */ - FPU_put_user(status_word(), (unsigned short __user *) (d+2)); - FPU_put_user(fpu_tag_word, (unsigned short __user *) (d+4)); - FPU_put_user(instruction_address.offset, (unsigned short __user *) (d+6)); - FPU_put_user(operand_address.offset, (unsigned short __user *) (d+0x0a)); - if ( addr_modes.default_mode == VM86 ) - { - FPU_put_user((instruction_address.offset & 0xf0000) >> 4, - (unsigned short __user *) (d+8)); - FPU_put_user((operand_address.offset & 0xf0000) >> 4, - (unsigned short __user *) (d+0x0c)); - } - else - { - FPU_put_user(instruction_address.selector, (unsigned short __user *) (d+8)); - FPU_put_user(operand_address.selector, (unsigned short __user *) (d+0x0c)); - } - RE_ENTRANT_CHECK_ON; - d += 0x0e; - } - else - { - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE, d, 7*4); -#ifdef PECULIAR_486 - control_word &= ~0xe080; - /* An 80486 sets nearly all of the reserved bits to 1. */ - control_word |= 0xffff0040; - partial_status = status_word() | 0xffff0000; - fpu_tag_word |= 0xffff0000; - I387.soft.fcs &= ~0xf8000000; - I387.soft.fos |= 0xffff0000; -#endif /* PECULIAR_486 */ - if (__copy_to_user(d, &control_word, 7*4)) - FPU_abort; - RE_ENTRANT_CHECK_ON; - d += 0x1c; - } - - control_word |= CW_Exceptions; - partial_status &= ~(SW_Summary | SW_Backward); - - return d; -} - - -void fsave(fpu_addr_modes addr_modes, u_char __user *data_address) -{ - u_char __user *d; - int offset = (top & 7) * 10, other = 80 - offset; - - d = fstenv(addr_modes, data_address); - - RE_ENTRANT_CHECK_OFF; - FPU_access_ok(VERIFY_WRITE,d,80); - - /* Copy all registers in stack order. */ - if (__copy_to_user(d, register_base+offset, other)) - FPU_abort; - if ( offset ) - if (__copy_to_user(d+other, register_base, offset)) - FPU_abort; - RE_ENTRANT_CHECK_ON; - - finit(); -} - -/*===========================================================================*/ diff --git a/arch/i386/math-emu/reg_mul.c b/arch/i386/math-emu/reg_mul.c deleted file mode 100644 index 40f50b61bc6..00000000000 --- a/arch/i386/math-emu/reg_mul.c +++ /dev/null @@ -1,132 +0,0 @@ -/*---------------------------------------------------------------------------+ - | reg_mul.c | - | | - | Multiply one FPU_REG by another, put the result in a destination FPU_REG. | - | | - | Copyright (C) 1992,1993,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | Returns the tag of the result if no exceptions or errors occurred. | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | The destination may be any FPU_REG, including one of the source FPU_REGs. | - +---------------------------------------------------------------------------*/ - -#include "fpu_emu.h" -#include "exception.h" -#include "reg_constant.h" -#include "fpu_system.h" - - -/* - Multiply two registers to give a register result. - The sources are st(deststnr) and (b,tagb,signb). - The destination is st(deststnr). - */ -/* This routine must be called with non-empty source registers */ -int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w) -{ - FPU_REG *a = &st(deststnr); - FPU_REG *dest = a; - u_char taga = FPU_gettagi(deststnr); - u_char saved_sign = getsign(dest); - u_char sign = (getsign(a) ^ getsign(b)); - int tag; - - - if ( !(taga | tagb) ) - { - /* Both regs Valid, this should be the most common case. */ - - tag = FPU_u_mul(a, b, dest, control_w, sign, exponent(a) + exponent(b)); - if ( tag < 0 ) - { - setsign(dest, saved_sign); - return tag; - } - FPU_settagi(deststnr, tag); - return tag; - } - - if ( taga == TAG_Special ) - taga = FPU_Special(a); - if ( tagb == TAG_Special ) - tagb = FPU_Special(b); - - if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) - || ((taga == TW_Denormal) && (tagb == TAG_Valid)) - || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) - { - FPU_REG x, y; - if ( denormal_operand() < 0 ) - return FPU_Exception; - - FPU_to_exp16(a, &x); - FPU_to_exp16(b, &y); - tag = FPU_u_mul(&x, &y, dest, control_w, sign, - exponent16(&x) + exponent16(&y)); - if ( tag < 0 ) - { - setsign(dest, saved_sign); - return tag; - } - FPU_settagi(deststnr, tag); - return tag; - } - else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) ) - { - if ( ((tagb == TW_Denormal) || (taga == TW_Denormal)) - && (denormal_operand() < 0) ) - return FPU_Exception; - - /* Must have either both arguments == zero, or - one valid and the other zero. - The result is therefore zero. */ - FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); - /* The 80486 book says that the answer is +0, but a real - 80486 behaves this way. - IEEE-754 apparently says it should be this way. */ - setsign(dest, sign); - return TAG_Zero; - } - /* Must have infinities, NaNs, etc */ - else if ( (taga == TW_NaN) || (tagb == TW_NaN) ) - { - return real_2op_NaN(b, tagb, deststnr, &st(0)); - } - else if ( ((taga == TW_Infinity) && (tagb == TAG_Zero)) - || ((tagb == TW_Infinity) && (taga == TAG_Zero)) ) - { - return arith_invalid(deststnr); /* Zero*Infinity is invalid */ - } - else if ( ((taga == TW_Denormal) || (tagb == TW_Denormal)) - && (denormal_operand() < 0) ) - { - return FPU_Exception; - } - else if (taga == TW_Infinity) - { - FPU_copy_to_regi(a, TAG_Special, deststnr); - setsign(dest, sign); - return TAG_Special; - } - else if (tagb == TW_Infinity) - { - FPU_copy_to_regi(b, TAG_Special, deststnr); - setsign(dest, sign); - return TAG_Special; - } - -#ifdef PARANOID - else - { - EXCEPTION(EX_INTERNAL|0x102); - return FPU_Exception; - } -#endif /* PARANOID */ - - return 0; -} diff --git a/arch/i386/math-emu/reg_norm.S b/arch/i386/math-emu/reg_norm.S deleted file mode 100644 index 8b6352efcee..00000000000 --- a/arch/i386/math-emu/reg_norm.S +++ /dev/null @@ -1,147 +0,0 @@ -/*---------------------------------------------------------------------------+ - | reg_norm.S | - | | - | Copyright (C) 1992,1993,1994,1995,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - | Normalize the value in a FPU_REG. | - | | - | Call from C as: | - | int FPU_normalize(FPU_REG *n) | - | | - | int FPU_normalize_nuo(FPU_REG *n) | - | | - | Return value is the tag of the answer, or-ed with FPU_Exception if | - | one was raised, or -1 on internal error. | - | | - +---------------------------------------------------------------------------*/ - -#include "fpu_emu.h" - - -.text -ENTRY(FPU_normalize) - pushl %ebp - movl %esp,%ebp - pushl %ebx - - movl PARAM1,%ebx - - movl SIGH(%ebx),%edx - movl SIGL(%ebx),%eax - - orl %edx,%edx /* ms bits */ - js L_done /* Already normalized */ - jnz L_shift_1 /* Shift left 1 - 31 bits */ - - orl %eax,%eax - jz L_zero /* The contents are zero */ - - movl %eax,%edx - xorl %eax,%eax - subw $32,EXP(%ebx) /* This can cause an underflow */ - -/* We need to shift left by 1 - 31 bits */ -L_shift_1: - bsrl %edx,%ecx /* get the required shift in %ecx */ - subl $31,%ecx - negl %ecx - shld %cl,%eax,%edx - shl %cl,%eax - subw %cx,EXP(%ebx) /* This can cause an underflow */ - - movl %edx,SIGH(%ebx) - movl %eax,SIGL(%ebx) - -L_done: - cmpw EXP_OVER,EXP(%ebx) - jge L_overflow - - cmpw EXP_UNDER,EXP(%ebx) - jle L_underflow - -L_exit_valid: - movl TAG_Valid,%eax - - /* Convert the exponent to 80x87 form. */ - addw EXTENDED_Ebias,EXP(%ebx) - andw $0x7fff,EXP(%ebx) - -L_exit: - popl %ebx - leave - ret - - -L_zero: - movw $0,EXP(%ebx) - movl TAG_Zero,%eax - jmp L_exit - -L_underflow: - /* Convert the exponent to 80x87 form. */ - addw EXTENDED_Ebias,EXP(%ebx) - push %ebx - call arith_underflow - pop %ebx - jmp L_exit - -L_overflow: - /* Convert the exponent to 80x87 form. */ - addw EXTENDED_Ebias,EXP(%ebx) - push %ebx - call arith_overflow - pop %ebx - jmp L_exit - - - -/* Normalise without reporting underflow or overflow */ -ENTRY(FPU_normalize_nuo) - pushl %ebp - movl %esp,%ebp - pushl %ebx - - movl PARAM1,%ebx - - movl SIGH(%ebx),%edx - movl SIGL(%ebx),%eax - - orl %edx,%edx /* ms bits */ - js L_exit_nuo_valid /* Already normalized */ - jnz L_nuo_shift_1 /* Shift left 1 - 31 bits */ - - orl %eax,%eax - jz L_exit_nuo_zero /* The contents are zero */ - - movl %eax,%edx - xorl %eax,%eax - subw $32,EXP(%ebx) /* This can cause an underflow */ - -/* We need to shift left by 1 - 31 bits */ -L_nuo_shift_1: - bsrl %edx,%ecx /* get the required shift in %ecx */ - subl $31,%ecx - negl %ecx - shld %cl,%eax,%edx - shl %cl,%eax - subw %cx,EXP(%ebx) /* This can cause an underflow */ - - movl %edx,SIGH(%ebx) - movl %eax,SIGL(%ebx) - -L_exit_nuo_valid: - movl TAG_Valid,%eax - - popl %ebx - leave - ret - -L_exit_nuo_zero: - movl TAG_Zero,%eax - movw EXP_UNDER,EXP(%ebx) - - popl %ebx - leave - ret diff --git a/arch/i386/math-emu/reg_round.S b/arch/i386/math-emu/reg_round.S deleted file mode 100644 index d1d4e48b4f6..00000000000 --- a/arch/i386/math-emu/reg_round.S +++ /dev/null @@ -1,708 +0,0 @@ - .file "reg_round.S" -/*---------------------------------------------------------------------------+ - | reg_round.S | - | | - | Rounding/truncation/etc for FPU basic arithmetic functions. | - | | - | Copyright (C) 1993,1995,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - | This code has four possible entry points. | - | The following must be entered by a jmp instruction: | - | fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. | - | | - | The FPU_round entry point is intended to be used by C code. | - | From C, call as: | - | int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w) | - | | - | Return value is the tag of the answer, or-ed with FPU_Exception if | - | one was raised, or -1 on internal error. | - | | - | For correct "up" and "down" rounding, the argument must have the correct | - | sign. | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | Four entry points. | - | | - | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: | - | %eax:%ebx 64 bit significand | - | %edx 32 bit extension of the significand | - | %edi pointer to an FPU_REG for the result to be stored | - | stack calling function must have set up a C stack frame and | - | pushed %esi, %edi, and %ebx | - | | - | Needed just for the fpu_reg_round_sqrt entry point: | - | %cx A control word in the same format as the FPU control word. | - | Otherwise, PARAM4 must give such a value. | - | | - | | - | The significand and its extension are assumed to be exact in the | - | following sense: | - | If the significand by itself is the exact result then the significand | - | extension (%edx) must contain 0, otherwise the significand extension | - | must be non-zero. | - | If the significand extension is non-zero then the significand is | - | smaller than the magnitude of the correct exact result by an amount | - | greater than zero and less than one ls bit of the significand. | - | The significand extension is only required to have three possible | - | non-zero values: | - | less than 0x80000000 <=> the significand is less than 1/2 an ls | - | bit smaller than the magnitude of the | - | true exact result. | - | exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit | - | smaller than the magnitude of the true | - | exact result. | - | greater than 0x80000000 <=> the significand is more than 1/2 an ls | - | bit smaller than the magnitude of the | - | true exact result. | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | The code in this module has become quite complex, but it should handle | - | all of the FPU flags which are set at this stage of the basic arithmetic | - | computations. | - | There are a few rare cases where the results are not set identically to | - | a real FPU. These require a bit more thought because at this stage the | - | results of the code here appear to be more consistent... | - | This may be changed in a future version. | - +---------------------------------------------------------------------------*/ - - -#include "fpu_emu.h" -#include "exception.h" -#include "control_w.h" - -/* Flags for FPU_bits_lost */ -#define LOST_DOWN $1 -#define LOST_UP $2 - -/* Flags for FPU_denormal */ -#define DENORMAL $1 -#define UNMASKED_UNDERFLOW $2 - - -#ifndef NON_REENTRANT_FPU -/* Make the code re-entrant by putting - local storage on the stack: */ -#define FPU_bits_lost (%esp) -#define FPU_denormal 1(%esp) - -#else -/* Not re-entrant, so we can gain speed by putting - local storage in a static area: */ -.data - .align 4,0 -FPU_bits_lost: - .byte 0 -FPU_denormal: - .byte 0 -#endif /* NON_REENTRANT_FPU */ - - -.text -.globl fpu_reg_round -.globl fpu_Arith_exit - -/* Entry point when called from C */ -ENTRY(FPU_round) - pushl %ebp - movl %esp,%ebp - pushl %esi - pushl %edi - pushl %ebx - - movl PARAM1,%edi - movl SIGH(%edi),%eax - movl SIGL(%edi),%ebx - movl PARAM2,%edx - -fpu_reg_round: /* Normal entry point */ - movl PARAM4,%ecx - -#ifndef NON_REENTRANT_FPU - pushl %ebx /* adjust the stack pointer */ -#endif /* NON_REENTRANT_FPU */ - -#ifdef PARANOID -/* Cannot use this here yet */ -/* orl %eax,%eax */ -/* jns L_entry_bugged */ -#endif /* PARANOID */ - - cmpw EXP_UNDER,EXP(%edi) - jle L_Make_denorm /* The number is a de-normal */ - - movb $0,FPU_denormal /* 0 -> not a de-normal */ - -Denorm_done: - movb $0,FPU_bits_lost /* No bits yet lost in rounding */ - - movl %ecx,%esi - andl CW_PC,%ecx - cmpl PR_64_BITS,%ecx - je LRound_To_64 - - cmpl PR_53_BITS,%ecx - je LRound_To_53 - - cmpl PR_24_BITS,%ecx - je LRound_To_24 - -#ifdef PECULIAR_486 -/* With the precision control bits set to 01 "(reserved)", a real 80486 - behaves as if the precision control bits were set to 11 "64 bits" */ - cmpl PR_RESERVED_BITS,%ecx - je LRound_To_64 -#ifdef PARANOID - jmp L_bugged_denorm_486 -#endif /* PARANOID */ -#else -#ifdef PARANOID - jmp L_bugged_denorm /* There is no bug, just a bad control word */ -#endif /* PARANOID */ -#endif /* PECULIAR_486 */ - - -/* Round etc to 24 bit precision */ -LRound_To_24: - movl %esi,%ecx - andl CW_RC,%ecx - cmpl RC_RND,%ecx - je LRound_nearest_24 - - cmpl RC_CHOP,%ecx - je LCheck_truncate_24 - - cmpl RC_UP,%ecx /* Towards +infinity */ - je LUp_24 - - cmpl RC_DOWN,%ecx /* Towards -infinity */ - je LDown_24 - -#ifdef PARANOID - jmp L_bugged_round24 -#endif /* PARANOID */ - -LUp_24: - cmpb SIGN_POS,PARAM5 - jne LCheck_truncate_24 /* If negative then up==truncate */ - - jmp LCheck_24_round_up - -LDown_24: - cmpb SIGN_POS,PARAM5 - je LCheck_truncate_24 /* If positive then down==truncate */ - -LCheck_24_round_up: - movl %eax,%ecx - andl $0x000000ff,%ecx - orl %ebx,%ecx - orl %edx,%ecx - jnz LDo_24_round_up - jmp L_Re_normalise - -LRound_nearest_24: - /* Do rounding of the 24th bit if needed (nearest or even) */ - movl %eax,%ecx - andl $0x000000ff,%ecx - cmpl $0x00000080,%ecx - jc LCheck_truncate_24 /* less than half, no increment needed */ - - jne LGreater_Half_24 /* greater than half, increment needed */ - - /* Possibly half, we need to check the ls bits */ - orl %ebx,%ebx - jnz LGreater_Half_24 /* greater than half, increment needed */ - - orl %edx,%edx - jnz LGreater_Half_24 /* greater than half, increment needed */ - - /* Exactly half, increment only if 24th bit is 1 (round to even) */ - testl $0x00000100,%eax - jz LDo_truncate_24 - -LGreater_Half_24: /* Rounding: increment at the 24th bit */ -LDo_24_round_up: - andl $0xffffff00,%eax /* Truncate to 24 bits */ - xorl %ebx,%ebx - movb LOST_UP,FPU_bits_lost - addl $0x00000100,%eax - jmp LCheck_Round_Overflow - -LCheck_truncate_24: - movl %eax,%ecx - andl $0x000000ff,%ecx - orl %ebx,%ecx - orl %edx,%ecx - jz L_Re_normalise /* No truncation needed */ - -LDo_truncate_24: - andl $0xffffff00,%eax /* Truncate to 24 bits */ - xorl %ebx,%ebx - movb LOST_DOWN,FPU_bits_lost - jmp L_Re_normalise - - -/* Round etc to 53 bit precision */ -LRound_To_53: - movl %esi,%ecx - andl CW_RC,%ecx - cmpl RC_RND,%ecx - je LRound_nearest_53 - - cmpl RC_CHOP,%ecx - je LCheck_truncate_53 - - cmpl RC_UP,%ecx /* Towards +infinity */ - je LUp_53 - - cmpl RC_DOWN,%ecx /* Towards -infinity */ - je LDown_53 - -#ifdef PARANOID - jmp L_bugged_round53 -#endif /* PARANOID */ - -LUp_53: - cmpb SIGN_POS,PARAM5 - jne LCheck_truncate_53 /* If negative then up==truncate */ - - jmp LCheck_53_round_up - -LDown_53: - cmpb SIGN_POS,PARAM5 - je LCheck_truncate_53 /* If positive then down==truncate */ - -LCheck_53_round_up: - movl %ebx,%ecx - andl $0x000007ff,%ecx - orl %edx,%ecx - jnz LDo_53_round_up - jmp L_Re_normalise - -LRound_nearest_53: - /* Do rounding of the 53rd bit if needed (nearest or even) */ - movl %ebx,%ecx - andl $0x000007ff,%ecx - cmpl $0x00000400,%ecx - jc LCheck_truncate_53 /* less than half, no increment needed */ - - jnz LGreater_Half_53 /* greater than half, increment needed */ - - /* Possibly half, we need to check the ls bits */ - orl %edx,%edx - jnz LGreater_Half_53 /* greater than half, increment needed */ - - /* Exactly half, increment only if 53rd bit is 1 (round to even) */ - testl $0x00000800,%ebx - jz LTruncate_53 - -LGreater_Half_53: /* Rounding: increment at the 53rd bit */ -LDo_53_round_up: - movb LOST_UP,FPU_bits_lost - andl $0xfffff800,%ebx /* Truncate to 53 bits */ - addl $0x00000800,%ebx - adcl $0,%eax - jmp LCheck_Round_Overflow - -LCheck_truncate_53: - movl %ebx,%ecx - andl $0x000007ff,%ecx - orl %edx,%ecx - jz L_Re_normalise - -LTruncate_53: - movb LOST_DOWN,FPU_bits_lost - andl $0xfffff800,%ebx /* Truncate to 53 bits */ - jmp L_Re_normalise - - -/* Round etc to 64 bit precision */ -LRound_To_64: - movl %esi,%ecx - andl CW_RC,%ecx - cmpl RC_RND,%ecx - je LRound_nearest_64 - - cmpl RC_CHOP,%ecx - je LCheck_truncate_64 - - cmpl RC_UP,%ecx /* Towards +infinity */ - je LUp_64 - - cmpl RC_DOWN,%ecx /* Towards -infinity */ - je LDown_64 - -#ifdef PARANOID - jmp L_bugged_round64 -#endif /* PARANOID */ - -LUp_64: - cmpb SIGN_POS,PARAM5 - jne LCheck_truncate_64 /* If negative then up==truncate */ - - orl %edx,%edx - jnz LDo_64_round_up - jmp L_Re_normalise - -LDown_64: - cmpb SIGN_POS,PARAM5 - je LCheck_truncate_64 /* If positive then down==truncate */ - - orl %edx,%edx - jnz LDo_64_round_up - jmp L_Re_normalise - -LRound_nearest_64: - cmpl $0x80000000,%edx - jc LCheck_truncate_64 - - jne LDo_64_round_up - - /* Now test for round-to-even */ - testb $1,%bl - jz LCheck_truncate_64 - -LDo_64_round_up: - movb LOST_UP,FPU_bits_lost - addl $1,%ebx - adcl $0,%eax - -LCheck_Round_Overflow: - jnc L_Re_normalise - - /* Overflow, adjust the result (significand to 1.0) */ - rcrl $1,%eax - rcrl $1,%ebx - incw EXP(%edi) - jmp L_Re_normalise - -LCheck_truncate_64: - orl %edx,%edx - jz L_Re_normalise - -LTruncate_64: - movb LOST_DOWN,FPU_bits_lost - -L_Re_normalise: - testb $0xff,FPU_denormal - jnz Normalise_result - -L_Normalised: - movl TAG_Valid,%edx - -L_deNormalised: - cmpb LOST_UP,FPU_bits_lost - je L_precision_lost_up - - cmpb LOST_DOWN,FPU_bits_lost - je L_precision_lost_down - -L_no_precision_loss: - /* store the result */ - -L_Store_significand: - movl %eax,SIGH(%edi) - movl %ebx,SIGL(%edi) - - cmpw EXP_OVER,EXP(%edi) - jge L_overflow - - movl %edx,%eax - - /* Convert the exponent to 80x87 form. */ - addw EXTENDED_Ebias,EXP(%edi) - andw $0x7fff,EXP(%edi) - -fpu_reg_round_signed_special_exit: - - cmpb SIGN_POS,PARAM5 - je fpu_reg_round_special_exit - - orw $0x8000,EXP(%edi) /* Negative sign for the result. */ - -fpu_reg_round_special_exit: - -#ifndef NON_REENTRANT_FPU - popl %ebx /* adjust the stack pointer */ -#endif /* NON_REENTRANT_FPU */ - -fpu_Arith_exit: - popl %ebx - popl %edi - popl %esi - leave - ret - - -/* - * Set the FPU status flags to represent precision loss due to - * round-up. - */ -L_precision_lost_up: - push %edx - push %eax - call set_precision_flag_up - popl %eax - popl %edx - jmp L_no_precision_loss - -/* - * Set the FPU status flags to represent precision loss due to - * truncation. - */ -L_precision_lost_down: - push %edx - push %eax - call set_precision_flag_down - popl %eax - popl %edx - jmp L_no_precision_loss - - -/* - * The number is a denormal (which might get rounded up to a normal) - * Shift the number right the required number of bits, which will - * have to be undone later... - */ -L_Make_denorm: - /* The action to be taken depends upon whether the underflow - exception is masked */ - testb CW_Underflow,%cl /* Underflow mask. */ - jz Unmasked_underflow /* Do not make a denormal. */ - - movb DENORMAL,FPU_denormal - - pushl %ecx /* Save */ - movw EXP_UNDER+1,%cx - subw EXP(%edi),%cx - - cmpw $64,%cx /* shrd only works for 0..31 bits */ - jnc Denorm_shift_more_than_63 - - cmpw $32,%cx /* shrd only works for 0..31 bits */ - jnc Denorm_shift_more_than_32 - -/* - * We got here without jumps by assuming that the most common requirement - * is for a small de-normalising shift. - * Shift by [1..31] bits - */ - addw %cx,EXP(%edi) - orl %edx,%edx /* extension */ - setne %ch /* Save whether %edx is non-zero */ - xorl %edx,%edx - shrd %cl,%ebx,%edx - shrd %cl,%eax,%ebx - shr %cl,%eax - orb %ch,%dl - popl %ecx - jmp Denorm_done - -/* Shift by [32..63] bits */ -Denorm_shift_more_than_32: - addw %cx,EXP(%edi) - subb $32,%cl - orl %edx,%edx - setne %ch - orb %ch,%bl - xorl %edx,%edx - shrd %cl,%ebx,%edx - shrd %cl,%eax,%ebx - shr %cl,%eax - orl %edx,%edx /* test these 32 bits */ - setne %cl - orb %ch,%bl - orb %cl,%bl - movl %ebx,%edx - movl %eax,%ebx - xorl %eax,%eax - popl %ecx - jmp Denorm_done - -/* Shift by [64..) bits */ -Denorm_shift_more_than_63: - cmpw $64,%cx - jne Denorm_shift_more_than_64 - -/* Exactly 64 bit shift */ - addw %cx,EXP(%edi) - xorl %ecx,%ecx - orl %edx,%edx - setne %cl - orl %ebx,%ebx - setne %ch - orb %ch,%cl - orb %cl,%al - movl %eax,%edx - xorl %eax,%eax - xorl %ebx,%ebx - popl %ecx - jmp Denorm_done - -Denorm_shift_more_than_64: - movw EXP_UNDER+1,EXP(%edi) -/* This is easy, %eax must be non-zero, so.. */ - movl $1,%edx - xorl %eax,%eax - xorl %ebx,%ebx - popl %ecx - jmp Denorm_done - - -Unmasked_underflow: - movb UNMASKED_UNDERFLOW,FPU_denormal - jmp Denorm_done - - -/* Undo the de-normalisation. */ -Normalise_result: - cmpb UNMASKED_UNDERFLOW,FPU_denormal - je Signal_underflow - -/* The number must be a denormal if we got here. */ -#ifdef PARANOID - /* But check it... just in case. */ - cmpw EXP_UNDER+1,EXP(%edi) - jne L_norm_bugged -#endif /* PARANOID */ - -#ifdef PECULIAR_486 - /* - * This implements a special feature of 80486 behaviour. - * Underflow will be signalled even if the number is - * not a denormal after rounding. - * This difference occurs only for masked underflow, and not - * in the unmasked case. - * Actual 80486 behaviour differs from this in some circumstances. - */ - orl %eax,%eax /* ms bits */ - js LPseudoDenormal /* Will be masked underflow */ -#else - orl %eax,%eax /* ms bits */ - js L_Normalised /* No longer a denormal */ -#endif /* PECULIAR_486 */ - - jnz LDenormal_adj_exponent - - orl %ebx,%ebx - jz L_underflow_to_zero /* The contents are zero */ - -LDenormal_adj_exponent: - decw EXP(%edi) - -LPseudoDenormal: - testb $0xff,FPU_bits_lost /* bits lost == underflow */ - movl TAG_Special,%edx - jz L_deNormalised - - /* There must be a masked underflow */ - push %eax - pushl EX_Underflow - call EXCEPTION - popl %eax - popl %eax - movl TAG_Special,%edx - jmp L_deNormalised - - -/* - * The operations resulted in a number too small to represent. - * Masked response. - */ -L_underflow_to_zero: - push %eax - call set_precision_flag_down - popl %eax - - push %eax - pushl EX_Underflow - call EXCEPTION - popl %eax - popl %eax - -/* Reduce the exponent to EXP_UNDER */ - movw EXP_UNDER,EXP(%edi) - movl TAG_Zero,%edx - jmp L_Store_significand - - -/* The operations resulted in a number too large to represent. */ -L_overflow: - addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */ - push %edi - call arith_overflow - pop %edi - jmp fpu_reg_round_signed_special_exit - - -Signal_underflow: - /* The number may have been changed to a non-denormal */ - /* by the rounding operations. */ - cmpw EXP_UNDER,EXP(%edi) - jle Do_unmasked_underflow - - jmp L_Normalised - -Do_unmasked_underflow: - /* Increase the exponent by the magic number */ - addw $(3*(1<<13)),EXP(%edi) - push %eax - pushl EX_Underflow - call EXCEPTION - popl %eax - popl %eax - jmp L_Normalised - - -#ifdef PARANOID -#ifdef PECULIAR_486 -L_bugged_denorm_486: - pushl EX_INTERNAL|0x236 - call EXCEPTION - popl %ebx - jmp L_exception_exit -#else -L_bugged_denorm: - pushl EX_INTERNAL|0x230 - call EXCEPTION - popl %ebx - jmp L_exception_exit -#endif /* PECULIAR_486 */ - -L_bugged_round24: - pushl EX_INTERNAL|0x231 - call EXCEPTION - popl %ebx - jmp L_exception_exit - -L_bugged_round53: - pushl EX_INTERNAL|0x232 - call EXCEPTION - popl %ebx - jmp L_exception_exit - -L_bugged_round64: - pushl EX_INTERNAL|0x233 - call EXCEPTION - popl %ebx - jmp L_exception_exit - -L_norm_bugged: - pushl EX_INTERNAL|0x234 - call EXCEPTION - popl %ebx - jmp L_exception_exit - -L_entry_bugged: - pushl EX_INTERNAL|0x235 - call EXCEPTION - popl %ebx -L_exception_exit: - mov $-1,%eax - jmp fpu_reg_round_special_exit -#endif /* PARANOID */ diff --git a/arch/i386/math-emu/reg_u_add.S b/arch/i386/math-emu/reg_u_add.S deleted file mode 100644 index 47c4c2434d8..00000000000 --- a/arch/i386/math-emu/reg_u_add.S +++ /dev/null @@ -1,167 +0,0 @@ - .file "reg_u_add.S" -/*---------------------------------------------------------------------------+ - | reg_u_add.S | - | | - | Add two valid (TAG_Valid) FPU_REG numbers, of the same sign, and put the | - | result in a destination FPU_REG. | - | | - | Copyright (C) 1992,1993,1995,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | Call from C as: | - | int FPU_u_add(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, | - | int control_w) | - | Return value is the tag of the answer, or-ed with FPU_Exception if | - | one was raised, or -1 on internal error. | - | | - +---------------------------------------------------------------------------*/ - -/* - | Kernel addition routine FPU_u_add(reg *arg1, reg *arg2, reg *answ). - | Takes two valid reg f.p. numbers (TAG_Valid), which are - | treated as unsigned numbers, - | and returns their sum as a TAG_Valid or TAG_Special f.p. number. - | The returned number is normalized. - | Basic checks are performed if PARANOID is defined. - */ - -#include "exception.h" -#include "fpu_emu.h" -#include "control_w.h" - -.text -ENTRY(FPU_u_add) - pushl %ebp - movl %esp,%ebp - pushl %esi - pushl %edi - pushl %ebx - - movl PARAM1,%esi /* source 1 */ - movl PARAM2,%edi /* source 2 */ - - movl PARAM6,%ecx - movl %ecx,%edx - subl PARAM7,%ecx /* exp1 - exp2 */ - jge L_arg1_larger - - /* num1 is smaller */ - movl SIGL(%esi),%ebx - movl SIGH(%esi),%eax - - movl %edi,%esi - movl PARAM7,%edx - negw %cx - jmp L_accum_loaded - -L_arg1_larger: - /* num1 has larger or equal exponent */ - movl SIGL(%edi),%ebx - movl SIGH(%edi),%eax - -L_accum_loaded: - movl PARAM3,%edi /* destination */ - movw %dx,EXP(%edi) /* Copy exponent to destination */ - - xorl %edx,%edx /* clear the extension */ - -#ifdef PARANOID - testl $0x80000000,%eax - je L_bugged - - testl $0x80000000,SIGH(%esi) - je L_bugged -#endif /* PARANOID */ - -/* The number to be shifted is in %eax:%ebx:%edx */ - cmpw $32,%cx /* shrd only works for 0..31 bits */ - jnc L_more_than_31 - -/* less than 32 bits */ - shrd %cl,%ebx,%edx - shrd %cl,%eax,%ebx - shr %cl,%eax - jmp L_shift_done - -L_more_than_31: - cmpw $64,%cx - jnc L_more_than_63 - - subb $32,%cl - jz L_exactly_32 - - shrd %cl,%eax,%edx - shr %cl,%eax - orl %ebx,%ebx - jz L_more_31_no_low /* none of the lowest bits is set */ - - orl $1,%edx /* record the fact in the extension */ - -L_more_31_no_low: - movl %eax,%ebx - xorl %eax,%eax - jmp L_shift_done - -L_exactly_32: - movl %ebx,%edx - movl %eax,%ebx - xorl %eax,%eax - jmp L_shift_done - -L_more_than_63: - cmpw $65,%cx - jnc L_more_than_64 - - movl %eax,%edx - orl %ebx,%ebx - jz L_more_63_no_low - - orl $1,%edx - jmp L_more_63_no_low - -L_more_than_64: - movl $1,%edx /* The shifted nr always at least one '1' */ - -L_more_63_no_low: - xorl %ebx,%ebx - xorl %eax,%eax - -L_shift_done: - /* Now do the addition */ - addl SIGL(%esi),%ebx - adcl SIGH(%esi),%eax - jnc L_round_the_result - - /* Overflow, adjust the result */ - rcrl $1,%eax - rcrl $1,%ebx - rcrl $1,%edx - jnc L_no_bit_lost - - orl $1,%edx - -L_no_bit_lost: - incw EXP(%edi) - -L_round_the_result: - jmp fpu_reg_round /* Round the result */ - - - -#ifdef PARANOID -/* If we ever get here then we have problems! */ -L_bugged: - pushl EX_INTERNAL|0x201 - call EXCEPTION - pop %ebx - movl $-1,%eax - jmp L_exit - -L_exit: - popl %ebx - popl %edi - popl %esi - leave - ret -#endif /* PARANOID */ diff --git a/arch/i386/math-emu/reg_u_div.S b/arch/i386/math-emu/reg_u_div.S deleted file mode 100644 index cc00654b6f9..00000000000 --- a/arch/i386/math-emu/reg_u_div.S +++ /dev/null @@ -1,471 +0,0 @@ - .file "reg_u_div.S" -/*---------------------------------------------------------------------------+ - | reg_u_div.S | - | | - | Divide one FPU_REG by another and put the result in a destination FPU_REG.| - | | - | Copyright (C) 1992,1993,1995,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | Call from C as: | - | int FPU_u_div(FPU_REG *a, FPU_REG *b, FPU_REG *dest, | - | unsigned int control_word, char *sign) | - | | - | Does not compute the destination exponent, but does adjust it. | - | | - | Return value is the tag of the answer, or-ed with FPU_Exception if | - | one was raised, or -1 on internal error. | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "fpu_emu.h" -#include "control_w.h" - - -/* #define dSIGL(x) (x) */ -/* #define dSIGH(x) 4(x) */ - - -#ifndef NON_REENTRANT_FPU -/* - Local storage on the stack: - Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 - Overflow flag: ovfl_flag - */ -#define FPU_accum_3 -4(%ebp) -#define FPU_accum_2 -8(%ebp) -#define FPU_accum_1 -12(%ebp) -#define FPU_accum_0 -16(%ebp) -#define FPU_result_1 -20(%ebp) -#define FPU_result_2 -24(%ebp) -#define FPU_ovfl_flag -28(%ebp) - -#else -.data -/* - Local storage in a static area: - Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 - Overflow flag: ovfl_flag - */ - .align 4,0 -FPU_accum_3: - .long 0 -FPU_accum_2: - .long 0 -FPU_accum_1: - .long 0 -FPU_accum_0: - .long 0 -FPU_result_1: - .long 0 -FPU_result_2: - .long 0 -FPU_ovfl_flag: - .byte 0 -#endif /* NON_REENTRANT_FPU */ - -#define REGA PARAM1 -#define REGB PARAM2 -#define DEST PARAM3 - -.text -ENTRY(FPU_u_div) - pushl %ebp - movl %esp,%ebp -#ifndef NON_REENTRANT_FPU - subl $28,%esp -#endif /* NON_REENTRANT_FPU */ - - pushl %esi - pushl %edi - pushl %ebx - - movl REGA,%esi - movl REGB,%ebx - movl DEST,%edi - - movswl EXP(%esi),%edx - movswl EXP(%ebx),%eax - subl %eax,%edx - addl EXP_BIAS,%edx - - /* A denormal and a large number can cause an exponent underflow */ - cmpl EXP_WAY_UNDER,%edx - jg xExp_not_underflow - - /* Set to a really low value allow correct handling */ - movl EXP_WAY_UNDER,%edx - -xExp_not_underflow: - - movw %dx,EXP(%edi) - -#ifdef PARANOID -/* testl $0x80000000, SIGH(%esi) // Dividend */ -/* je L_bugged */ - testl $0x80000000, SIGH(%ebx) /* Divisor */ - je L_bugged -#endif /* PARANOID */ - -/* Check if the divisor can be treated as having just 32 bits */ - cmpl $0,SIGL(%ebx) - jnz L_Full_Division /* Can't do a quick divide */ - -/* We should be able to zip through the division here */ - movl SIGH(%ebx),%ecx /* The divisor */ - movl SIGH(%esi),%edx /* Dividend */ - movl SIGL(%esi),%eax /* Dividend */ - - cmpl %ecx,%edx - setaeb FPU_ovfl_flag /* Keep a record */ - jb L_no_adjust - - subl %ecx,%edx /* Prevent the overflow */ - -L_no_adjust: - /* Divide the 64 bit number by the 32 bit denominator */ - divl %ecx - movl %eax,FPU_result_2 - - /* Work on the remainder of the first division */ - xorl %eax,%eax - divl %ecx - movl %eax,FPU_result_1 - - /* Work on the remainder of the 64 bit division */ - xorl %eax,%eax - divl %ecx - - testb $255,FPU_ovfl_flag /* was the num > denom ? */ - je L_no_overflow - - /* Do the shifting here */ - /* increase the exponent */ - incw EXP(%edi) - - /* shift the mantissa right one bit */ - stc /* To set the ms bit */ - rcrl FPU_result_2 - rcrl FPU_result_1 - rcrl %eax - -L_no_overflow: - jmp LRound_precision /* Do the rounding as required */ - - -/*---------------------------------------------------------------------------+ - | Divide: Return arg1/arg2 to arg3. | - | | - | This routine does not use the exponents of arg1 and arg2, but does | - | adjust the exponent of arg3. | - | | - | The maximum returned value is (ignoring exponents) | - | .ffffffff ffffffff | - | ------------------ = 1.ffffffff fffffffe | - | .80000000 00000000 | - | and the minimum is | - | .80000000 00000000 | - | ------------------ = .80000000 00000001 (rounded) | - | .ffffffff ffffffff | - | | - +---------------------------------------------------------------------------*/ - - -L_Full_Division: - /* Save extended dividend in local register */ - movl SIGL(%esi),%eax - movl %eax,FPU_accum_2 - movl SIGH(%esi),%eax - movl %eax,FPU_accum_3 - xorl %eax,%eax - movl %eax,FPU_accum_1 /* zero the extension */ - movl %eax,FPU_accum_0 /* zero the extension */ - - movl SIGL(%esi),%eax /* Get the current num */ - movl SIGH(%esi),%edx - -/*----------------------------------------------------------------------*/ -/* Initialization done. - Do the first 32 bits. */ - - movb $0,FPU_ovfl_flag - cmpl SIGH(%ebx),%edx /* Test for imminent overflow */ - jb LLess_than_1 - ja LGreater_than_1 - - cmpl SIGL(%ebx),%eax - jb LLess_than_1 - -LGreater_than_1: -/* The dividend is greater or equal, would cause overflow */ - setaeb FPU_ovfl_flag /* Keep a record */ - - subl SIGL(%ebx),%eax - sbbl SIGH(%ebx),%edx /* Prevent the overflow */ - movl %eax,FPU_accum_2 - movl %edx,FPU_accum_3 - -LLess_than_1: -/* At this point, we have a dividend < divisor, with a record of - adjustment in FPU_ovfl_flag */ - - /* We will divide by a number which is too large */ - movl SIGH(%ebx),%ecx - addl $1,%ecx - jnc LFirst_div_not_1 - - /* here we need to divide by 100000000h, - i.e., no division at all.. */ - mov %edx,%eax - jmp LFirst_div_done - -LFirst_div_not_1: - divl %ecx /* Divide the numerator by the augmented - denom ms dw */ - -LFirst_div_done: - movl %eax,FPU_result_2 /* Put the result in the answer */ - - mull SIGH(%ebx) /* mul by the ms dw of the denom */ - - subl %eax,FPU_accum_2 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_3 - - movl FPU_result_2,%eax /* Get the result back */ - mull SIGL(%ebx) /* now mul the ls dw of the denom */ - - subl %eax,FPU_accum_1 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_2 - sbbl $0,FPU_accum_3 - je LDo_2nd_32_bits /* Must check for non-zero result here */ - -#ifdef PARANOID - jb L_bugged_1 -#endif /* PARANOID */ - - /* need to subtract another once of the denom */ - incl FPU_result_2 /* Correct the answer */ - - movl SIGL(%ebx),%eax - movl SIGH(%ebx),%edx - subl %eax,FPU_accum_1 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_2 - -#ifdef PARANOID - sbbl $0,FPU_accum_3 - jne L_bugged_1 /* Must check for non-zero result here */ -#endif /* PARANOID */ - -/*----------------------------------------------------------------------*/ -/* Half of the main problem is done, there is just a reduced numerator - to handle now. - Work with the second 32 bits, FPU_accum_0 not used from now on */ -LDo_2nd_32_bits: - movl FPU_accum_2,%edx /* get the reduced num */ - movl FPU_accum_1,%eax - - /* need to check for possible subsequent overflow */ - cmpl SIGH(%ebx),%edx - jb LDo_2nd_div - ja LPrevent_2nd_overflow - - cmpl SIGL(%ebx),%eax - jb LDo_2nd_div - -LPrevent_2nd_overflow: -/* The numerator is greater or equal, would cause overflow */ - /* prevent overflow */ - subl SIGL(%ebx),%eax - sbbl SIGH(%ebx),%edx - movl %edx,FPU_accum_2 - movl %eax,FPU_accum_1 - - incl FPU_result_2 /* Reflect the subtraction in the answer */ - -#ifdef PARANOID - je L_bugged_2 /* Can't bump the result to 1.0 */ -#endif /* PARANOID */ - -LDo_2nd_div: - cmpl $0,%ecx /* augmented denom msw */ - jnz LSecond_div_not_1 - - /* %ecx == 0, we are dividing by 1.0 */ - mov %edx,%eax - jmp LSecond_div_done - -LSecond_div_not_1: - divl %ecx /* Divide the numerator by the denom ms dw */ - -LSecond_div_done: - movl %eax,FPU_result_1 /* Put the result in the answer */ - - mull SIGH(%ebx) /* mul by the ms dw of the denom */ - - subl %eax,FPU_accum_1 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_2 - -#ifdef PARANOID - jc L_bugged_2 -#endif /* PARANOID */ - - movl FPU_result_1,%eax /* Get the result back */ - mull SIGL(%ebx) /* now mul the ls dw of the denom */ - - subl %eax,FPU_accum_0 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ - sbbl $0,FPU_accum_2 - -#ifdef PARANOID - jc L_bugged_2 -#endif /* PARANOID */ - - jz LDo_3rd_32_bits - -#ifdef PARANOID - cmpl $1,FPU_accum_2 - jne L_bugged_2 -#endif /* PARANOID */ - - /* need to subtract another once of the denom */ - movl SIGL(%ebx),%eax - movl SIGH(%ebx),%edx - subl %eax,FPU_accum_0 /* Subtract from the num local reg */ - sbbl %edx,FPU_accum_1 - sbbl $0,FPU_accum_2 - -#ifdef PARANOID - jc L_bugged_2 - jne L_bugged_2 -#endif /* PARANOID */ - - addl $1,FPU_result_1 /* Correct the answer */ - adcl $0,FPU_result_2 - -#ifdef PARANOID - jc L_bugged_2 /* Must check for non-zero result here */ -#endif /* PARANOID */ - -/*----------------------------------------------------------------------*/ -/* The division is essentially finished here, we just need to perform - tidying operations. - Deal with the 3rd 32 bits */ -LDo_3rd_32_bits: - movl FPU_accum_1,%edx /* get the reduced num */ - movl FPU_accum_0,%eax - - /* need to check for possible subsequent overflow */ - cmpl SIGH(%ebx),%edx /* denom */ - jb LRound_prep - ja LPrevent_3rd_overflow - - cmpl SIGL(%ebx),%eax /* denom */ - jb LRound_prep - -LPrevent_3rd_overflow: - /* prevent overflow */ - subl SIGL(%ebx),%eax - sbbl SIGH(%ebx),%edx - movl %edx,FPU_accum_1 - movl %eax,FPU_accum_0 - - addl $1,FPU_result_1 /* Reflect the subtraction in the answer */ - adcl $0,FPU_result_2 - jne LRound_prep - jnc LRound_prep - - /* This is a tricky spot, there is an overflow of the answer */ - movb $255,FPU_ovfl_flag /* Overflow -> 1.000 */ - -LRound_prep: -/* - * Prepare for rounding. - * To test for rounding, we just need to compare 2*accum with the - * denom. - */ - movl FPU_accum_0,%ecx - movl FPU_accum_1,%edx - movl %ecx,%eax - orl %edx,%eax - jz LRound_ovfl /* The accumulator contains zero. */ - - /* Multiply by 2 */ - clc - rcll $1,%ecx - rcll $1,%edx - jc LRound_large /* No need to compare, denom smaller */ - - subl SIGL(%ebx),%ecx - sbbl SIGH(%ebx),%edx - jnc LRound_not_small - - movl $0x70000000,%eax /* Denom was larger */ - jmp LRound_ovfl - -LRound_not_small: - jnz LRound_large - - movl $0x80000000,%eax /* Remainder was exactly 1/2 denom */ - jmp LRound_ovfl - -LRound_large: - movl $0xff000000,%eax /* Denom was smaller */ - -LRound_ovfl: -/* We are now ready to deal with rounding, but first we must get - the bits properly aligned */ - testb $255,FPU_ovfl_flag /* was the num > denom ? */ - je LRound_precision - - incw EXP(%edi) - - /* shift the mantissa right one bit */ - stc /* Will set the ms bit */ - rcrl FPU_result_2 - rcrl FPU_result_1 - rcrl %eax - -/* Round the result as required */ -LRound_precision: - decw EXP(%edi) /* binary point between 1st & 2nd bits */ - - movl %eax,%edx - movl FPU_result_1,%ebx - movl FPU_result_2,%eax - jmp fpu_reg_round - - -#ifdef PARANOID -/* The logic is wrong if we got here */ -L_bugged: - pushl EX_INTERNAL|0x202 - call EXCEPTION - pop %ebx - jmp L_exit - -L_bugged_1: - pushl EX_INTERNAL|0x203 - call EXCEPTION - pop %ebx - jmp L_exit - -L_bugged_2: - pushl EX_INTERNAL|0x204 - call EXCEPTION - pop %ebx - jmp L_exit - -L_exit: - movl $-1,%eax - popl %ebx - popl %edi - popl %esi - - leave - ret -#endif /* PARANOID */ diff --git a/arch/i386/math-emu/reg_u_mul.S b/arch/i386/math-emu/reg_u_mul.S deleted file mode 100644 index 973f12af97d..00000000000 --- a/arch/i386/math-emu/reg_u_mul.S +++ /dev/null @@ -1,148 +0,0 @@ - .file "reg_u_mul.S" -/*---------------------------------------------------------------------------+ - | reg_u_mul.S | - | | - | Core multiplication routine | - | | - | Copyright (C) 1992,1993,1995,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | Basic multiplication routine. | - | Does not check the resulting exponent for overflow/underflow | - | | - | FPU_u_mul(FPU_REG *a, FPU_REG *b, FPU_REG *c, unsigned int cw); | - | | - | Internal working is at approx 128 bits. | - | Result is rounded to nearest 53 or 64 bits, using "nearest or even". | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "fpu_emu.h" -#include "control_w.h" - - - -#ifndef NON_REENTRANT_FPU -/* Local storage on the stack: */ -#define FPU_accum_0 -4(%ebp) /* ms word */ -#define FPU_accum_1 -8(%ebp) - -#else -/* Local storage in a static area: */ -.data - .align 4,0 -FPU_accum_0: - .long 0 -FPU_accum_1: - .long 0 -#endif /* NON_REENTRANT_FPU */ - - -.text -ENTRY(FPU_u_mul) - pushl %ebp - movl %esp,%ebp -#ifndef NON_REENTRANT_FPU - subl $8,%esp -#endif /* NON_REENTRANT_FPU */ - - pushl %esi - pushl %edi - pushl %ebx - - movl PARAM1,%esi - movl PARAM2,%edi - -#ifdef PARANOID - testl $0x80000000,SIGH(%esi) - jz L_bugged - testl $0x80000000,SIGH(%edi) - jz L_bugged -#endif /* PARANOID */ - - xorl %ecx,%ecx - xorl %ebx,%ebx - - movl SIGL(%esi),%eax - mull SIGL(%edi) - movl %eax,FPU_accum_0 - movl %edx,FPU_accum_1 - - movl SIGL(%esi),%eax - mull SIGH(%edi) - addl %eax,FPU_accum_1 - adcl %edx,%ebx -/* adcl $0,%ecx // overflow here is not possible */ - - movl SIGH(%esi),%eax - mull SIGL(%edi) - addl %eax,FPU_accum_1 - adcl %edx,%ebx - adcl $0,%ecx - - movl SIGH(%esi),%eax - mull SIGH(%edi) - addl %eax,%ebx - adcl %edx,%ecx - - /* Get the sum of the exponents. */ - movl PARAM6,%eax - subl EXP_BIAS-1,%eax - - /* Two denormals can cause an exponent underflow */ - cmpl EXP_WAY_UNDER,%eax - jg Exp_not_underflow - - /* Set to a really low value allow correct handling */ - movl EXP_WAY_UNDER,%eax - -Exp_not_underflow: - -/* Have now finished with the sources */ - movl PARAM3,%edi /* Point to the destination */ - movw %ax,EXP(%edi) - -/* Now make sure that the result is normalized */ - testl $0x80000000,%ecx - jnz LResult_Normalised - - /* Normalize by shifting left one bit */ - shll $1,FPU_accum_0 - rcll $1,FPU_accum_1 - rcll $1,%ebx - rcll $1,%ecx - decw EXP(%edi) - -LResult_Normalised: - movl FPU_accum_0,%eax - movl FPU_accum_1,%edx - orl %eax,%eax - jz L_extent_zero - - orl $1,%edx - -L_extent_zero: - movl %ecx,%eax - jmp fpu_reg_round - - -#ifdef PARANOID -L_bugged: - pushl EX_INTERNAL|0x205 - call EXCEPTION - pop %ebx - jmp L_exit - -L_exit: - popl %ebx - popl %edi - popl %esi - leave - ret -#endif /* PARANOID */ - diff --git a/arch/i386/math-emu/reg_u_sub.S b/arch/i386/math-emu/reg_u_sub.S deleted file mode 100644 index 1b6c24801d2..00000000000 --- a/arch/i386/math-emu/reg_u_sub.S +++ /dev/null @@ -1,272 +0,0 @@ - .file "reg_u_sub.S" -/*---------------------------------------------------------------------------+ - | reg_u_sub.S | - | | - | Core floating point subtraction routine. | - | | - | Copyright (C) 1992,1993,1995,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@suburbia.net | - | | - | Call from C as: | - | int FPU_u_sub(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, | - | int control_w) | - | Return value is the tag of the answer, or-ed with FPU_Exception if | - | one was raised, or -1 on internal error. | - | | - +---------------------------------------------------------------------------*/ - -/* - | Kernel subtraction routine FPU_u_sub(reg *arg1, reg *arg2, reg *answ). - | Takes two valid reg f.p. numbers (TAG_Valid), which are - | treated as unsigned numbers, - | and returns their difference as a TAG_Valid or TAG_Zero f.p. - | number. - | The first number (arg1) must be the larger. - | The returned number is normalized. - | Basic checks are performed if PARANOID is defined. - */ - -#include "exception.h" -#include "fpu_emu.h" -#include "control_w.h" - -.text -ENTRY(FPU_u_sub) - pushl %ebp - movl %esp,%ebp - pushl %esi - pushl %edi - pushl %ebx - - movl PARAM1,%esi /* source 1 */ - movl PARAM2,%edi /* source 2 */ - - movl PARAM6,%ecx - subl PARAM7,%ecx /* exp1 - exp2 */ - -#ifdef PARANOID - /* source 2 is always smaller than source 1 */ - js L_bugged_1 - - testl $0x80000000,SIGH(%edi) /* The args are assumed to be be normalized */ - je L_bugged_2 - - testl $0x80000000,SIGH(%esi) - je L_bugged_2 -#endif /* PARANOID */ - -/*--------------------------------------+ - | Form a register holding the | - | smaller number | - +--------------------------------------*/ - movl SIGH(%edi),%eax /* register ms word */ - movl SIGL(%edi),%ebx /* register ls word */ - - movl PARAM3,%edi /* destination */ - movl PARAM6,%edx - movw %dx,EXP(%edi) /* Copy exponent to destination */ - - xorl %edx,%edx /* register extension */ - -/*--------------------------------------+ - | Shift the temporary register | - | right the required number of | - | places. | - +--------------------------------------*/ - - cmpw $32,%cx /* shrd only works for 0..31 bits */ - jnc L_more_than_31 - -/* less than 32 bits */ - shrd %cl,%ebx,%edx - shrd %cl,%eax,%ebx - shr %cl,%eax - jmp L_shift_done - -L_more_than_31: - cmpw $64,%cx - jnc L_more_than_63 - - subb $32,%cl - jz L_exactly_32 - - shrd %cl,%eax,%edx - shr %cl,%eax - orl %ebx,%ebx - jz L_more_31_no_low /* none of the lowest bits is set */ - - orl $1,%edx /* record the fact in the extension */ - -L_more_31_no_low: - movl %eax,%ebx - xorl %eax,%eax - jmp L_shift_done - -L_exactly_32: - movl %ebx,%edx - movl %eax,%ebx - xorl %eax,%eax - jmp L_shift_done - -L_more_than_63: - cmpw $65,%cx - jnc L_more_than_64 - - /* Shift right by 64 bits */ - movl %eax,%edx - orl %ebx,%ebx - jz L_more_63_no_low - - orl $1,%edx - jmp L_more_63_no_low - -L_more_than_64: - jne L_more_than_65 - - /* Shift right by 65 bits */ - /* Carry is clear if we get here */ - movl %eax,%edx - rcrl %edx - jnc L_shift_65_nc - - orl $1,%edx - jmp L_more_63_no_low - -L_shift_65_nc: - orl %ebx,%ebx - jz L_more_63_no_low - - orl $1,%edx - jmp L_more_63_no_low - -L_more_than_65: - movl $1,%edx /* The shifted nr always at least one '1' */ - -L_more_63_no_low: - xorl %ebx,%ebx - xorl %eax,%eax - -L_shift_done: -L_subtr: -/*------------------------------+ - | Do the subtraction | - +------------------------------*/ - xorl %ecx,%ecx - subl %edx,%ecx - movl %ecx,%edx - movl SIGL(%esi),%ecx - sbbl %ebx,%ecx - movl %ecx,%ebx - movl SIGH(%esi),%ecx - sbbl %eax,%ecx - movl %ecx,%eax - -#ifdef PARANOID - /* We can never get a borrow */ - jc L_bugged -#endif /* PARANOID */ - -/*--------------------------------------+ - | Normalize the result | - +--------------------------------------*/ - testl $0x80000000,%eax - jnz L_round /* no shifting needed */ - - orl %eax,%eax - jnz L_shift_1 /* shift left 1 - 31 bits */ - - orl %ebx,%ebx - jnz L_shift_32 /* shift left 32 - 63 bits */ - -/* - * A rare case, the only one which is non-zero if we got here - * is: 1000000 .... 0000 - * -0111111 .... 1111 1 - * -------------------- - * 0000000 .... 0000 1 - */ - - cmpl $0x80000000,%edx - jnz L_must_be_zero - - /* Shift left 64 bits */ - subw $64,EXP(%edi) - xchg %edx,%eax - jmp fpu_reg_round - -L_must_be_zero: -#ifdef PARANOID - orl %edx,%edx - jnz L_bugged_3 -#endif /* PARANOID */ - - /* The result is zero */ - movw $0,EXP(%edi) /* exponent */ - movl $0,SIGL(%edi) - movl $0,SIGH(%edi) - movl TAG_Zero,%eax - jmp L_exit - -L_shift_32: - movl %ebx,%eax - movl %edx,%ebx - movl $0,%edx - subw $32,EXP(%edi) /* Can get underflow here */ - -/* We need to shift left by 1 - 31 bits */ -L_shift_1: - bsrl %eax,%ecx /* get the required shift in %ecx */ - subl $31,%ecx - negl %ecx - shld %cl,%ebx,%eax - shld %cl,%edx,%ebx - shl %cl,%edx - subw %cx,EXP(%edi) /* Can get underflow here */ - -L_round: - jmp fpu_reg_round /* Round the result */ - - -#ifdef PARANOID -L_bugged_1: - pushl EX_INTERNAL|0x206 - call EXCEPTION - pop %ebx - jmp L_error_exit - -L_bugged_2: - pushl EX_INTERNAL|0x209 - call EXCEPTION - pop %ebx - jmp L_error_exit - -L_bugged_3: - pushl EX_INTERNAL|0x210 - call EXCEPTION - pop %ebx - jmp L_error_exit - -L_bugged_4: - pushl EX_INTERNAL|0x211 - call EXCEPTION - pop %ebx - jmp L_error_exit - -L_bugged: - pushl EX_INTERNAL|0x212 - call EXCEPTION - pop %ebx - jmp L_error_exit - -L_error_exit: - movl $-1,%eax - -#endif /* PARANOID */ - -L_exit: - popl %ebx - popl %edi - popl %esi - leave - ret diff --git a/arch/i386/math-emu/round_Xsig.S b/arch/i386/math-emu/round_Xsig.S deleted file mode 100644 index bbe0e87718e..00000000000 --- a/arch/i386/math-emu/round_Xsig.S +++ /dev/null @@ -1,141 +0,0 @@ -/*---------------------------------------------------------------------------+ - | round_Xsig.S | - | | - | Copyright (C) 1992,1993,1994,1995 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@jacobi.maths.monash.edu.au | - | | - | Normalize and round a 12 byte quantity. | - | Call from C as: | - | int round_Xsig(Xsig *n) | - | | - | Normalize a 12 byte quantity. | - | Call from C as: | - | int norm_Xsig(Xsig *n) | - | | - | Each function returns the size of the shift (nr of bits). | - | | - +---------------------------------------------------------------------------*/ - .file "round_Xsig.S" - -#include "fpu_emu.h" - - -.text -ENTRY(round_Xsig) - pushl %ebp - movl %esp,%ebp - pushl %ebx /* Reserve some space */ - pushl %ebx - pushl %esi - - movl PARAM1,%esi - - movl 8(%esi),%edx - movl 4(%esi),%ebx - movl (%esi),%eax - - movl $0,-4(%ebp) - - orl %edx,%edx /* ms bits */ - js L_round /* Already normalized */ - jnz L_shift_1 /* Shift left 1 - 31 bits */ - - movl %ebx,%edx - movl %eax,%ebx - xorl %eax,%eax - movl $-32,-4(%ebp) - -/* We need to shift left by 1 - 31 bits */ -L_shift_1: - bsrl %edx,%ecx /* get the required shift in %ecx */ - subl $31,%ecx - negl %ecx - subl %ecx,-4(%ebp) - shld %cl,%ebx,%edx - shld %cl,%eax,%ebx - shl %cl,%eax - -L_round: - testl $0x80000000,%eax - jz L_exit - - addl $1,%ebx - adcl $0,%edx - jnz L_exit - - movl $0x80000000,%edx - incl -4(%ebp) - -L_exit: - movl %edx,8(%esi) - movl %ebx,4(%esi) - movl %eax,(%esi) - - movl -4(%ebp),%eax - - popl %esi - popl %ebx - leave - ret - - - - -ENTRY(norm_Xsig) - pushl %ebp - movl %esp,%ebp - pushl %ebx /* Reserve some space */ - pushl %ebx - pushl %esi - - movl PARAM1,%esi - - movl 8(%esi),%edx - movl 4(%esi),%ebx - movl (%esi),%eax - - movl $0,-4(%ebp) - - orl %edx,%edx /* ms bits */ - js L_n_exit /* Already normalized */ - jnz L_n_shift_1 /* Shift left 1 - 31 bits */ - - movl %ebx,%edx - movl %eax,%ebx - xorl %eax,%eax - movl $-32,-4(%ebp) - - orl %edx,%edx /* ms bits */ - js L_n_exit /* Normalized now */ - jnz L_n_shift_1 /* Shift left 1 - 31 bits */ - - movl %ebx,%edx - movl %eax,%ebx - xorl %eax,%eax - addl $-32,-4(%ebp) - jmp L_n_exit /* Might not be normalized, - but shift no more. */ - -/* We need to shift left by 1 - 31 bits */ -L_n_shift_1: - bsrl %edx,%ecx /* get the required shift in %ecx */ - subl $31,%ecx - negl %ecx - subl %ecx,-4(%ebp) - shld %cl,%ebx,%edx - shld %cl,%eax,%ebx - shl %cl,%eax - -L_n_exit: - movl %edx,8(%esi) - movl %ebx,4(%esi) - movl %eax,(%esi) - - movl -4(%ebp),%eax - - popl %esi - popl %ebx - leave - ret - diff --git a/arch/i386/math-emu/shr_Xsig.S b/arch/i386/math-emu/shr_Xsig.S deleted file mode 100644 index 31cdd118e91..00000000000 --- a/arch/i386/math-emu/shr_Xsig.S +++ /dev/null @@ -1,87 +0,0 @@ - .file "shr_Xsig.S" -/*---------------------------------------------------------------------------+ - | shr_Xsig.S | - | | - | 12 byte right shift function | - | | - | Copyright (C) 1992,1994,1995 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@jacobi.maths.monash.edu.au | - | | - | Call from C as: | - | void shr_Xsig(Xsig *arg, unsigned nr) | - | | - | Extended shift right function. | - | Fastest for small shifts. | - | Shifts the 12 byte quantity pointed to by the first arg (arg) | - | right by the number of bits specified by the second arg (nr). | - | | - +---------------------------------------------------------------------------*/ - -#include "fpu_emu.h" - -.text -ENTRY(shr_Xsig) - push %ebp - movl %esp,%ebp - pushl %esi - movl PARAM2,%ecx - movl PARAM1,%esi - cmpl $32,%ecx /* shrd only works for 0..31 bits */ - jnc L_more_than_31 - -/* less than 32 bits */ - pushl %ebx - movl (%esi),%eax /* lsl */ - movl 4(%esi),%ebx /* midl */ - movl 8(%esi),%edx /* msl */ - shrd %cl,%ebx,%eax - shrd %cl,%edx,%ebx - shr %cl,%edx - movl %eax,(%esi) - movl %ebx,4(%esi) - movl %edx,8(%esi) - popl %ebx - popl %esi - leave - ret - -L_more_than_31: - cmpl $64,%ecx - jnc L_more_than_63 - - subb $32,%cl - movl 4(%esi),%eax /* midl */ - movl 8(%esi),%edx /* msl */ - shrd %cl,%edx,%eax - shr %cl,%edx - movl %eax,(%esi) - movl %edx,4(%esi) - movl $0,8(%esi) - popl %esi - leave - ret - -L_more_than_63: - cmpl $96,%ecx - jnc L_more_than_95 - - subb $64,%cl - movl 8(%esi),%eax /* msl */ - shr %cl,%eax - xorl %edx,%edx - movl %eax,(%esi) - movl %edx,4(%esi) - movl %edx,8(%esi) - popl %esi - leave - ret - -L_more_than_95: - xorl %eax,%eax - movl %eax,(%esi) - movl %eax,4(%esi) - movl %eax,8(%esi) - popl %esi - leave - ret diff --git a/arch/i386/math-emu/status_w.h b/arch/i386/math-emu/status_w.h deleted file mode 100644 index 59e73302aa6..00000000000 --- a/arch/i386/math-emu/status_w.h +++ /dev/null @@ -1,67 +0,0 @@ -/*---------------------------------------------------------------------------+ - | status_w.h | - | | - | Copyright (C) 1992,1993 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@vaxc.cc.monash.edu.au | - | | - +---------------------------------------------------------------------------*/ - -#ifndef _STATUS_H_ -#define _STATUS_H_ - -#include "fpu_emu.h" /* for definition of PECULIAR_486 */ - -#ifdef __ASSEMBLY__ -#define Const__(x) $##x -#else -#define Const__(x) x -#endif - -#define SW_Backward Const__(0x8000) /* backward compatibility */ -#define SW_C3 Const__(0x4000) /* condition bit 3 */ -#define SW_Top Const__(0x3800) /* top of stack */ -#define SW_Top_Shift Const__(11) /* shift for top of stack bits */ -#define SW_C2 Const__(0x0400) /* condition bit 2 */ -#define SW_C1 Const__(0x0200) /* condition bit 1 */ -#define SW_C0 Const__(0x0100) /* condition bit 0 */ -#define SW_Summary Const__(0x0080) /* exception summary */ -#define SW_Stack_Fault Const__(0x0040) /* stack fault */ -#define SW_Precision Const__(0x0020) /* loss of precision */ -#define SW_Underflow Const__(0x0010) /* underflow */ -#define SW_Overflow Const__(0x0008) /* overflow */ -#define SW_Zero_Div Const__(0x0004) /* divide by zero */ -#define SW_Denorm_Op Const__(0x0002) /* denormalized operand */ -#define SW_Invalid Const__(0x0001) /* invalid operation */ - -#define SW_Exc_Mask Const__(0x27f) /* Status word exception bit mask */ - -#ifndef __ASSEMBLY__ - -#define COMP_A_gt_B 1 -#define COMP_A_eq_B 2 -#define COMP_A_lt_B 3 -#define COMP_No_Comp 4 -#define COMP_Denormal 0x20 -#define COMP_NaN 0x40 -#define COMP_SNaN 0x80 - -#define status_word() \ - ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top)) -static inline void setcc(int cc) -{ - partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3); - partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3); -} - -#ifdef PECULIAR_486 - /* Default, this conveys no information, but an 80486 does it. */ - /* Clear the SW_C1 bit, "other bits undefined". */ -# define clear_C1() { partial_status &= ~SW_C1; } -# else -# define clear_C1() -#endif /* PECULIAR_486 */ - -#endif /* __ASSEMBLY__ */ - -#endif /* _STATUS_H_ */ diff --git a/arch/i386/math-emu/version.h b/arch/i386/math-emu/version.h deleted file mode 100644 index a0d73a1d2b6..00000000000 --- a/arch/i386/math-emu/version.h +++ /dev/null @@ -1,12 +0,0 @@ -/*---------------------------------------------------------------------------+ - | version.h | - | | - | | - | Copyright (C) 1992,1993,1994,1996,1997,1999 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | - | E-mail billm@melbpc.org.au | - | | - | | - +---------------------------------------------------------------------------*/ - -#define FPU_VERSION "wm-FPU-emu version 2.01" diff --git a/arch/i386/math-emu/wm_shrx.S b/arch/i386/math-emu/wm_shrx.S deleted file mode 100644 index 51842831798..00000000000 --- a/arch/i386/math-emu/wm_shrx.S +++ /dev/null @@ -1,204 +0,0 @@ - .file "wm_shrx.S" -/*---------------------------------------------------------------------------+ - | wm_shrx.S | - | | - | 64 bit right shift functions | - | | - | Copyright (C) 1992,1995 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@jacobi.maths.monash.edu.au | - | | - | Call from C as: | - | unsigned FPU_shrx(void *arg1, unsigned arg2) | - | and | - | unsigned FPU_shrxs(void *arg1, unsigned arg2) | - | | - +---------------------------------------------------------------------------*/ - -#include "fpu_emu.h" - -.text -/*---------------------------------------------------------------------------+ - | unsigned FPU_shrx(void *arg1, unsigned arg2) | - | | - | Extended shift right function. | - | Fastest for small shifts. | - | Shifts the 64 bit quantity pointed to by the first arg (arg1) | - | right by the number of bits specified by the second arg (arg2). | - | Forms a 96 bit quantity from the 64 bit arg and eax: | - | [ 64 bit arg ][ eax ] | - | shift right ---------> | - | The eax register is initialized to 0 before the shifting. | - | Results returned in the 64 bit arg and eax. | - +---------------------------------------------------------------------------*/ - -ENTRY(FPU_shrx) - push %ebp - movl %esp,%ebp - pushl %esi - movl PARAM2,%ecx - movl PARAM1,%esi - cmpl $32,%ecx /* shrd only works for 0..31 bits */ - jnc L_more_than_31 - -/* less than 32 bits */ - pushl %ebx - movl (%esi),%ebx /* lsl */ - movl 4(%esi),%edx /* msl */ - xorl %eax,%eax /* extension */ - shrd %cl,%ebx,%eax - shrd %cl,%edx,%ebx - shr %cl,%edx - movl %ebx,(%esi) - movl %edx,4(%esi) - popl %ebx - popl %esi - leave - ret - -L_more_than_31: - cmpl $64,%ecx - jnc L_more_than_63 - - subb $32,%cl - movl (%esi),%eax /* lsl */ - movl 4(%esi),%edx /* msl */ - shrd %cl,%edx,%eax - shr %cl,%edx - movl %edx,(%esi) - movl $0,4(%esi) - popl %esi - leave - ret - -L_more_than_63: - cmpl $96,%ecx - jnc L_more_than_95 - - subb $64,%cl - movl 4(%esi),%eax /* msl */ - shr %cl,%eax - xorl %edx,%edx - movl %edx,(%esi) - movl %edx,4(%esi) - popl %esi - leave - ret - -L_more_than_95: - xorl %eax,%eax - movl %eax,(%esi) - movl %eax,4(%esi) - popl %esi - leave - ret - - -/*---------------------------------------------------------------------------+ - | unsigned FPU_shrxs(void *arg1, unsigned arg2) | - | | - | Extended shift right function (optimized for small floating point | - | integers). | - | Shifts the 64 bit quantity pointed to by the first arg (arg1) | - | right by the number of bits specified by the second arg (arg2). | - | Forms a 96 bit quantity from the 64 bit arg and eax: | - | [ 64 bit arg ][ eax ] | - | shift right ---------> | - | The eax register is initialized to 0 before the shifting. | - | The lower 8 bits of eax are lost and replaced by a flag which is | - | set (to 0x01) if any bit, apart from the first one, is set in the | - | part which has been shifted out of the arg. | - | Results returned in the 64 bit arg and eax. | - +---------------------------------------------------------------------------*/ -ENTRY(FPU_shrxs) - push %ebp - movl %esp,%ebp - pushl %esi - pushl %ebx - movl PARAM2,%ecx - movl PARAM1,%esi - cmpl $64,%ecx /* shrd only works for 0..31 bits */ - jnc Ls_more_than_63 - - cmpl $32,%ecx /* shrd only works for 0..31 bits */ - jc Ls_less_than_32 - -/* We got here without jumps by assuming that the most common requirement - is for small integers */ -/* Shift by [32..63] bits */ - subb $32,%cl - movl (%esi),%eax /* lsl */ - movl 4(%esi),%edx /* msl */ - xorl %ebx,%ebx - shrd %cl,%eax,%ebx - shrd %cl,%edx,%eax - shr %cl,%edx - orl %ebx,%ebx /* test these 32 bits */ - setne %bl - test $0x7fffffff,%eax /* and 31 bits here */ - setne %bh - orw %bx,%bx /* Any of the 63 bit set ? */ - setne %al - movl %edx,(%esi) - movl $0,4(%esi) - popl %ebx - popl %esi - leave - ret - -/* Shift by [0..31] bits */ -Ls_less_than_32: - movl (%esi),%ebx /* lsl */ - movl 4(%esi),%edx /* msl */ - xorl %eax,%eax /* extension */ - shrd %cl,%ebx,%eax - shrd %cl,%edx,%ebx - shr %cl,%edx - test $0x7fffffff,%eax /* only need to look at eax here */ - setne %al - movl %ebx,(%esi) - movl %edx,4(%esi) - popl %ebx - popl %esi - leave - ret - -/* Shift by [64..95] bits */ -Ls_more_than_63: - cmpl $96,%ecx - jnc Ls_more_than_95 - - subb $64,%cl - movl (%esi),%ebx /* lsl */ - movl 4(%esi),%eax /* msl */ - xorl %edx,%edx /* extension */ - shrd %cl,%ebx,%edx - shrd %cl,%eax,%ebx - shr %cl,%eax - orl %ebx,%edx - setne %bl - test $0x7fffffff,%eax /* only need to look at eax here */ - setne %bh - orw %bx,%bx - setne %al - xorl %edx,%edx - movl %edx,(%esi) /* set to zero */ - movl %edx,4(%esi) /* set to zero */ - popl %ebx - popl %esi - leave - ret - -Ls_more_than_95: -/* Shift by [96..inf) bits */ - xorl %eax,%eax - movl (%esi),%ebx - orl 4(%esi),%ebx - setne %al - xorl %ebx,%ebx - movl %ebx,(%esi) - movl %ebx,4(%esi) - popl %ebx - popl %esi - leave - ret diff --git a/arch/i386/math-emu/wm_sqrt.S b/arch/i386/math-emu/wm_sqrt.S deleted file mode 100644 index d258f59564e..00000000000 --- a/arch/i386/math-emu/wm_sqrt.S +++ /dev/null @@ -1,470 +0,0 @@ - .file "wm_sqrt.S" -/*---------------------------------------------------------------------------+ - | wm_sqrt.S | - | | - | Fixed point arithmetic square root evaluation. | - | | - | Copyright (C) 1992,1993,1995,1997 | - | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | - | Australia. E-mail billm@suburbia.net | - | | - | Call from C as: | - | int wm_sqrt(FPU_REG *n, unsigned int control_word) | - | | - +---------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------+ - | wm_sqrt(FPU_REG *n, unsigned int control_word) | - | returns the square root of n in n. | - | | - | Use Newton's method to compute the square root of a number, which must | - | be in the range [1.0 .. 4.0), to 64 bits accuracy. | - | Does not check the sign or tag of the argument. | - | Sets the exponent, but not the sign or tag of the result. | - | | - | The guess is kept in %esi:%edi | - +---------------------------------------------------------------------------*/ - -#include "exception.h" -#include "fpu_emu.h" - - -#ifndef NON_REENTRANT_FPU -/* Local storage on the stack: */ -#define FPU_accum_3 -4(%ebp) /* ms word */ -#define FPU_accum_2 -8(%ebp) -#define FPU_accum_1 -12(%ebp) -#define FPU_accum_0 -16(%ebp) - -/* - * The de-normalised argument: - * sq_2 sq_1 sq_0 - * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 - * ^ binary point here - */ -#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */ -#define FPU_fsqrt_arg_1 -24(%ebp) -#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */ - -#else -/* Local storage in a static area: */ -.data - .align 4,0 -FPU_accum_3: - .long 0 /* ms word */ -FPU_accum_2: - .long 0 -FPU_accum_1: - .long 0 -FPU_accum_0: - .long 0 - -/* The de-normalised argument: - sq_2 sq_1 sq_0 - b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 - ^ binary point here - */ -FPU_fsqrt_arg_2: - .long 0 /* ms word */ -FPU_fsqrt_arg_1: - .long 0 -FPU_fsqrt_arg_0: - .long 0 /* ls word, at most the ms bit is set */ -#endif /* NON_REENTRANT_FPU */ - - -.text -ENTRY(wm_sqrt) - pushl %ebp - movl %esp,%ebp -#ifndef NON_REENTRANT_FPU - subl $28,%esp -#endif /* NON_REENTRANT_FPU */ - pushl %esi - pushl %edi - pushl %ebx - - movl PARAM1,%esi - - movl SIGH(%esi),%eax - movl SIGL(%esi),%ecx - xorl %edx,%edx - -/* We use a rough linear estimate for the first guess.. */ - - cmpw EXP_BIAS,EXP(%esi) - jnz sqrt_arg_ge_2 - - shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */ - rcrl $1,%ecx - rcrl $1,%edx - -sqrt_arg_ge_2: -/* From here on, n is never accessed directly again until it is - replaced by the answer. */ - - movl %eax,FPU_fsqrt_arg_2 /* ms word of n */ - movl %ecx,FPU_fsqrt_arg_1 - movl %edx,FPU_fsqrt_arg_0 - -/* Make a linear first estimate */ - shrl $1,%eax - addl $0x40000000,%eax - movl $0xaaaaaaaa,%ecx - mull %ecx - shll %edx /* max result was 7fff... */ - testl $0x80000000,%edx /* but min was 3fff... */ - jnz sqrt_prelim_no_adjust - - movl $0x80000000,%edx /* round up */ - -sqrt_prelim_no_adjust: - movl %edx,%esi /* Our first guess */ - -/* We have now computed (approx) (2 + x) / 3, which forms the basis - for a few iterations of Newton's method */ - - movl FPU_fsqrt_arg_2,%ecx /* ms word */ - -/* - * From our initial estimate, three iterations are enough to get us - * to 30 bits or so. This will then allow two iterations at better - * precision to complete the process. - */ - -/* Compute (g + n/g)/2 at each iteration (g is the guess). */ - shrl %ecx /* Doing this first will prevent a divide */ - /* overflow later. */ - - movl %ecx,%edx /* msw of the arg / 2 */ - divl %esi /* current estimate */ - shrl %esi /* divide by 2 */ - addl %eax,%esi /* the new estimate */ - - movl %ecx,%edx - divl %esi - shrl %esi - addl %eax,%esi - - movl %ecx,%edx - divl %esi - shrl %esi - addl %eax,%esi - -/* - * Now that an estimate accurate to about 30 bits has been obtained (in %esi), - * we improve it to 60 bits or so. - * - * The strategy from now on is to compute new estimates from - * guess := guess + (n - guess^2) / (2 * guess) - */ - -/* First, find the square of the guess */ - movl %esi,%eax - mull %esi -/* guess^2 now in %edx:%eax */ - - movl FPU_fsqrt_arg_1,%ecx - subl %ecx,%eax - movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */ - sbbl %ecx,%edx - jnc sqrt_stage_2_positive - -/* Subtraction gives a negative result, - negate the result before division. */ - notl %edx - notl %eax - addl $1,%eax - adcl $0,%edx - - divl %esi - movl %eax,%ecx - - movl %edx,%eax - divl %esi - jmp sqrt_stage_2_finish - -sqrt_stage_2_positive: - divl %esi - movl %eax,%ecx - - movl %edx,%eax - divl %esi - - notl %ecx - notl %eax - addl $1,%eax - adcl $0,%ecx - -sqrt_stage_2_finish: - sarl $1,%ecx /* divide by 2 */ - rcrl $1,%eax - - /* Form the new estimate in %esi:%edi */ - movl %eax,%edi - addl %ecx,%esi - - jnz sqrt_stage_2_done /* result should be [1..2) */ - -#ifdef PARANOID -/* It should be possible to get here only if the arg is ffff....ffff */ - cmp $0xffffffff,FPU_fsqrt_arg_1 - jnz sqrt_stage_2_error -#endif /* PARANOID */ - -/* The best rounded result. */ - xorl %eax,%eax - decl %eax - movl %eax,%edi - movl %eax,%esi - movl $0x7fffffff,%eax - jmp sqrt_round_result - -#ifdef PARANOID -sqrt_stage_2_error: - pushl EX_INTERNAL|0x213 - call EXCEPTION -#endif /* PARANOID */ - -sqrt_stage_2_done: - -/* Now the square root has been computed to better than 60 bits. */ - -/* Find the square of the guess. */ - movl %edi,%eax /* ls word of guess */ - mull %edi - movl %edx,FPU_accum_1 - - movl %esi,%eax - mull %esi - movl %edx,FPU_accum_3 - movl %eax,FPU_accum_2 - - movl %edi,%eax - mull %esi - addl %eax,FPU_accum_1 - adcl %edx,FPU_accum_2 - adcl $0,FPU_accum_3 - -/* movl %esi,%eax */ -/* mull %edi */ - addl %eax,FPU_accum_1 - adcl %edx,FPU_accum_2 - adcl $0,FPU_accum_3 - -/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */ - - movl FPU_fsqrt_arg_0,%eax /* get normalized n */ - subl %eax,FPU_accum_1 - movl FPU_fsqrt_arg_1,%eax - sbbl %eax,FPU_accum_2 - movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */ - sbbl %eax,FPU_accum_3 - jnc sqrt_stage_3_positive - -/* Subtraction gives a negative result, - negate the result before division */ - notl FPU_accum_1 - notl FPU_accum_2 - notl FPU_accum_3 - addl $1,FPU_accum_1 - adcl $0,FPU_accum_2 - -#ifdef PARANOID - adcl $0,FPU_accum_3 /* This must be zero */ - jz sqrt_stage_3_no_error - -sqrt_stage_3_error: - pushl EX_INTERNAL|0x207 - call EXCEPTION - -sqrt_stage_3_no_error: -#endif /* PARANOID */ - - movl FPU_accum_2,%edx - movl FPU_accum_1,%eax - divl %esi - movl %eax,%ecx - - movl %edx,%eax - divl %esi - - sarl $1,%ecx /* divide by 2 */ - rcrl $1,%eax - - /* prepare to round the result */ - - addl %ecx,%edi - adcl $0,%esi - - jmp sqrt_stage_3_finished - -sqrt_stage_3_positive: - movl FPU_accum_2,%edx - movl FPU_accum_1,%eax - divl %esi - movl %eax,%ecx - - movl %edx,%eax - divl %esi - - sarl $1,%ecx /* divide by 2 */ - rcrl $1,%eax - - /* prepare to round the result */ - - notl %eax /* Negate the correction term */ - notl %ecx - addl $1,%eax - adcl $0,%ecx /* carry here ==> correction == 0 */ - adcl $0xffffffff,%esi - - addl %ecx,%edi - adcl $0,%esi - -sqrt_stage_3_finished: - -/* - * The result in %esi:%edi:%esi should be good to about 90 bits here, - * and the rounding information here does not have sufficient accuracy - * in a few rare cases. - */ - cmpl $0xffffffe0,%eax - ja sqrt_near_exact_x - - cmpl $0x00000020,%eax - jb sqrt_near_exact - - cmpl $0x7fffffe0,%eax - jb sqrt_round_result - - cmpl $0x80000020,%eax - jb sqrt_get_more_precision - -sqrt_round_result: -/* Set up for rounding operations */ - movl %eax,%edx - movl %esi,%eax - movl %edi,%ebx - movl PARAM1,%edi - movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */ - jmp fpu_reg_round - - -sqrt_near_exact_x: -/* First, the estimate must be rounded up. */ - addl $1,%edi - adcl $0,%esi - -sqrt_near_exact: -/* - * This is an easy case because x^1/2 is monotonic. - * We need just find the square of our estimate, compare it - * with the argument, and deduce whether our estimate is - * above, below, or exact. We use the fact that the estimate - * is known to be accurate to about 90 bits. - */ - movl %edi,%eax /* ls word of guess */ - mull %edi - movl %edx,%ebx /* 2nd ls word of square */ - movl %eax,%ecx /* ls word of square */ - - movl %edi,%eax - mull %esi - addl %eax,%ebx - addl %eax,%ebx - -#ifdef PARANOID - cmp $0xffffffb0,%ebx - jb sqrt_near_exact_ok - - cmp $0x00000050,%ebx - ja sqrt_near_exact_ok - - pushl EX_INTERNAL|0x214 - call EXCEPTION - -sqrt_near_exact_ok: -#endif /* PARANOID */ - - or %ebx,%ebx - js sqrt_near_exact_small - - jnz sqrt_near_exact_large - - or %ebx,%edx - jnz sqrt_near_exact_large - -/* Our estimate is exactly the right answer */ - xorl %eax,%eax - jmp sqrt_round_result - -sqrt_near_exact_small: -/* Our estimate is too small */ - movl $0x000000ff,%eax - jmp sqrt_round_result - -sqrt_near_exact_large: -/* Our estimate is too large, we need to decrement it */ - subl $1,%edi - sbbl $0,%esi - movl $0xffffff00,%eax - jmp sqrt_round_result - - -sqrt_get_more_precision: -/* This case is almost the same as the above, except we start - with an extra bit of precision in the estimate. */ - stc /* The extra bit. */ - rcll $1,%edi /* Shift the estimate left one bit */ - rcll $1,%esi - - movl %edi,%eax /* ls word of guess */ - mull %edi - movl %edx,%ebx /* 2nd ls word of square */ - movl %eax,%ecx /* ls word of square */ - - movl %edi,%eax - mull %esi - addl %eax,%ebx - addl %eax,%ebx - -/* Put our estimate back to its original value */ - stc /* The ms bit. */ - rcrl $1,%esi /* Shift the estimate left one bit */ - rcrl $1,%edi - -#ifdef PARANOID - cmp $0xffffff60,%ebx - jb sqrt_more_prec_ok - - cmp $0x000000a0,%ebx - ja sqrt_more_prec_ok - - pushl EX_INTERNAL|0x215 - call EXCEPTION - -sqrt_more_prec_ok: -#endif /* PARANOID */ - - or %ebx,%ebx - js sqrt_more_prec_small - - jnz sqrt_more_prec_large - - or %ebx,%ecx - jnz sqrt_more_prec_large - -/* Our estimate is exactly the right answer */ - movl $0x80000000,%eax - jmp sqrt_round_result - -sqrt_more_prec_small: -/* Our estimate is too small */ - movl $0x800000ff,%eax - jmp sqrt_round_result - -sqrt_more_prec_large: -/* Our estimate is too large */ - movl $0x7fffff00,%eax - jmp sqrt_round_result diff --git a/arch/x86/math-emu/Makefile b/arch/x86/math-emu/Makefile new file mode 100644 index 00000000000..9c943fa6ce6 --- /dev/null +++ b/arch/x86/math-emu/Makefile @@ -0,0 +1,30 @@ +# +# Makefile for wm-FPU-emu +# + +#DEBUG = -DDEBUGGING +DEBUG = +PARANOID = -DPARANOID +CFLAGS := $(CFLAGS) $(PARANOID) $(DEBUG) -fno-builtin $(MATH_EMULATION) + +EXTRA_AFLAGS := $(PARANOID) + +# From 'C' language sources: +C_OBJS =fpu_entry.o errors.o \ + fpu_arith.o fpu_aux.o fpu_etc.o fpu_tags.o fpu_trig.o \ + load_store.o get_address.o \ + poly_atan.o poly_l2.o poly_2xm1.o poly_sin.o poly_tan.o \ + reg_add_sub.o reg_compare.o reg_constant.o reg_convert.o \ + reg_ld_str.o reg_divide.o reg_mul.o + +# From 80x86 assembler sources: +A_OBJS =reg_u_add.o reg_u_div.o reg_u_mul.o reg_u_sub.o \ + div_small.o reg_norm.o reg_round.o \ + wm_shrx.o wm_sqrt.o \ + div_Xsig.o polynom_Xsig.o round_Xsig.o \ + shr_Xsig.o mul_Xsig.o + +obj-y =$(C_OBJS) $(A_OBJS) + +proto: + cproto -e -DMAKING_PROTO *.c >fpu_proto.h diff --git a/arch/x86/math-emu/README b/arch/x86/math-emu/README new file mode 100644 index 00000000000..e6235491d6e --- /dev/null +++ b/arch/x86/math-emu/README @@ -0,0 +1,427 @@ + +---------------------------------------------------------------------------+ + | wm-FPU-emu an FPU emulator for 80386 and 80486SX microprocessors. | + | | + | Copyright (C) 1992,1993,1994,1995,1996,1997,1999 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@melbpc.org.au | + | | + | This program is free software; you can redistribute it and/or modify | + | it under the terms of the GNU General Public License version 2 as | + | published by the Free Software Foundation. | + | | + | This program is distributed in the hope that it will be useful, | + | but WITHOUT ANY WARRANTY; without even the implied warranty of | + | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | + | GNU General Public License for more details. | + | | + | You should have received a copy of the GNU General Public License | + | along with this program; if not, write to the Free Software | + | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | + | | + +---------------------------------------------------------------------------+ + + + +wm-FPU-emu is an FPU emulator for Linux. It is derived from wm-emu387 +which was my 80387 emulator for early versions of djgpp (gcc under +msdos); wm-emu387 was in turn based upon emu387 which was written by +DJ Delorie for djgpp. The interface to the Linux kernel is based upon +the original Linux math emulator by Linus Torvalds. + +My target FPU for wm-FPU-emu is that described in the Intel486 +Programmer's Reference Manual (1992 edition). Unfortunately, numerous +facets of the functioning of the FPU are not well covered in the +Reference Manual. The information in the manual has been supplemented +with measurements on real 80486's. Unfortunately, it is simply not +possible to be sure that all of the peculiarities of the 80486 have +been discovered, so there is always likely to be obscure differences +in the detailed behaviour of the emulator and a real 80486. + +wm-FPU-emu does not implement all of the behaviour of the 80486 FPU, +but is very close. See "Limitations" later in this file for a list of +some differences. + +Please report bugs, etc to me at: + billm@melbpc.org.au +or b.metzenthen@medoto.unimelb.edu.au + +For more information on the emulator and on floating point topics, see +my web pages, currently at http://www.suburbia.net/~billm/ + + +--Bill Metzenthen + December 1999 + + +----------------------- Internals of wm-FPU-emu ----------------------- + +Numeric algorithms: +(1) Add, subtract, and multiply. Nothing remarkable in these. +(2) Divide has been tuned to get reasonable performance. The algorithm + is not the obvious one which most people seem to use, but is designed + to take advantage of the characteristics of the 80386. I expect that + it has been invented many times before I discovered it, but I have not + seen it. It is based upon one of those ideas which one carries around + for years without ever bothering to check it out. +(3) The sqrt function has been tuned to get good performance. It is based + upon Newton's classic method. Performance was improved by capitalizing + upon the properties of Newton's method, and the code is once again + structured taking account of the 80386 characteristics. +(4) The trig, log, and exp functions are based in each case upon quasi- + "optimal" polynomial approximations. My definition of "optimal" was + based upon getting good accuracy with reasonable speed. +(5) The argument reducing code for the trig function effectively uses + a value of pi which is accurate to more than 128 bits. As a consequence, + the reduced argument is accurate to more than 64 bits for arguments up + to a few pi, and accurate to more than 64 bits for most arguments, + even for arguments approaching 2^63. This is far superior to an + 80486, which uses a value of pi which is accurate to 66 bits. + +The code of the emulator is complicated slightly by the need to +account for a limited form of re-entrancy. Normally, the emulator will +emulate each FPU instruction to completion without interruption. +However, it may happen that when the emulator is accessing the user +memory space, swapping may be needed. In this case the emulator may be +temporarily suspended while disk i/o takes place. During this time +another process may use the emulator, thereby perhaps changing static +variables. The code which accesses user memory is confined to five +files: + fpu_entry.c + reg_ld_str.c + load_store.c + get_address.c + errors.c +As from version 1.12 of the emulator, no static variables are used +(apart from those in the kernel's per-process tables). The emulator is +therefore now fully re-entrant, rather than having just the restricted +form of re-entrancy which is required by the Linux kernel. + +----------------------- Limitations of wm-FPU-emu ----------------------- + +There are a number of differences between the current wm-FPU-emu +(version 2.01) and the 80486 FPU (apart from bugs). The differences +are fewer than those which applied to the 1.xx series of the emulator. +Some of the more important differences are listed below: + +The Roundup flag does not have much meaning for the transcendental +functions and its 80486 value with these functions is likely to differ +from its emulator value. + +In a few rare cases the Underflow flag obtained with the emulator will +be different from that obtained with an 80486. This occurs when the +following conditions apply simultaneously: +(a) the operands have a higher precision than the current setting of the + precision control (PC) flags. +(b) the underflow exception is masked. +(c) the magnitude of the exact result (before rounding) is less than 2^-16382. +(d) the magnitude of the final result (after rounding) is exactly 2^-16382. +(e) the magnitude of the exact result would be exactly 2^-16382 if the + operands were rounded to the current precision before the arithmetic + operation was performed. +If all of these apply, the emulator will set the Underflow flag but a real +80486 will not. + +NOTE: Certain formats of Extended Real are UNSUPPORTED. They are +unsupported by the 80486. They are the Pseudo-NaNs, Pseudoinfinities, +and Unnormals. None of these will be generated by an 80486 or by the +emulator. Do not use them. The emulator treats them differently in +detail from the way an 80486 does. + +Self modifying code can cause the emulator to fail. An example of such +code is: + movl %esp,[%ebx] + fld1 +The FPU instruction may be (usually will be) loaded into the pre-fetch +queue of the CPU before the mov instruction is executed. If the +destination of the 'movl' overlaps the FPU instruction then the bytes +in the prefetch queue and memory will be inconsistent when the FPU +instruction is executed. The emulator will be invoked but will not be +able to find the instruction which caused the device-not-present +exception. For this case, the emulator cannot emulate the behaviour of +an 80486DX. + +Handling of the address size override prefix byte (0x67) has not been +extensively tested yet. A major problem exists because using it in +vm86 mode can cause a general protection fault. Address offsets +greater than 0xffff appear to be illegal in vm86 mode but are quite +acceptable (and work) in real mode. A small test program developed to +check the addressing, and which runs successfully in real mode, +crashes dosemu under Linux and also brings Windows down with a general +protection fault message when run under the MS-DOS prompt of Windows +3.1. (The program simply reads data from a valid address). + +The emulator supports 16-bit protected mode, with one difference from +an 80486DX. A 80486DX will allow some floating point instructions to +write a few bytes below the lowest address of the stack. The emulator +will not allow this in 16-bit protected mode: no instructions are +allowed to write outside the bounds set by the protection. + +----------------------- Performance of wm-FPU-emu ----------------------- + +Speed. +----- + +The speed of floating point computation with the emulator will depend +upon instruction mix. Relative performance is best for the instructions +which require most computation. The simple instructions are adversely +affected by the FPU instruction trap overhead. + + +Timing: Some simple timing tests have been made on the emulator functions. +The times include load/store instructions. All times are in microseconds +measured on a 33MHz 386 with 64k cache. The Turbo C tests were under +ms-dos, the next two columns are for emulators running with the djgpp +ms-dos extender. The final column is for wm-FPU-emu in Linux 0.97, +using libm4.0 (hard). + +function Turbo C djgpp 1.06 WM-emu387 wm-FPU-emu + + + 60.5 154.8 76.5 139.4 + - 61.1-65.5 157.3-160.8 76.2-79.5 142.9-144.7 + * 71.0 190.8 79.6 146.6 + / 61.2-75.0 261.4-266.9 75.3-91.6 142.2-158.1 + + sin() 310.8 4692.0 319.0 398.5 + cos() 284.4 4855.2 308.0 388.7 + tan() 495.0 8807.1 394.9 504.7 + atan() 328.9 4866.4 601.1 419.5-491.9 + + sqrt() 128.7 crashed 145.2 227.0 + log() 413.1-419.1 5103.4-5354.21 254.7-282.2 409.4-437.1 + exp() 479.1 6619.2 469.1 850.8 + + +The performance under Linux is improved by the use of look-ahead code. +The following results show the improvement which is obtained under +Linux due to the look-ahead code. Also given are the times for the +original Linux emulator with the 4.1 'soft' lib. + + [ Linus' note: I changed look-ahead to be the default under linux, as + there was no reason not to use it after I had edited it to be + disabled during tracing ] + + wm-FPU-emu w original w + look-ahead 'soft' lib + + 106.4 190.2 + - 108.6-111.6 192.4-216.2 + * 113.4 193.1 + / 108.8-124.4 700.1-706.2 + + sin() 390.5 2642.0 + cos() 381.5 2767.4 + tan() 496.5 3153.3 + atan() 367.2-435.5 2439.4-3396.8 + + sqrt() 195.1 4732.5 + log() 358.0-387.5 3359.2-3390.3 + exp() 619.3 4046.4 + + +These figures are now somewhat out-of-date. The emulator has become +progressively slower for most functions as more of the 80486 features +have been implemented. + + +----------------------- Accuracy of wm-FPU-emu ----------------------- + + +The accuracy of the emulator is in almost all cases equal to or better +than that of an Intel 80486 FPU. + +The results of the basic arithmetic functions (+,-,*,/), and fsqrt +match those of an 80486 FPU. They are the best possible; the error for +these never exceeds 1/2 an lsb. The fprem and fprem1 instructions +return exact results; they have no error. + + +The following table compares the emulator accuracy for the sqrt(), +trig and log functions against the Turbo C "emulator". For this table, +each function was tested at about 400 points. Ideal worst-case results +would be 64 bits. The reduced Turbo C accuracy of cos() and tan() for +arguments greater than pi/4 can be thought of as being related to the +precision of the argument x; e.g. an argument of pi/2-(1e-10) which is +accurate to 64 bits can result in a relative accuracy in cos() of +about 64 + log2(cos(x)) = 31 bits. + + +Function Tested x range Worst result Turbo C + (relative bits) + +sqrt(x) 1 .. 2 64.1 63.2 +atan(x) 1e-10 .. 200 64.2 62.8 +cos(x) 0 .. pi/2-(1e-10) 64.4 (x <= pi/4) 62.4 + 64.1 (x = pi/2-(1e-10)) 31.9 +sin(x) 1e-10 .. pi/2 64.0 62.8 +tan(x) 1e-10 .. pi/2-(1e-10) 64.0 (x <= pi/4) 62.1 + 64.1 (x = pi/2-(1e-10)) 31.9 +exp(x) 0 .. 1 63.1 ** 62.9 +log(x) 1+1e-6 .. 2 63.8 ** 62.1 + +** The accuracy for exp() and log() is low because the FPU (emulator) +does not compute them directly; two operations are required. + + +The emulator passes the "paranoia" tests (compiled with gcc 2.3.3 or +later) for 'float' variables (24 bit precision numbers) when precision +control is set to 24, 53 or 64 bits, and for 'double' variables (53 +bit precision numbers) when precision control is set to 53 bits (a +properly performing FPU cannot pass the 'paranoia' tests for 'double' +variables when precision control is set to 64 bits). + +The code for reducing the argument for the trig functions (fsin, fcos, +fptan and fsincos) has been improved and now effectively uses a value +for pi which is accurate to more than 128 bits precision. As a +consequence, the accuracy of these functions for large arguments has +been dramatically improved (and is now very much better than an 80486 +FPU). There is also now no degradation of accuracy for fcos and fptan +for operands close to pi/2. Measured results are (note that the +definition of accuracy has changed slightly from that used for the +above table): + +Function Tested x range Worst result + (absolute bits) + +cos(x) 0 .. 9.22e+18 62.0 +sin(x) 1e-16 .. 9.22e+18 62.1 +tan(x) 1e-16 .. 9.22e+18 61.8 + +It is possible with some effort to find very large arguments which +give much degraded precision. For example, the integer number + 8227740058411162616.0 +is within about 10e-7 of a multiple of pi. To find the tan (for +example) of this number to 64 bits precision it would be necessary to +have a value of pi which had about 150 bits precision. The FPU +emulator computes the result to about 42.6 bits precision (the correct +result is about -9.739715e-8). On the other hand, an 80486 FPU returns +0.01059, which in relative terms is hopelessly inaccurate. + +For arguments close to critical angles (which occur at multiples of +pi/2) the emulator is more accurate than an 80486 FPU. For very large +arguments, the emulator is far more accurate. + + +Prior to version 1.20 of the emulator, the accuracy of the results for +the transcendental functions (in their principal range) was not as +good as the results from an 80486 FPU. From version 1.20, the accuracy +has been considerably improved and these functions now give measured +worst-case results which are better than the worst-case results given +by an 80486 FPU. + +The following table gives the measured results for the emulator. The +number of randomly selected arguments in each case is about half a +million. The group of three columns gives the frequency of the given +accuracy in number of times per million, thus the second of these +columns shows that an accuracy of between 63.80 and 63.89 bits was +found at a rate of 133 times per one million measurements for fsin. +The results show that the fsin, fcos and fptan instructions return +results which are in error (i.e. less accurate than the best possible +result (which is 64 bits)) for about one per cent of all arguments +between -pi/2 and +pi/2. The other instructions have a lower +frequency of results which are in error. The last two columns give +the worst accuracy which was found (in bits) and the approximate value +of the argument which produced it. + + frequency (per M) + ------------------- --------------- +instr arg range # tests 63.7 63.8 63.9 worst at arg + bits bits bits bits +----- ------------ ------- ---- ---- ----- ----- -------- +fsin (0,pi/2) 547756 0 133 10673 63.89 0.451317 +fcos (0,pi/2) 547563 0 126 10532 63.85 0.700801 +fptan (0,pi/2) 536274 11 267 10059 63.74 0.784876 +fpatan 4 quadrants 517087 0 8 1855 63.88 0.435121 (4q) +fyl2x (0,20) 541861 0 0 1323 63.94 1.40923 (x) +fyl2xp1 (-.293,.414) 520256 0 0 5678 63.93 0.408542 (x) +f2xm1 (-1,1) 538847 4 481 6488 63.79 0.167709 + + +Tests performed on an 80486 FPU showed results of lower accuracy. The +following table gives the results which were obtained with an AMD +486DX2/66 (other tests indicate that an Intel 486DX produces +identical results). The tests were basically the same as those used +to measure the emulator (the values, being random, were in general not +the same). The total number of tests for each instruction are given +at the end of the table, in case each about 100k tests were performed. +Another line of figures at the end of the table shows that most of the +instructions return results which are in error for more than 10 +percent of the arguments tested. + +The numbers in the body of the table give the approx number of times a +result of the given accuracy in bits (given in the left-most column) +was obtained per one million arguments. For three of the instructions, +two columns of results are given: * The second column for f2xm1 gives +the number cases where the results of the first column were for a +positive argument, this shows that this instruction gives better +results for positive arguments than it does for negative. * In the +cases of fcos and fptan, the first column gives the results when all +cases where arguments greater than 1.5 were removed from the results +given in the second column. Unlike the emulator, an 80486 FPU returns +results of relatively poor accuracy for these instructions when the +argument approaches pi/2. The table does not show those cases when the +accuracy of the results were less than 62 bits, which occurs quite +often for fsin and fptan when the argument approaches pi/2. This poor +accuracy is discussed above in relation to the Turbo C "emulator", and +the accuracy of the value of pi. + + +bits f2xm1 f2xm1 fpatan fcos fcos fyl2x fyl2xp1 fsin fptan fptan +62.0 0 0 0 0 437 0 0 0 0 925 +62.1 0 0 10 0 894 0 0 0 0 1023 +62.2 14 0 0 0 1033 0 0 0 0 945 +62.3 57 0 0 0 1202 0 0 0 0 1023 +62.4 385 0 0 10 1292 0 23 0 0 1178 +62.5 1140 0 0 119 1649 0 39 0 0 1149 +62.6 2037 0 0 189 1620 0 16 0 0 1169 +62.7 5086 14 0 646 2315 10 101 35 39 1402 +62.8 8818 86 0 984 3050 59 287 131 224 2036 +62.9 11340 1355 0 2126 4153 79 605 357 321 1948 +63.0 15557 4750 0 3319 5376 246 1281 862 808 2688 +63.1 20016 8288 0 4620 6628 511 2569 1723 1510 3302 +63.2 24945 11127 10 6588 8098 1120 4470 2968 2990 4724 +63.3 25686 12382 69 8774 10682 1906 6775 4482 5474 7236 +63.4 29219 14722 79 11109 12311 3094 9414 7259 8912 10587 +63.5 30458 14936 393 13802 15014 5874 12666 9609 13762 15262 +63.6 32439 16448 1277 17945 19028 10226 15537 14657 19158 20346 +63.7 35031 16805 4067 23003 23947 18910 20116 21333 25001 26209 +63.8 33251 15820 7673 24781 25675 24617 25354 24440 29433 30329 +63.9 33293 16833 18529 28318 29233 31267 31470 27748 29676 30601 + +Per cent with error: + 30.9 3.2 18.5 9.8 13.1 11.6 17.4 +Total arguments tested: + 70194 70099 101784 100641 100641 101799 128853 114893 102675 102675 + + +------------------------- Contributors ------------------------------- + +A number of people have contributed to the development of the +emulator, often by just reporting bugs, sometimes with suggested +fixes, and a few kind people have provided me with access in one way +or another to an 80486 machine. Contributors include (to those people +who I may have forgotten, please forgive me): + +Linus Torvalds +Tommy.Thorn@daimi.aau.dk +Andrew.Tridgell@anu.edu.au +Nick Holloway, alfie@dcs.warwick.ac.uk +Hermano Moura, moura@dcs.gla.ac.uk +Jon Jagger, J.Jagger@scp.ac.uk +Lennart Benschop +Brian Gallew, geek+@CMU.EDU +Thomas Staniszewski, ts3v+@andrew.cmu.edu +Martin Howell, mph@plasma.apana.org.au +M Saggaf, alsaggaf@athena.mit.edu +Peter Barker, PETER@socpsy.sci.fau.edu +tom@vlsivie.tuwien.ac.at +Dan Russel, russed@rpi.edu +Daniel Carosone, danielce@ee.mu.oz.au +cae@jpmorgan.com +Hamish Coleman, t933093@minyos.xx.rmit.oz.au +Bruce Evans, bde@kralizec.zeta.org.au +Timo Korvola, Timo.Korvola@hut.fi +Rick Lyons, rick@razorback.brisnet.org.au +Rick, jrs@world.std.com + +...and numerous others who responded to my request for help with +a real 80486. + diff --git a/arch/x86/math-emu/control_w.h b/arch/x86/math-emu/control_w.h new file mode 100644 index 00000000000..ae2274dbd30 --- /dev/null +++ b/arch/x86/math-emu/control_w.h @@ -0,0 +1,45 @@ +/*---------------------------------------------------------------------------+ + | control_w.h | + | | + | Copyright (C) 1992,1993 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _CONTROLW_H_ +#define _CONTROLW_H_ + +#ifdef __ASSEMBLY__ +#define _Const_(x) $##x +#else +#define _Const_(x) x +#endif + +#define CW_RC _Const_(0x0C00) /* rounding control */ +#define CW_PC _Const_(0x0300) /* precision control */ + +#define CW_Precision Const_(0x0020) /* loss of precision mask */ +#define CW_Underflow Const_(0x0010) /* underflow mask */ +#define CW_Overflow Const_(0x0008) /* overflow mask */ +#define CW_ZeroDiv Const_(0x0004) /* divide by zero mask */ +#define CW_Denormal Const_(0x0002) /* denormalized operand mask */ +#define CW_Invalid Const_(0x0001) /* invalid operation mask */ + +#define CW_Exceptions _Const_(0x003f) /* all masks */ + +#define RC_RND _Const_(0x0000) +#define RC_DOWN _Const_(0x0400) +#define RC_UP _Const_(0x0800) +#define RC_CHOP _Const_(0x0C00) + +/* p 15-5: Precision control bits affect only the following: + ADD, SUB(R), MUL, DIV(R), and SQRT */ +#define PR_24_BITS _Const_(0x000) +#define PR_53_BITS _Const_(0x200) +#define PR_64_BITS _Const_(0x300) +#define PR_RESERVED_BITS _Const_(0x100) +/* FULL_PRECISION simulates all exceptions masked */ +#define FULL_PRECISION (PR_64_BITS | RC_RND | 0x3f) + +#endif /* _CONTROLW_H_ */ diff --git a/arch/x86/math-emu/div_Xsig.S b/arch/x86/math-emu/div_Xsig.S new file mode 100644 index 00000000000..f77ba3058b3 --- /dev/null +++ b/arch/x86/math-emu/div_Xsig.S @@ -0,0 +1,365 @@ + .file "div_Xsig.S" +/*---------------------------------------------------------------------------+ + | div_Xsig.S | + | | + | Division subroutine for 96 bit quantities | + | | + | Copyright (C) 1994,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and | + | put the 96 bit result at the location d. | + | | + | The result may not be accurate to 96 bits. It is intended for use where | + | a result better than 64 bits is required. The result should usually be | + | good to at least 94 bits. | + | The returned result is actually divided by one half. This is done to | + | prevent overflow. | + | | + | .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb -> .dddddddddddd | + | | + | void div_Xsig(Xsig *a, Xsig *b, Xsig *dest) | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_emu.h" + + +#define XsigLL(x) (x) +#define XsigL(x) 4(x) +#define XsigH(x) 8(x) + + +#ifndef NON_REENTRANT_FPU +/* + Local storage on the stack: + Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 + */ +#define FPU_accum_3 -4(%ebp) +#define FPU_accum_2 -8(%ebp) +#define FPU_accum_1 -12(%ebp) +#define FPU_accum_0 -16(%ebp) +#define FPU_result_3 -20(%ebp) +#define FPU_result_2 -24(%ebp) +#define FPU_result_1 -28(%ebp) + +#else +.data +/* + Local storage in a static area: + Accumulator: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 + */ + .align 4,0 +FPU_accum_3: + .long 0 +FPU_accum_2: + .long 0 +FPU_accum_1: + .long 0 +FPU_accum_0: + .long 0 +FPU_result_3: + .long 0 +FPU_result_2: + .long 0 +FPU_result_1: + .long 0 +#endif /* NON_REENTRANT_FPU */ + + +.text +ENTRY(div_Xsig) + pushl %ebp + movl %esp,%ebp +#ifndef NON_REENTRANT_FPU + subl $28,%esp +#endif /* NON_REENTRANT_FPU */ + + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi /* pointer to num */ + movl PARAM2,%ebx /* pointer to denom */ + +#ifdef PARANOID + testl $0x80000000, XsigH(%ebx) /* Divisor */ + je L_bugged +#endif /* PARANOID */ + + +/*---------------------------------------------------------------------------+ + | Divide: Return arg1/arg2 to arg3. | + | | + | The maximum returned value is (ignoring exponents) | + | .ffffffff ffffffff | + | ------------------ = 1.ffffffff fffffffe | + | .80000000 00000000 | + | and the minimum is | + | .80000000 00000000 | + | ------------------ = .80000000 00000001 (rounded) | + | .ffffffff ffffffff | + | | + +---------------------------------------------------------------------------*/ + + /* Save extended dividend in local register */ + + /* Divide by 2 to prevent overflow */ + clc + movl XsigH(%esi),%eax + rcrl %eax + movl %eax,FPU_accum_3 + movl XsigL(%esi),%eax + rcrl %eax + movl %eax,FPU_accum_2 + movl XsigLL(%esi),%eax + rcrl %eax + movl %eax,FPU_accum_1 + movl $0,%eax + rcrl %eax + movl %eax,FPU_accum_0 + + movl FPU_accum_2,%eax /* Get the current num */ + movl FPU_accum_3,%edx + +/*----------------------------------------------------------------------*/ +/* Initialization done. + Do the first 32 bits. */ + + /* We will divide by a number which is too large */ + movl XsigH(%ebx),%ecx + addl $1,%ecx + jnc LFirst_div_not_1 + + /* here we need to divide by 100000000h, + i.e., no division at all.. */ + mov %edx,%eax + jmp LFirst_div_done + +LFirst_div_not_1: + divl %ecx /* Divide the numerator by the augmented + denom ms dw */ + +LFirst_div_done: + movl %eax,FPU_result_3 /* Put the result in the answer */ + + mull XsigH(%ebx) /* mul by the ms dw of the denom */ + + subl %eax,FPU_accum_2 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_3 + + movl FPU_result_3,%eax /* Get the result back */ + mull XsigL(%ebx) /* now mul the ls dw of the denom */ + + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + sbbl $0,FPU_accum_3 + je LDo_2nd_32_bits /* Must check for non-zero result here */ + +#ifdef PARANOID + jb L_bugged_1 +#endif /* PARANOID */ + + /* need to subtract another once of the denom */ + incl FPU_result_3 /* Correct the answer */ + + movl XsigL(%ebx),%eax + movl XsigH(%ebx),%edx + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + +#ifdef PARANOID + sbbl $0,FPU_accum_3 + jne L_bugged_1 /* Must check for non-zero result here */ +#endif /* PARANOID */ + +/*----------------------------------------------------------------------*/ +/* Half of the main problem is done, there is just a reduced numerator + to handle now. + Work with the second 32 bits, FPU_accum_0 not used from now on */ +LDo_2nd_32_bits: + movl FPU_accum_2,%edx /* get the reduced num */ + movl FPU_accum_1,%eax + + /* need to check for possible subsequent overflow */ + cmpl XsigH(%ebx),%edx + jb LDo_2nd_div + ja LPrevent_2nd_overflow + + cmpl XsigL(%ebx),%eax + jb LDo_2nd_div + +LPrevent_2nd_overflow: +/* The numerator is greater or equal, would cause overflow */ + /* prevent overflow */ + subl XsigL(%ebx),%eax + sbbl XsigH(%ebx),%edx + movl %edx,FPU_accum_2 + movl %eax,FPU_accum_1 + + incl FPU_result_3 /* Reflect the subtraction in the answer */ + +#ifdef PARANOID + je L_bugged_2 /* Can't bump the result to 1.0 */ +#endif /* PARANOID */ + +LDo_2nd_div: + cmpl $0,%ecx /* augmented denom msw */ + jnz LSecond_div_not_1 + + /* %ecx == 0, we are dividing by 1.0 */ + mov %edx,%eax + jmp LSecond_div_done + +LSecond_div_not_1: + divl %ecx /* Divide the numerator by the denom ms dw */ + +LSecond_div_done: + movl %eax,FPU_result_2 /* Put the result in the answer */ + + mull XsigH(%ebx) /* mul by the ms dw of the denom */ + + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 +#endif /* PARANOID */ + + movl FPU_result_2,%eax /* Get the result back */ + mull XsigL(%ebx) /* now mul the ls dw of the denom */ + + subl %eax,FPU_accum_0 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ + sbbl $0,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 +#endif /* PARANOID */ + + jz LDo_3rd_32_bits + +#ifdef PARANOID + cmpl $1,FPU_accum_2 + jne L_bugged_2 +#endif /* PARANOID */ + + /* need to subtract another once of the denom */ + movl XsigL(%ebx),%eax + movl XsigH(%ebx),%edx + subl %eax,FPU_accum_0 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_1 + sbbl $0,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 + jne L_bugged_2 +#endif /* PARANOID */ + + addl $1,FPU_result_2 /* Correct the answer */ + adcl $0,FPU_result_3 + +#ifdef PARANOID + jc L_bugged_2 /* Must check for non-zero result here */ +#endif /* PARANOID */ + +/*----------------------------------------------------------------------*/ +/* The division is essentially finished here, we just need to perform + tidying operations. + Deal with the 3rd 32 bits */ +LDo_3rd_32_bits: + /* We use an approximation for the third 32 bits. + To take account of the 3rd 32 bits of the divisor + (call them del), we subtract del * (a/b) */ + + movl FPU_result_3,%eax /* a/b */ + mull XsigLL(%ebx) /* del */ + + subl %edx,FPU_accum_1 + + /* A borrow indicates that the result is negative */ + jnb LTest_over + + movl XsigH(%ebx),%edx + addl %edx,FPU_accum_1 + + subl $1,FPU_result_2 /* Adjust the answer */ + sbbl $0,FPU_result_3 + + /* The above addition might not have been enough, check again. */ + movl FPU_accum_1,%edx /* get the reduced num */ + cmpl XsigH(%ebx),%edx /* denom */ + jb LDo_3rd_div + + movl XsigH(%ebx),%edx + addl %edx,FPU_accum_1 + + subl $1,FPU_result_2 /* Adjust the answer */ + sbbl $0,FPU_result_3 + jmp LDo_3rd_div + +LTest_over: + movl FPU_accum_1,%edx /* get the reduced num */ + + /* need to check for possible subsequent overflow */ + cmpl XsigH(%ebx),%edx /* denom */ + jb LDo_3rd_div + + /* prevent overflow */ + subl XsigH(%ebx),%edx + movl %edx,FPU_accum_1 + + addl $1,FPU_result_2 /* Reflect the subtraction in the answer */ + adcl $0,FPU_result_3 + +LDo_3rd_div: + movl FPU_accum_0,%eax + movl FPU_accum_1,%edx + divl XsigH(%ebx) + + movl %eax,FPU_result_1 /* Rough estimate of third word */ + + movl PARAM3,%esi /* pointer to answer */ + + movl FPU_result_1,%eax + movl %eax,XsigLL(%esi) + movl FPU_result_2,%eax + movl %eax,XsigL(%esi) + movl FPU_result_3,%eax + movl %eax,XsigH(%esi) + +L_exit: + popl %ebx + popl %edi + popl %esi + + leave + ret + + +#ifdef PARANOID +/* The logic is wrong if we got here */ +L_bugged: + pushl EX_INTERNAL|0x240 + call EXCEPTION + pop %ebx + jmp L_exit + +L_bugged_1: + pushl EX_INTERNAL|0x241 + call EXCEPTION + pop %ebx + jmp L_exit + +L_bugged_2: + pushl EX_INTERNAL|0x242 + call EXCEPTION + pop %ebx + jmp L_exit +#endif /* PARANOID */ diff --git a/arch/x86/math-emu/div_small.S b/arch/x86/math-emu/div_small.S new file mode 100644 index 00000000000..47099628fa4 --- /dev/null +++ b/arch/x86/math-emu/div_small.S @@ -0,0 +1,47 @@ + .file "div_small.S" +/*---------------------------------------------------------------------------+ + | div_small.S | + | | + | Divide a 64 bit integer by a 32 bit integer & return remainder. | + | | + | Copyright (C) 1992,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | unsigned long FPU_div_small(unsigned long long *x, unsigned long y) | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" + +.text +ENTRY(FPU_div_small) + pushl %ebp + movl %esp,%ebp + + pushl %esi + + movl PARAM1,%esi /* pointer to num */ + movl PARAM2,%ecx /* The denominator */ + + movl 4(%esi),%eax /* Get the current num msw */ + xorl %edx,%edx + divl %ecx + + movl %eax,4(%esi) + + movl (%esi),%eax /* Get the num lsw */ + divl %ecx + + movl %eax,(%esi) + + movl %edx,%eax /* Return the remainder in eax */ + + popl %esi + + leave + ret + diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c new file mode 100644 index 00000000000..a1b0d22f697 --- /dev/null +++ b/arch/x86/math-emu/errors.c @@ -0,0 +1,739 @@ +/*---------------------------------------------------------------------------+ + | errors.c | + | | + | The error handling functions for wm-FPU-emu | + | | + | Copyright (C) 1992,1993,1994,1996 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@jacobi.maths.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + +#include + +#include + +#include "fpu_emu.h" +#include "fpu_system.h" +#include "exception.h" +#include "status_w.h" +#include "control_w.h" +#include "reg_constant.h" +#include "version.h" + +/* */ +#undef PRINT_MESSAGES +/* */ + + +#if 0 +void Un_impl(void) +{ + u_char byte1, FPU_modrm; + unsigned long address = FPU_ORIG_EIP; + + RE_ENTRANT_CHECK_OFF; + /* No need to check access_ok(), we have previously fetched these bytes. */ + printk("Unimplemented FPU Opcode at eip=%p : ", (void __user *) address); + if ( FPU_CS == __USER_CS ) + { + while ( 1 ) + { + FPU_get_user(byte1, (u_char __user *) address); + if ( (byte1 & 0xf8) == 0xd8 ) break; + printk("[%02x]", byte1); + address++; + } + printk("%02x ", byte1); + FPU_get_user(FPU_modrm, 1 + (u_char __user *) address); + + if (FPU_modrm >= 0300) + printk("%02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7); + else + printk("/%d\n", (FPU_modrm >> 3) & 7); + } + else + { + printk("cs selector = %04x\n", FPU_CS); + } + + RE_ENTRANT_CHECK_ON; + + EXCEPTION(EX_Invalid); + +} +#endif /* 0 */ + + +/* + Called for opcodes which are illegal and which are known to result in a + SIGILL with a real 80486. + */ +void FPU_illegal(void) +{ + math_abort(FPU_info,SIGILL); +} + + + +void FPU_printall(void) +{ + int i; + static const char *tag_desc[] = { "Valid", "Zero", "ERROR", "Empty", + "DeNorm", "Inf", "NaN" }; + u_char byte1, FPU_modrm; + unsigned long address = FPU_ORIG_EIP; + + RE_ENTRANT_CHECK_OFF; + /* No need to check access_ok(), we have previously fetched these bytes. */ + printk("At %p:", (void *) address); + if ( FPU_CS == __USER_CS ) + { +#define MAX_PRINTED_BYTES 20 + for ( i = 0; i < MAX_PRINTED_BYTES; i++ ) + { + FPU_get_user(byte1, (u_char __user *) address); + if ( (byte1 & 0xf8) == 0xd8 ) + { + printk(" %02x", byte1); + break; + } + printk(" [%02x]", byte1); + address++; + } + if ( i == MAX_PRINTED_BYTES ) + printk(" [more..]\n"); + else + { + FPU_get_user(FPU_modrm, 1 + (u_char __user *) address); + + if (FPU_modrm >= 0300) + printk(" %02x (%02x+%d)\n", FPU_modrm, FPU_modrm & 0xf8, FPU_modrm & 7); + else + printk(" /%d, mod=%d rm=%d\n", + (FPU_modrm >> 3) & 7, (FPU_modrm >> 6) & 3, FPU_modrm & 7); + } + } + else + { + printk("%04x\n", FPU_CS); + } + + partial_status = status_word(); + +#ifdef DEBUGGING +if ( partial_status & SW_Backward ) printk("SW: backward compatibility\n"); +if ( partial_status & SW_C3 ) printk("SW: condition bit 3\n"); +if ( partial_status & SW_C2 ) printk("SW: condition bit 2\n"); +if ( partial_status & SW_C1 ) printk("SW: condition bit 1\n"); +if ( partial_status & SW_C0 ) printk("SW: condition bit 0\n"); +if ( partial_status & SW_Summary ) printk("SW: exception summary\n"); +if ( partial_status & SW_Stack_Fault ) printk("SW: stack fault\n"); +if ( partial_status & SW_Precision ) printk("SW: loss of precision\n"); +if ( partial_status & SW_Underflow ) printk("SW: underflow\n"); +if ( partial_status & SW_Overflow ) printk("SW: overflow\n"); +if ( partial_status & SW_Zero_Div ) printk("SW: divide by zero\n"); +if ( partial_status & SW_Denorm_Op ) printk("SW: denormalized operand\n"); +if ( partial_status & SW_Invalid ) printk("SW: invalid operation\n"); +#endif /* DEBUGGING */ + + printk(" SW: b=%d st=%ld es=%d sf=%d cc=%d%d%d%d ef=%d%d%d%d%d%d\n", + partial_status & 0x8000 ? 1 : 0, /* busy */ + (partial_status & 0x3800) >> 11, /* stack top pointer */ + partial_status & 0x80 ? 1 : 0, /* Error summary status */ + partial_status & 0x40 ? 1 : 0, /* Stack flag */ + partial_status & SW_C3?1:0, partial_status & SW_C2?1:0, /* cc */ + partial_status & SW_C1?1:0, partial_status & SW_C0?1:0, /* cc */ + partial_status & SW_Precision?1:0, partial_status & SW_Underflow?1:0, + partial_status & SW_Overflow?1:0, partial_status & SW_Zero_Div?1:0, + partial_status & SW_Denorm_Op?1:0, partial_status & SW_Invalid?1:0); + +printk(" CW: ic=%d rc=%ld%ld pc=%ld%ld iem=%d ef=%d%d%d%d%d%d\n", + control_word & 0x1000 ? 1 : 0, + (control_word & 0x800) >> 11, (control_word & 0x400) >> 10, + (control_word & 0x200) >> 9, (control_word & 0x100) >> 8, + control_word & 0x80 ? 1 : 0, + control_word & SW_Precision?1:0, control_word & SW_Underflow?1:0, + control_word & SW_Overflow?1:0, control_word & SW_Zero_Div?1:0, + control_word & SW_Denorm_Op?1:0, control_word & SW_Invalid?1:0); + + for ( i = 0; i < 8; i++ ) + { + FPU_REG *r = &st(i); + u_char tagi = FPU_gettagi(i); + switch (tagi) + { + case TAG_Empty: + continue; + break; + case TAG_Zero: + case TAG_Special: + tagi = FPU_Special(r); + case TAG_Valid: + printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6d ", i, + getsign(r) ? '-' : '+', + (long)(r->sigh >> 16), + (long)(r->sigh & 0xFFFF), + (long)(r->sigl >> 16), + (long)(r->sigl & 0xFFFF), + exponent(r) - EXP_BIAS + 1); + break; + default: + printk("Whoops! Error in errors.c: tag%d is %d ", i, tagi); + continue; + break; + } + printk("%s\n", tag_desc[(int) (unsigned) tagi]); + } + + RE_ENTRANT_CHECK_ON; + +} + +static struct { + int type; + const char *name; +} exception_names[] = { + { EX_StackOver, "stack overflow" }, + { EX_StackUnder, "stack underflow" }, + { EX_Precision, "loss of precision" }, + { EX_Underflow, "underflow" }, + { EX_Overflow, "overflow" }, + { EX_ZeroDiv, "divide by zero" }, + { EX_Denormal, "denormalized operand" }, + { EX_Invalid, "invalid operation" }, + { EX_INTERNAL, "INTERNAL BUG in "FPU_VERSION }, + { 0, NULL } +}; + +/* + EX_INTERNAL is always given with a code which indicates where the + error was detected. + + Internal error types: + 0x14 in fpu_etc.c + 0x1nn in a *.c file: + 0x101 in reg_add_sub.c + 0x102 in reg_mul.c + 0x104 in poly_atan.c + 0x105 in reg_mul.c + 0x107 in fpu_trig.c + 0x108 in reg_compare.c + 0x109 in reg_compare.c + 0x110 in reg_add_sub.c + 0x111 in fpe_entry.c + 0x112 in fpu_trig.c + 0x113 in errors.c + 0x115 in fpu_trig.c + 0x116 in fpu_trig.c + 0x117 in fpu_trig.c + 0x118 in fpu_trig.c + 0x119 in fpu_trig.c + 0x120 in poly_atan.c + 0x121 in reg_compare.c + 0x122 in reg_compare.c + 0x123 in reg_compare.c + 0x125 in fpu_trig.c + 0x126 in fpu_entry.c + 0x127 in poly_2xm1.c + 0x128 in fpu_entry.c + 0x129 in fpu_entry.c + 0x130 in get_address.c + 0x131 in get_address.c + 0x132 in get_address.c + 0x133 in get_address.c + 0x140 in load_store.c + 0x141 in load_store.c + 0x150 in poly_sin.c + 0x151 in poly_sin.c + 0x160 in reg_ld_str.c + 0x161 in reg_ld_str.c + 0x162 in reg_ld_str.c + 0x163 in reg_ld_str.c + 0x164 in reg_ld_str.c + 0x170 in fpu_tags.c + 0x171 in fpu_tags.c + 0x172 in fpu_tags.c + 0x180 in reg_convert.c + 0x2nn in an *.S file: + 0x201 in reg_u_add.S + 0x202 in reg_u_div.S + 0x203 in reg_u_div.S + 0x204 in reg_u_div.S + 0x205 in reg_u_mul.S + 0x206 in reg_u_sub.S + 0x207 in wm_sqrt.S + 0x208 in reg_div.S + 0x209 in reg_u_sub.S + 0x210 in reg_u_sub.S + 0x211 in reg_u_sub.S + 0x212 in reg_u_sub.S + 0x213 in wm_sqrt.S + 0x214 in wm_sqrt.S + 0x215 in wm_sqrt.S + 0x220 in reg_norm.S + 0x221 in reg_norm.S + 0x230 in reg_round.S + 0x231 in reg_round.S + 0x232 in reg_round.S + 0x233 in reg_round.S + 0x234 in reg_round.S + 0x235 in reg_round.S + 0x236 in reg_round.S + 0x240 in div_Xsig.S + 0x241 in div_Xsig.S + 0x242 in div_Xsig.S + */ + +asmlinkage void FPU_exception(int n) +{ + int i, int_type; + + int_type = 0; /* Needed only to stop compiler warnings */ + if ( n & EX_INTERNAL ) + { + int_type = n - EX_INTERNAL; + n = EX_INTERNAL; + /* Set lots of exception bits! */ + partial_status |= (SW_Exc_Mask | SW_Summary | SW_Backward); + } + else + { + /* Extract only the bits which we use to set the status word */ + n &= (SW_Exc_Mask); + /* Set the corresponding exception bit */ + partial_status |= n; + /* Set summary bits iff exception isn't masked */ + if ( partial_status & ~control_word & CW_Exceptions ) + partial_status |= (SW_Summary | SW_Backward); + if ( n & (SW_Stack_Fault | EX_Precision) ) + { + if ( !(n & SW_C1) ) + /* This bit distinguishes over- from underflow for a stack fault, + and roundup from round-down for precision loss. */ + partial_status &= ~SW_C1; + } + } + + RE_ENTRANT_CHECK_OFF; + if ( (~control_word & n & CW_Exceptions) || (n == EX_INTERNAL) ) + { +#ifdef PRINT_MESSAGES + /* My message from the sponsor */ + printk(FPU_VERSION" "__DATE__" (C) W. Metzenthen.\n"); +#endif /* PRINT_MESSAGES */ + + /* Get a name string for error reporting */ + for (i=0; exception_names[i].type; i++) + if ( (exception_names[i].type & n) == exception_names[i].type ) + break; + + if (exception_names[i].type) + { +#ifdef PRINT_MESSAGES + printk("FP Exception: %s!\n", exception_names[i].name); +#endif /* PRINT_MESSAGES */ + } + else + printk("FPU emulator: Unknown Exception: 0x%04x!\n", n); + + if ( n == EX_INTERNAL ) + { + printk("FPU emulator: Internal error type 0x%04x\n", int_type); + FPU_printall(); + } +#ifdef PRINT_MESSAGES + else + FPU_printall(); +#endif /* PRINT_MESSAGES */ + + /* + * The 80486 generates an interrupt on the next non-control FPU + * instruction. So we need some means of flagging it. + * We use the ES (Error Summary) bit for this. + */ + } + RE_ENTRANT_CHECK_ON; + +#ifdef __DEBUG__ + math_abort(FPU_info,SIGFPE); +#endif /* __DEBUG__ */ + +} + + +/* Real operation attempted on a NaN. */ +/* Returns < 0 if the exception is unmasked */ +int real_1op_NaN(FPU_REG *a) +{ + int signalling, isNaN; + + isNaN = (exponent(a) == EXP_OVER) && (a->sigh & 0x80000000); + + /* The default result for the case of two "equal" NaNs (signs may + differ) is chosen to reproduce 80486 behaviour */ + signalling = isNaN && !(a->sigh & 0x40000000); + + if ( !signalling ) + { + if ( !isNaN ) /* pseudo-NaN, or other unsupported? */ + { + if ( control_word & CW_Invalid ) + { + /* Masked response */ + reg_copy(&CONST_QNaN, a); + } + EXCEPTION(EX_Invalid); + return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; + } + return TAG_Special; + } + + if ( control_word & CW_Invalid ) + { + /* The masked response */ + if ( !(a->sigh & 0x80000000) ) /* pseudo-NaN ? */ + { + reg_copy(&CONST_QNaN, a); + } + /* ensure a Quiet NaN */ + a->sigh |= 0x40000000; + } + + EXCEPTION(EX_Invalid); + + return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; +} + + +/* Real operation attempted on two operands, one a NaN. */ +/* Returns < 0 if the exception is unmasked */ +int real_2op_NaN(FPU_REG const *b, u_char tagb, + int deststnr, + FPU_REG const *defaultNaN) +{ + FPU_REG *dest = &st(deststnr); + FPU_REG const *a = dest; + u_char taga = FPU_gettagi(deststnr); + FPU_REG const *x; + int signalling, unsupported; + + if ( taga == TAG_Special ) + taga = FPU_Special(a); + if ( tagb == TAG_Special ) + tagb = FPU_Special(b); + + /* TW_NaN is also used for unsupported data types. */ + unsupported = ((taga == TW_NaN) + && !((exponent(a) == EXP_OVER) && (a->sigh & 0x80000000))) + || ((tagb == TW_NaN) + && !((exponent(b) == EXP_OVER) && (b->sigh & 0x80000000))); + if ( unsupported ) + { + if ( control_word & CW_Invalid ) + { + /* Masked response */ + FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr); + } + EXCEPTION(EX_Invalid); + return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; + } + + if (taga == TW_NaN) + { + x = a; + if (tagb == TW_NaN) + { + signalling = !(a->sigh & b->sigh & 0x40000000); + if ( significand(b) > significand(a) ) + x = b; + else if ( significand(b) == significand(a) ) + { + /* The default result for the case of two "equal" NaNs (signs may + differ) is chosen to reproduce 80486 behaviour */ + x = defaultNaN; + } + } + else + { + /* return the quiet version of the NaN in a */ + signalling = !(a->sigh & 0x40000000); + } + } + else +#ifdef PARANOID + if (tagb == TW_NaN) +#endif /* PARANOID */ + { + signalling = !(b->sigh & 0x40000000); + x = b; + } +#ifdef PARANOID + else + { + signalling = 0; + EXCEPTION(EX_INTERNAL|0x113); + x = &CONST_QNaN; + } +#endif /* PARANOID */ + + if ( (!signalling) || (control_word & CW_Invalid) ) + { + if ( ! x ) + x = b; + + if ( !(x->sigh & 0x80000000) ) /* pseudo-NaN ? */ + x = &CONST_QNaN; + + FPU_copy_to_regi(x, TAG_Special, deststnr); + + if ( !signalling ) + return TAG_Special; + + /* ensure a Quiet NaN */ + dest->sigh |= 0x40000000; + } + + EXCEPTION(EX_Invalid); + + return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special; +} + + +/* Invalid arith operation on Valid registers */ +/* Returns < 0 if the exception is unmasked */ +asmlinkage int arith_invalid(int deststnr) +{ + + EXCEPTION(EX_Invalid); + + if ( control_word & CW_Invalid ) + { + /* The masked response */ + FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr); + } + + return (!(control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Valid; + +} + + +/* Divide a finite number by zero */ +asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign) +{ + FPU_REG *dest = &st(deststnr); + int tag = TAG_Valid; + + if ( control_word & CW_ZeroDiv ) + { + /* The masked response */ + FPU_copy_to_regi(&CONST_INF, TAG_Special, deststnr); + setsign(dest, sign); + tag = TAG_Special; + } + + EXCEPTION(EX_ZeroDiv); + + return (!(control_word & CW_ZeroDiv) ? FPU_Exception : 0) | tag; + +} + + +/* This may be called often, so keep it lean */ +int set_precision_flag(int flags) +{ + if ( control_word & CW_Precision ) + { + partial_status &= ~(SW_C1 & flags); + partial_status |= flags; /* The masked response */ + return 0; + } + else + { + EXCEPTION(flags); + return 1; + } +} + + +/* This may be called often, so keep it lean */ +asmlinkage void set_precision_flag_up(void) +{ + if ( control_word & CW_Precision ) + partial_status |= (SW_Precision | SW_C1); /* The masked response */ + else + EXCEPTION(EX_Precision | SW_C1); +} + + +/* This may be called often, so keep it lean */ +asmlinkage void set_precision_flag_down(void) +{ + if ( control_word & CW_Precision ) + { /* The masked response */ + partial_status &= ~SW_C1; + partial_status |= SW_Precision; + } + else + EXCEPTION(EX_Precision); +} + + +asmlinkage int denormal_operand(void) +{ + if ( control_word & CW_Denormal ) + { /* The masked response */ + partial_status |= SW_Denorm_Op; + return TAG_Special; + } + else + { + EXCEPTION(EX_Denormal); + return TAG_Special | FPU_Exception; + } +} + + +asmlinkage int arith_overflow(FPU_REG *dest) +{ + int tag = TAG_Valid; + + if ( control_word & CW_Overflow ) + { + /* The masked response */ +/* ###### The response here depends upon the rounding mode */ + reg_copy(&CONST_INF, dest); + tag = TAG_Special; + } + else + { + /* Subtract the magic number from the exponent */ + addexponent(dest, (-3 * (1 << 13))); + } + + EXCEPTION(EX_Overflow); + if ( control_word & CW_Overflow ) + { + /* The overflow exception is masked. */ + /* By definition, precision is lost. + The roundup bit (C1) is also set because we have + "rounded" upwards to Infinity. */ + EXCEPTION(EX_Precision | SW_C1); + return tag; + } + + return tag; + +} + + +asmlinkage int arith_underflow(FPU_REG *dest) +{ + int tag = TAG_Valid; + + if ( control_word & CW_Underflow ) + { + /* The masked response */ + if ( exponent16(dest) <= EXP_UNDER - 63 ) + { + reg_copy(&CONST_Z, dest); + partial_status &= ~SW_C1; /* Round down. */ + tag = TAG_Zero; + } + else + { + stdexp(dest); + } + } + else + { + /* Add the magic number to the exponent. */ + addexponent(dest, (3 * (1 << 13)) + EXTENDED_Ebias); + } + + EXCEPTION(EX_Underflow); + if ( control_word & CW_Underflow ) + { + /* The underflow exception is masked. */ + EXCEPTION(EX_Precision); + return tag; + } + + return tag; + +} + + +void FPU_stack_overflow(void) +{ + + if ( control_word & CW_Invalid ) + { + /* The masked response */ + top--; + FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); + } + + EXCEPTION(EX_StackOver); + + return; + +} + + +void FPU_stack_underflow(void) +{ + + if ( control_word & CW_Invalid ) + { + /* The masked response */ + FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); + } + + EXCEPTION(EX_StackUnder); + + return; + +} + + +void FPU_stack_underflow_i(int i) +{ + + if ( control_word & CW_Invalid ) + { + /* The masked response */ + FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i); + } + + EXCEPTION(EX_StackUnder); + + return; + +} + + +void FPU_stack_underflow_pop(int i) +{ + + if ( control_word & CW_Invalid ) + { + /* The masked response */ + FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i); + FPU_pop(); + } + + EXCEPTION(EX_StackUnder); + + return; + +} + diff --git a/arch/x86/math-emu/exception.h b/arch/x86/math-emu/exception.h new file mode 100644 index 00000000000..b463f21a811 --- /dev/null +++ b/arch/x86/math-emu/exception.h @@ -0,0 +1,53 @@ +/*---------------------------------------------------------------------------+ + | exception.h | + | | + | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _EXCEPTION_H_ +#define _EXCEPTION_H_ + + +#ifdef __ASSEMBLY__ +#define Const_(x) $##x +#else +#define Const_(x) x +#endif + +#ifndef SW_C1 +#include "fpu_emu.h" +#endif /* SW_C1 */ + +#define FPU_BUSY Const_(0x8000) /* FPU busy bit (8087 compatibility) */ +#define EX_ErrorSummary Const_(0x0080) /* Error summary status */ +/* Special exceptions: */ +#define EX_INTERNAL Const_(0x8000) /* Internal error in wm-FPU-emu */ +#define EX_StackOver Const_(0x0041|SW_C1) /* stack overflow */ +#define EX_StackUnder Const_(0x0041) /* stack underflow */ +/* Exception flags: */ +#define EX_Precision Const_(0x0020) /* loss of precision */ +#define EX_Underflow Const_(0x0010) /* underflow */ +#define EX_Overflow Const_(0x0008) /* overflow */ +#define EX_ZeroDiv Const_(0x0004) /* divide by zero */ +#define EX_Denormal Const_(0x0002) /* denormalized operand */ +#define EX_Invalid Const_(0x0001) /* invalid operation */ + + +#define PRECISION_LOST_UP Const_((EX_Precision | SW_C1)) +#define PRECISION_LOST_DOWN Const_(EX_Precision) + + +#ifndef __ASSEMBLY__ + +#ifdef DEBUG +#define EXCEPTION(x) { printk("exception in %s at line %d\n", \ + __FILE__, __LINE__); FPU_exception(x); } +#else +#define EXCEPTION(x) FPU_exception(x) +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _EXCEPTION_H_ */ diff --git a/arch/x86/math-emu/fpu_arith.c b/arch/x86/math-emu/fpu_arith.c new file mode 100644 index 00000000000..6972dec01af --- /dev/null +++ b/arch/x86/math-emu/fpu_arith.c @@ -0,0 +1,174 @@ +/*---------------------------------------------------------------------------+ + | fpu_arith.c | + | | + | Code to implement the FPU register/register arithmetic instructions | + | | + | Copyright (C) 1992,1993,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "fpu_emu.h" +#include "control_w.h" +#include "status_w.h" + + +void fadd__(void) +{ + /* fadd st,st(i) */ + int i = FPU_rm; + clear_C1(); + FPU_add(&st(i), FPU_gettagi(i), 0, control_word); +} + + +void fmul__(void) +{ + /* fmul st,st(i) */ + int i = FPU_rm; + clear_C1(); + FPU_mul(&st(i), FPU_gettagi(i), 0, control_word); +} + + + +void fsub__(void) +{ + /* fsub st,st(i) */ + clear_C1(); + FPU_sub(0, FPU_rm, control_word); +} + + +void fsubr_(void) +{ + /* fsubr st,st(i) */ + clear_C1(); + FPU_sub(REV, FPU_rm, control_word); +} + + +void fdiv__(void) +{ + /* fdiv st,st(i) */ + clear_C1(); + FPU_div(0, FPU_rm, control_word); +} + + +void fdivr_(void) +{ + /* fdivr st,st(i) */ + clear_C1(); + FPU_div(REV, FPU_rm, control_word); +} + + + +void fadd_i(void) +{ + /* fadd st(i),st */ + int i = FPU_rm; + clear_C1(); + FPU_add(&st(i), FPU_gettagi(i), i, control_word); +} + + +void fmul_i(void) +{ + /* fmul st(i),st */ + clear_C1(); + FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word); +} + + +void fsubri(void) +{ + /* fsubr st(i),st */ + clear_C1(); + FPU_sub(DEST_RM, FPU_rm, control_word); +} + + +void fsub_i(void) +{ + /* fsub st(i),st */ + clear_C1(); + FPU_sub(REV|DEST_RM, FPU_rm, control_word); +} + + +void fdivri(void) +{ + /* fdivr st(i),st */ + clear_C1(); + FPU_div(DEST_RM, FPU_rm, control_word); +} + + +void fdiv_i(void) +{ + /* fdiv st(i),st */ + clear_C1(); + FPU_div(REV|DEST_RM, FPU_rm, control_word); +} + + + +void faddp_(void) +{ + /* faddp st(i),st */ + int i = FPU_rm; + clear_C1(); + if ( FPU_add(&st(i), FPU_gettagi(i), i, control_word) >= 0 ) + FPU_pop(); +} + + +void fmulp_(void) +{ + /* fmulp st(i),st */ + clear_C1(); + if ( FPU_mul(&st(0), FPU_gettag0(), FPU_rm, control_word) >= 0 ) + FPU_pop(); +} + + + +void fsubrp(void) +{ + /* fsubrp st(i),st */ + clear_C1(); + if ( FPU_sub(DEST_RM, FPU_rm, control_word) >= 0 ) + FPU_pop(); +} + + +void fsubp_(void) +{ + /* fsubp st(i),st */ + clear_C1(); + if ( FPU_sub(REV|DEST_RM, FPU_rm, control_word) >= 0 ) + FPU_pop(); +} + + +void fdivrp(void) +{ + /* fdivrp st(i),st */ + clear_C1(); + if ( FPU_div(DEST_RM, FPU_rm, control_word) >= 0 ) + FPU_pop(); +} + + +void fdivp_(void) +{ + /* fdivp st(i),st */ + clear_C1(); + if ( FPU_div(REV|DEST_RM, FPU_rm, control_word) >= 0 ) + FPU_pop(); +} diff --git a/arch/x86/math-emu/fpu_asm.h b/arch/x86/math-emu/fpu_asm.h new file mode 100644 index 00000000000..9ba12416df1 --- /dev/null +++ b/arch/x86/math-emu/fpu_asm.h @@ -0,0 +1,32 @@ +/*---------------------------------------------------------------------------+ + | fpu_asm.h | + | | + | Copyright (C) 1992,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _FPU_ASM_H_ +#define _FPU_ASM_H_ + +#include + +#define EXCEPTION FPU_exception + + +#define PARAM1 8(%ebp) +#define PARAM2 12(%ebp) +#define PARAM3 16(%ebp) +#define PARAM4 20(%ebp) +#define PARAM5 24(%ebp) +#define PARAM6 28(%ebp) +#define PARAM7 32(%ebp) + +#define SIGL_OFFSET 0 +#define EXP(x) 8(x) +#define SIG(x) SIGL_OFFSET##(x) +#define SIGL(x) SIGL_OFFSET##(x) +#define SIGH(x) 4(x) + +#endif /* _FPU_ASM_H_ */ diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c new file mode 100644 index 00000000000..20886cfb9f7 --- /dev/null +++ b/arch/x86/math-emu/fpu_aux.c @@ -0,0 +1,204 @@ +/*---------------------------------------------------------------------------+ + | fpu_aux.c | + | | + | Code to implement some of the FPU auxiliary instructions. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "control_w.h" + + +static void fnop(void) +{ +} + +static void fclex(void) +{ + partial_status &= ~(SW_Backward|SW_Summary|SW_Stack_Fault|SW_Precision| + SW_Underflow|SW_Overflow|SW_Zero_Div|SW_Denorm_Op| + SW_Invalid); + no_ip_update = 1; +} + +/* Needs to be externally visible */ +void finit(void) +{ + control_word = 0x037f; + partial_status = 0; + top = 0; /* We don't keep top in the status word internally. */ + fpu_tag_word = 0xffff; + /* The behaviour is different from that detailed in + Section 15.1.6 of the Intel manual */ + operand_address.offset = 0; + operand_address.selector = 0; + instruction_address.offset = 0; + instruction_address.selector = 0; + instruction_address.opcode = 0; + no_ip_update = 1; +} + +/* + * These are nops on the i387.. + */ +#define feni fnop +#define fdisi fnop +#define fsetpm fnop + +static FUNC const finit_table[] = { + feni, fdisi, fclex, finit, + fsetpm, FPU_illegal, FPU_illegal, FPU_illegal +}; + +void finit_(void) +{ + (finit_table[FPU_rm])(); +} + + +static void fstsw_ax(void) +{ + *(short *) &FPU_EAX = status_word(); + no_ip_update = 1; +} + +static FUNC const fstsw_table[] = { + fstsw_ax, FPU_illegal, FPU_illegal, FPU_illegal, + FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal +}; + +void fstsw_(void) +{ + (fstsw_table[FPU_rm])(); +} + + +static FUNC const fp_nop_table[] = { + fnop, FPU_illegal, FPU_illegal, FPU_illegal, + FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal +}; + +void fp_nop(void) +{ + (fp_nop_table[FPU_rm])(); +} + + +void fld_i_(void) +{ + FPU_REG *st_new_ptr; + int i; + u_char tag; + + if ( STACK_OVERFLOW ) + { FPU_stack_overflow(); return; } + + /* fld st(i) */ + i = FPU_rm; + if ( NOT_EMPTY(i) ) + { + reg_copy(&st(i), st_new_ptr); + tag = FPU_gettagi(i); + push(); + FPU_settag0(tag); + } + else + { + if ( control_word & CW_Invalid ) + { + /* The masked response */ + FPU_stack_underflow(); + } + else + EXCEPTION(EX_StackUnder); + } + +} + + +void fxch_i(void) +{ + /* fxch st(i) */ + FPU_REG t; + int i = FPU_rm; + FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i); + long tag_word = fpu_tag_word; + int regnr = top & 7, regnri = ((regnr + i) & 7); + u_char st0_tag = (tag_word >> (regnr*2)) & 3; + u_char sti_tag = (tag_word >> (regnri*2)) & 3; + + if ( st0_tag == TAG_Empty ) + { + if ( sti_tag == TAG_Empty ) + { + FPU_stack_underflow(); + FPU_stack_underflow_i(i); + return; + } + if ( control_word & CW_Invalid ) + { + /* Masked response */ + FPU_copy_to_reg0(sti_ptr, sti_tag); + } + FPU_stack_underflow_i(i); + return; + } + if ( sti_tag == TAG_Empty ) + { + if ( control_word & CW_Invalid ) + { + /* Masked response */ + FPU_copy_to_regi(st0_ptr, st0_tag, i); + } + FPU_stack_underflow(); + return; + } + clear_C1(); + + reg_copy(st0_ptr, &t); + reg_copy(sti_ptr, st0_ptr); + reg_copy(&t, sti_ptr); + + tag_word &= ~(3 << (regnr*2)) & ~(3 << (regnri*2)); + tag_word |= (sti_tag << (regnr*2)) | (st0_tag << (regnri*2)); + fpu_tag_word = tag_word; +} + + +void ffree_(void) +{ + /* ffree st(i) */ + FPU_settagi(FPU_rm, TAG_Empty); +} + + +void ffreep(void) +{ + /* ffree st(i) + pop - unofficial code */ + FPU_settagi(FPU_rm, TAG_Empty); + FPU_pop(); +} + + +void fst_i_(void) +{ + /* fst st(i) */ + FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm); +} + + +void fstp_i(void) +{ + /* fstp st(i) */ + FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm); + FPU_pop(); +} + diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h new file mode 100644 index 00000000000..65120f52385 --- /dev/null +++ b/arch/x86/math-emu/fpu_emu.h @@ -0,0 +1,218 @@ +/*---------------------------------------------------------------------------+ + | fpu_emu.h | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + +---------------------------------------------------------------------------*/ + + +#ifndef _FPU_EMU_H_ +#define _FPU_EMU_H_ + +/* + * Define PECULIAR_486 to get a closer approximation to 80486 behaviour, + * rather than behaviour which appears to be cleaner. + * This is a matter of opinion: for all I know, the 80486 may simply + * be complying with the IEEE spec. Maybe one day I'll get to see the + * spec... + */ +#define PECULIAR_486 + +#ifdef __ASSEMBLY__ +#include "fpu_asm.h" +#define Const(x) $##x +#else +#define Const(x) x +#endif + +#define EXP_BIAS Const(0) +#define EXP_OVER Const(0x4000) /* smallest invalid large exponent */ +#define EXP_UNDER Const(-0x3fff) /* largest invalid small exponent */ +#define EXP_WAY_UNDER Const(-0x6000) /* Below the smallest denormal, but + still a 16 bit nr. */ +#define EXP_Infinity EXP_OVER +#define EXP_NaN EXP_OVER + +#define EXTENDED_Ebias Const(0x3fff) +#define EXTENDED_Emin (-0x3ffe) /* smallest valid exponent */ + +#define SIGN_POS Const(0) +#define SIGN_NEG Const(0x80) + +#define SIGN_Positive Const(0) +#define SIGN_Negative Const(0x8000) + + +/* Keep the order TAG_Valid, TAG_Zero, TW_Denormal */ +/* The following fold to 2 (Special) in the Tag Word */ +#define TW_Denormal Const(4) /* De-normal */ +#define TW_Infinity Const(5) /* + or - infinity */ +#define TW_NaN Const(6) /* Not a Number */ +#define TW_Unsupported Const(7) /* Not supported by an 80486 */ + +#define TAG_Valid Const(0) /* valid */ +#define TAG_Zero Const(1) /* zero */ +#define TAG_Special Const(2) /* De-normal, + or - infinity, + or Not a Number */ +#define TAG_Empty Const(3) /* empty */ +#define TAG_Error Const(0x80) /* probably need to abort */ + +#define LOADED_DATA Const(10101) /* Special st() number to identify + loaded data (not on stack). */ + +/* A few flags (must be >= 0x10). */ +#define REV 0x10 +#define DEST_RM 0x20 +#define LOADED 0x40 + +#define FPU_Exception Const(0x80000000) /* Added to tag returns. */ + + +#ifndef __ASSEMBLY__ + +#include "fpu_system.h" + +#include /* for struct _fpstate */ +#include +#include + +/* +#define RE_ENTRANT_CHECKING + */ + +#ifdef RE_ENTRANT_CHECKING +extern u_char emulating; +# define RE_ENTRANT_CHECK_OFF emulating = 0 +# define RE_ENTRANT_CHECK_ON emulating = 1 +#else +# define RE_ENTRANT_CHECK_OFF +# define RE_ENTRANT_CHECK_ON +#endif /* RE_ENTRANT_CHECKING */ + +#define FWAIT_OPCODE 0x9b +#define OP_SIZE_PREFIX 0x66 +#define ADDR_SIZE_PREFIX 0x67 +#define PREFIX_CS 0x2e +#define PREFIX_DS 0x3e +#define PREFIX_ES 0x26 +#define PREFIX_SS 0x36 +#define PREFIX_FS 0x64 +#define PREFIX_GS 0x65 +#define PREFIX_REPE 0xf3 +#define PREFIX_REPNE 0xf2 +#define PREFIX_LOCK 0xf0 +#define PREFIX_CS_ 1 +#define PREFIX_DS_ 2 +#define PREFIX_ES_ 3 +#define PREFIX_FS_ 4 +#define PREFIX_GS_ 5 +#define PREFIX_SS_ 6 +#define PREFIX_DEFAULT 7 + +struct address { + unsigned int offset; + unsigned int selector:16; + unsigned int opcode:11; + unsigned int empty:5; +}; +struct fpu__reg { + unsigned sigl; + unsigned sigh; + short exp; +}; + +typedef void (*FUNC)(void); +typedef struct fpu__reg FPU_REG; +typedef void (*FUNC_ST0)(FPU_REG *st0_ptr, u_char st0_tag); +typedef struct { u_char address_size, operand_size, segment; } + overrides; +/* This structure is 32 bits: */ +typedef struct { overrides override; + u_char default_mode; } fpu_addr_modes; +/* PROTECTED has a restricted meaning in the emulator; it is used + to signal that the emulator needs to do special things to ensure + that protection is respected in a segmented model. */ +#define PROTECTED 4 +#define SIXTEEN 1 /* We rely upon this being 1 (true) */ +#define VM86 SIXTEEN +#define PM16 (SIXTEEN | PROTECTED) +#define SEG32 PROTECTED +extern u_char const data_sizes_16[32]; + +#define register_base ((u_char *) registers ) +#define fpu_register(x) ( * ((FPU_REG *)( register_base + 10 * (x & 7) )) ) +#define st(x) ( * ((FPU_REG *)( register_base + 10 * ((top+x) & 7) )) ) + +#define STACK_OVERFLOW (FPU_stackoverflow(&st_new_ptr)) +#define NOT_EMPTY(i) (!FPU_empty_i(i)) + +#define NOT_EMPTY_ST0 (st0_tag ^ TAG_Empty) + +#define poppop() { FPU_pop(); FPU_pop(); } + +/* push() does not affect the tags */ +#define push() { top--; } + +#define signbyte(a) (((u_char *)(a))[9]) +#define getsign(a) (signbyte(a) & 0x80) +#define setsign(a,b) { if (b) signbyte(a) |= 0x80; else signbyte(a) &= 0x7f; } +#define copysign(a,b) { if (getsign(a)) signbyte(b) |= 0x80; \ + else signbyte(b) &= 0x7f; } +#define changesign(a) { signbyte(a) ^= 0x80; } +#define setpositive(a) { signbyte(a) &= 0x7f; } +#define setnegative(a) { signbyte(a) |= 0x80; } +#define signpositive(a) ( (signbyte(a) & 0x80) == 0 ) +#define signnegative(a) (signbyte(a) & 0x80) + +static inline void reg_copy(FPU_REG const *x, FPU_REG *y) +{ + *(short *)&(y->exp) = *(const short *)&(x->exp); + *(long long *)&(y->sigl) = *(const long long *)&(x->sigl); +} + +#define exponent(x) (((*(short *)&((x)->exp)) & 0x7fff) - EXTENDED_Ebias) +#define setexponentpos(x,y) { (*(short *)&((x)->exp)) = \ + ((y) + EXTENDED_Ebias) & 0x7fff; } +#define exponent16(x) (*(short *)&((x)->exp)) +#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (y); } +#define addexponent(x,y) { (*(short *)&((x)->exp)) += (y); } +#define stdexp(x) { (*(short *)&((x)->exp)) += EXTENDED_Ebias; } + +#define isdenormal(ptr) (exponent(ptr) == EXP_BIAS+EXP_UNDER) + +#define significand(x) ( ((unsigned long long *)&((x)->sigl))[0] ) + + +/*----- Prototypes for functions written in assembler -----*/ +/* extern void reg_move(FPU_REG *a, FPU_REG *b); */ + +asmlinkage int FPU_normalize(FPU_REG *x); +asmlinkage int FPU_normalize_nuo(FPU_REG *x); +asmlinkage int FPU_u_sub(FPU_REG const *arg1, FPU_REG const *arg2, + FPU_REG *answ, unsigned int control_w, u_char sign, + int expa, int expb); +asmlinkage int FPU_u_mul(FPU_REG const *arg1, FPU_REG const *arg2, + FPU_REG *answ, unsigned int control_w, u_char sign, + int expon); +asmlinkage int FPU_u_div(FPU_REG const *arg1, FPU_REG const *arg2, + FPU_REG *answ, unsigned int control_w, u_char sign); +asmlinkage int FPU_u_add(FPU_REG const *arg1, FPU_REG const *arg2, + FPU_REG *answ, unsigned int control_w, u_char sign, + int expa, int expb); +asmlinkage int wm_sqrt(FPU_REG *n, int dummy1, int dummy2, + unsigned int control_w, u_char sign); +asmlinkage unsigned FPU_shrx(void *l, unsigned x); +asmlinkage unsigned FPU_shrxs(void *v, unsigned x); +asmlinkage unsigned long FPU_div_small(unsigned long long *x, unsigned long y); +asmlinkage int FPU_round(FPU_REG *arg, unsigned int extent, int dummy, + unsigned int control_w, u_char sign); + +#ifndef MAKING_PROTO +#include "fpu_proto.h" +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _FPU_EMU_H_ */ diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c new file mode 100644 index 00000000000..1853524c8b5 --- /dev/null +++ b/arch/x86/math-emu/fpu_entry.c @@ -0,0 +1,761 @@ +/*---------------------------------------------------------------------------+ + | fpu_entry.c | + | | + | The entry functions for wm-FPU-emu | + | | + | Copyright (C) 1992,1993,1994,1996,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | See the files "README" and "COPYING" for further copyright and warranty | + | information. | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | math_emulate(), restore_i387_soft() and save_i387_soft() are the only | + | entry points for wm-FPU-emu. | + +---------------------------------------------------------------------------*/ + +#include +#include + +#include +#include + +#include "fpu_system.h" +#include "fpu_emu.h" +#include "exception.h" +#include "control_w.h" +#include "status_w.h" + +#define __BAD__ FPU_illegal /* Illegal on an 80486, causes SIGILL */ + +#ifndef NO_UNDOC_CODE /* Un-documented FPU op-codes supported by default. */ + +/* WARNING: These codes are not documented by Intel in their 80486 manual + and may not work on FPU clones or later Intel FPUs. */ + +/* Changes to support the un-doc codes provided by Linus Torvalds. */ + +#define _d9_d8_ fstp_i /* unofficial code (19) */ +#define _dc_d0_ fcom_st /* unofficial code (14) */ +#define _dc_d8_ fcompst /* unofficial code (1c) */ +#define _dd_c8_ fxch_i /* unofficial code (0d) */ +#define _de_d0_ fcompst /* unofficial code (16) */ +#define _df_c0_ ffreep /* unofficial code (07) ffree + pop */ +#define _df_c8_ fxch_i /* unofficial code (0f) */ +#define _df_d0_ fstp_i /* unofficial code (17) */ +#define _df_d8_ fstp_i /* unofficial code (1f) */ + +static FUNC const st_instr_table[64] = { + fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, _df_c0_, + fmul__, fxch_i, __BAD__, __BAD__, fmul_i, _dd_c8_, fmulp_, _df_c8_, + fcom_st, fp_nop, __BAD__, __BAD__, _dc_d0_, fst_i_, _de_d0_, _df_d0_, + fcompst, _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i, fcompp, _df_d8_, + fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_, + fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__, + fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__, + fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__, +}; + +#else /* Support only documented FPU op-codes */ + +static FUNC const st_instr_table[64] = { + fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, __BAD__, + fmul__, fxch_i, __BAD__, __BAD__, fmul_i, __BAD__, fmulp_, __BAD__, + fcom_st, fp_nop, __BAD__, __BAD__, __BAD__, fst_i_, __BAD__, __BAD__, + fcompst, __BAD__, __BAD__, __BAD__, __BAD__, fstp_i, fcompp, __BAD__, + fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_, + fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__, + fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__, + fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__, +}; + +#endif /* NO_UNDOC_CODE */ + + +#define _NONE_ 0 /* Take no special action */ +#define _REG0_ 1 /* Need to check for not empty st(0) */ +#define _REGI_ 2 /* Need to check for not empty st(0) and st(rm) */ +#define _REGi_ 0 /* Uses st(rm) */ +#define _PUSH_ 3 /* Need to check for space to push onto stack */ +#define _null_ 4 /* Function illegal or not implemented */ +#define _REGIi 5 /* Uses st(0) and st(rm), result to st(rm) */ +#define _REGIp 6 /* Uses st(0) and st(rm), result to st(rm) then pop */ +#define _REGIc 0 /* Compare st(0) and st(rm) */ +#define _REGIn 0 /* Uses st(0) and st(rm), but handle checks later */ + +#ifndef NO_UNDOC_CODE + +/* Un-documented FPU op-codes supported by default. (see above) */ + +static u_char const type_table[64] = { + _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_, + _REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_, + _REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_, + _REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_, + _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_, + _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_, + _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_, + _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_ +}; + +#else /* Support only documented FPU op-codes */ + +static u_char const type_table[64] = { + _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_, + _REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_, + _REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_, + _REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_, + _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_, + _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_, + _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_, + _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_ +}; + +#endif /* NO_UNDOC_CODE */ + + +#ifdef RE_ENTRANT_CHECKING +u_char emulating=0; +#endif /* RE_ENTRANT_CHECKING */ + +static int valid_prefix(u_char *Byte, u_char __user **fpu_eip, + overrides *override); + +asmlinkage void math_emulate(long arg) +{ + u_char FPU_modrm, byte1; + unsigned short code; + fpu_addr_modes addr_modes; + int unmasked; + FPU_REG loaded_data; + FPU_REG *st0_ptr; + u_char loaded_tag, st0_tag; + void __user *data_address; + struct address data_sel_off; + struct address entry_sel_off; + unsigned long code_base = 0; + unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ + struct desc_struct code_descriptor; + +#ifdef RE_ENTRANT_CHECKING + if ( emulating ) + { + printk("ERROR: wm-FPU-emu is not RE-ENTRANT!\n"); + } + RE_ENTRANT_CHECK_ON; +#endif /* RE_ENTRANT_CHECKING */ + + if (!used_math()) + { + finit(); + set_used_math(); + } + + SETUP_DATA_AREA(arg); + + FPU_ORIG_EIP = FPU_EIP; + + if ( (FPU_EFLAGS & 0x00020000) != 0 ) + { + /* Virtual 8086 mode */ + addr_modes.default_mode = VM86; + FPU_EIP += code_base = FPU_CS << 4; + code_limit = code_base + 0xffff; /* Assumes code_base <= 0xffff0000 */ + } + else if ( FPU_CS == __USER_CS && FPU_DS == __USER_DS ) + { + addr_modes.default_mode = 0; + } + else if ( FPU_CS == __KERNEL_CS ) + { + printk("math_emulate: %04x:%08lx\n",FPU_CS,FPU_EIP); + panic("Math emulation needed in kernel"); + } + else + { + + if ( (FPU_CS & 4) != 4 ) /* Must be in the LDT */ + { + /* Can only handle segmented addressing via the LDT + for now, and it must be 16 bit */ + printk("FPU emulator: Unsupported addressing mode\n"); + math_abort(FPU_info, SIGILL); + } + + code_descriptor = LDT_DESCRIPTOR(FPU_CS); + if ( SEG_D_SIZE(code_descriptor) ) + { + /* The above test may be wrong, the book is not clear */ + /* Segmented 32 bit protected mode */ + addr_modes.default_mode = SEG32; + } + else + { + /* 16 bit protected mode */ + addr_modes.default_mode = PM16; + } + FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor); + code_limit = code_base + + (SEG_LIMIT(code_descriptor)+1) * SEG_GRANULARITY(code_descriptor) + - 1; + if ( code_limit < code_base ) code_limit = 0xffffffff; + } + + FPU_lookahead = 1; + if (current->ptrace & PT_PTRACED) + FPU_lookahead = 0; + + if ( !valid_prefix(&byte1, (u_char __user **)&FPU_EIP, + &addr_modes.override) ) + { + RE_ENTRANT_CHECK_OFF; + printk("FPU emulator: Unknown prefix byte 0x%02x, probably due to\n" + "FPU emulator: self-modifying code! (emulation impossible)\n", + byte1); + RE_ENTRANT_CHECK_ON; + EXCEPTION(EX_INTERNAL|0x126); + math_abort(FPU_info,SIGILL); + } + +do_another_FPU_instruction: + + no_ip_update = 0; + + FPU_EIP++; /* We have fetched the prefix and first code bytes. */ + + if ( addr_modes.default_mode ) + { + /* This checks for the minimum instruction bytes. + We also need to check any extra (address mode) code access. */ + if ( FPU_EIP > code_limit ) + math_abort(FPU_info,SIGSEGV); + } + + if ( (byte1 & 0xf8) != 0xd8 ) + { + if ( byte1 == FWAIT_OPCODE ) + { + if (partial_status & SW_Summary) + goto do_the_FPU_interrupt; + else + goto FPU_fwait_done; + } +#ifdef PARANOID + EXCEPTION(EX_INTERNAL|0x128); + math_abort(FPU_info,SIGILL); +#endif /* PARANOID */ + } + + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(FPU_modrm, (u_char __user *) FPU_EIP); + RE_ENTRANT_CHECK_ON; + FPU_EIP++; + + if (partial_status & SW_Summary) + { + /* Ignore the error for now if the current instruction is a no-wait + control instruction */ + /* The 80486 manual contradicts itself on this topic, + but a real 80486 uses the following instructions: + fninit, fnstenv, fnsave, fnstsw, fnstenv, fnclex. + */ + code = (FPU_modrm << 8) | byte1; + if ( ! ( (((code & 0xf803) == 0xe003) || /* fnclex, fninit, fnstsw */ + (((code & 0x3003) == 0x3001) && /* fnsave, fnstcw, fnstenv, + fnstsw */ + ((code & 0xc000) != 0xc000))) ) ) + { + /* + * We need to simulate the action of the kernel to FPU + * interrupts here. + */ + do_the_FPU_interrupt: + + FPU_EIP = FPU_ORIG_EIP; /* Point to current FPU instruction. */ + + RE_ENTRANT_CHECK_OFF; + current->thread.trap_no = 16; + current->thread.error_code = 0; + send_sig(SIGFPE, current, 1); + return; + } + } + + entry_sel_off.offset = FPU_ORIG_EIP; + entry_sel_off.selector = FPU_CS; + entry_sel_off.opcode = (byte1 << 8) | FPU_modrm; + + FPU_rm = FPU_modrm & 7; + + if ( FPU_modrm < 0300 ) + { + /* All of these instructions use the mod/rm byte to get a data address */ + + if ( (addr_modes.default_mode & SIXTEEN) + ^ (addr_modes.override.address_size == ADDR_SIZE_PREFIX) ) + data_address = FPU_get_address_16(FPU_modrm, &FPU_EIP, &data_sel_off, + addr_modes); + else + data_address = FPU_get_address(FPU_modrm, &FPU_EIP, &data_sel_off, + addr_modes); + + if ( addr_modes.default_mode ) + { + if ( FPU_EIP-1 > code_limit ) + math_abort(FPU_info,SIGSEGV); + } + + if ( !(byte1 & 1) ) + { + unsigned short status1 = partial_status; + + st0_ptr = &st(0); + st0_tag = FPU_gettag0(); + + /* Stack underflow has priority */ + if ( NOT_EMPTY_ST0 ) + { + if ( addr_modes.default_mode & PROTECTED ) + { + /* This table works for 16 and 32 bit protected mode */ + if ( access_limit < data_sizes_16[(byte1 >> 1) & 3] ) + math_abort(FPU_info,SIGSEGV); + } + + unmasked = 0; /* Do this here to stop compiler warnings. */ + switch ( (byte1 >> 1) & 3 ) + { + case 0: + unmasked = FPU_load_single((float __user *)data_address, + &loaded_data); + loaded_tag = unmasked & 0xff; + unmasked &= ~0xff; + break; + case 1: + loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data); + break; + case 2: + unmasked = FPU_load_double((double __user *)data_address, + &loaded_data); + loaded_tag = unmasked & 0xff; + unmasked &= ~0xff; + break; + case 3: + default: /* Used here to suppress gcc warnings. */ + loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data); + break; + } + + /* No more access to user memory, it is safe + to use static data now */ + + /* NaN operands have the next priority. */ + /* We have to delay looking at st(0) until after + loading the data, because that data might contain an SNaN */ + if ( ((st0_tag == TAG_Special) && isNaN(st0_ptr)) || + ((loaded_tag == TAG_Special) && isNaN(&loaded_data)) ) + { + /* Restore the status word; we might have loaded a + denormal. */ + partial_status = status1; + if ( (FPU_modrm & 0x30) == 0x10 ) + { + /* fcom or fcomp */ + EXCEPTION(EX_Invalid); + setcc(SW_C3 | SW_C2 | SW_C0); + if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) ) + FPU_pop(); /* fcomp, masked, so we pop. */ + } + else + { + if ( loaded_tag == TAG_Special ) + loaded_tag = FPU_Special(&loaded_data); +#ifdef PECULIAR_486 + /* This is not really needed, but gives behaviour + identical to an 80486 */ + if ( (FPU_modrm & 0x28) == 0x20 ) + /* fdiv or fsub */ + real_2op_NaN(&loaded_data, loaded_tag, 0, &loaded_data); + else +#endif /* PECULIAR_486 */ + /* fadd, fdivr, fmul, or fsubr */ + real_2op_NaN(&loaded_data, loaded_tag, 0, st0_ptr); + } + goto reg_mem_instr_done; + } + + if ( unmasked && !((FPU_modrm & 0x30) == 0x10) ) + { + /* Is not a comparison instruction. */ + if ( (FPU_modrm & 0x38) == 0x38 ) + { + /* fdivr */ + if ( (st0_tag == TAG_Zero) && + ((loaded_tag == TAG_Valid) + || (loaded_tag == TAG_Special + && isdenormal(&loaded_data))) ) + { + if ( FPU_divide_by_zero(0, getsign(&loaded_data)) + < 0 ) + { + /* We use the fact here that the unmasked + exception in the loaded data was for a + denormal operand */ + /* Restore the state of the denormal op bit */ + partial_status &= ~SW_Denorm_Op; + partial_status |= status1 & SW_Denorm_Op; + } + else + setsign(st0_ptr, getsign(&loaded_data)); + } + } + goto reg_mem_instr_done; + } + + switch ( (FPU_modrm >> 3) & 7 ) + { + case 0: /* fadd */ + clear_C1(); + FPU_add(&loaded_data, loaded_tag, 0, control_word); + break; + case 1: /* fmul */ + clear_C1(); + FPU_mul(&loaded_data, loaded_tag, 0, control_word); + break; + case 2: /* fcom */ + FPU_compare_st_data(&loaded_data, loaded_tag); + break; + case 3: /* fcomp */ + if ( !FPU_compare_st_data(&loaded_data, loaded_tag) + && !unmasked ) + FPU_pop(); + break; + case 4: /* fsub */ + clear_C1(); + FPU_sub(LOADED|loaded_tag, (int)&loaded_data, control_word); + break; + case 5: /* fsubr */ + clear_C1(); + FPU_sub(REV|LOADED|loaded_tag, (int)&loaded_data, control_word); + break; + case 6: /* fdiv */ + clear_C1(); + FPU_div(LOADED|loaded_tag, (int)&loaded_data, control_word); + break; + case 7: /* fdivr */ + clear_C1(); + if ( st0_tag == TAG_Zero ) + partial_status = status1; /* Undo any denorm tag, + zero-divide has priority. */ + FPU_div(REV|LOADED|loaded_tag, (int)&loaded_data, control_word); + break; + } + } + else + { + if ( (FPU_modrm & 0x30) == 0x10 ) + { + /* The instruction is fcom or fcomp */ + EXCEPTION(EX_StackUnder); + setcc(SW_C3 | SW_C2 | SW_C0); + if ( (FPU_modrm & 0x08) && (control_word & CW_Invalid) ) + FPU_pop(); /* fcomp */ + } + else + FPU_stack_underflow(); + } + reg_mem_instr_done: + operand_address = data_sel_off; + } + else + { + if ( !(no_ip_update = + FPU_load_store(((FPU_modrm & 0x38) | (byte1 & 6)) >> 1, + addr_modes, data_address)) ) + { + operand_address = data_sel_off; + } + } + + } + else + { + /* None of these instructions access user memory */ + u_char instr_index = (FPU_modrm & 0x38) | (byte1 & 7); + +#ifdef PECULIAR_486 + /* This is supposed to be undefined, but a real 80486 seems + to do this: */ + operand_address.offset = 0; + operand_address.selector = FPU_DS; +#endif /* PECULIAR_486 */ + + st0_ptr = &st(0); + st0_tag = FPU_gettag0(); + switch ( type_table[(int) instr_index] ) + { + case _NONE_: /* also _REGIc: _REGIn */ + break; + case _REG0_: + if ( !NOT_EMPTY_ST0 ) + { + FPU_stack_underflow(); + goto FPU_instruction_done; + } + break; + case _REGIi: + if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) + { + FPU_stack_underflow_i(FPU_rm); + goto FPU_instruction_done; + } + break; + case _REGIp: + if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) + { + FPU_stack_underflow_pop(FPU_rm); + goto FPU_instruction_done; + } + break; + case _REGI_: + if ( !NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm) ) + { + FPU_stack_underflow(); + goto FPU_instruction_done; + } + break; + case _PUSH_: /* Only used by the fld st(i) instruction */ + break; + case _null_: + FPU_illegal(); + goto FPU_instruction_done; + default: + EXCEPTION(EX_INTERNAL|0x111); + goto FPU_instruction_done; + } + (*st_instr_table[(int) instr_index])(); + +FPU_instruction_done: + ; + } + + if ( ! no_ip_update ) + instruction_address = entry_sel_off; + +FPU_fwait_done: + +#ifdef DEBUG + RE_ENTRANT_CHECK_OFF; + FPU_printall(); + RE_ENTRANT_CHECK_ON; +#endif /* DEBUG */ + + if (FPU_lookahead && !need_resched()) + { + FPU_ORIG_EIP = FPU_EIP - code_base; + if ( valid_prefix(&byte1, (u_char __user **)&FPU_EIP, + &addr_modes.override) ) + goto do_another_FPU_instruction; + } + + if ( addr_modes.default_mode ) + FPU_EIP -= code_base; + + RE_ENTRANT_CHECK_OFF; +} + + +/* Support for prefix bytes is not yet complete. To properly handle + all prefix bytes, further changes are needed in the emulator code + which accesses user address space. Access to separate segments is + important for msdos emulation. */ +static int valid_prefix(u_char *Byte, u_char __user **fpu_eip, + overrides *override) +{ + u_char byte; + u_char __user *ip = *fpu_eip; + + *override = (overrides) { 0, 0, PREFIX_DEFAULT }; /* defaults */ + + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(byte, ip); + RE_ENTRANT_CHECK_ON; + + while ( 1 ) + { + switch ( byte ) + { + case ADDR_SIZE_PREFIX: + override->address_size = ADDR_SIZE_PREFIX; + goto do_next_byte; + + case OP_SIZE_PREFIX: + override->operand_size = OP_SIZE_PREFIX; + goto do_next_byte; + + case PREFIX_CS: + override->segment = PREFIX_CS_; + goto do_next_byte; + case PREFIX_ES: + override->segment = PREFIX_ES_; + goto do_next_byte; + case PREFIX_SS: + override->segment = PREFIX_SS_; + goto do_next_byte; + case PREFIX_FS: + override->segment = PREFIX_FS_; + goto do_next_byte; + case PREFIX_GS: + override->segment = PREFIX_GS_; + goto do_next_byte; + case PREFIX_DS: + override->segment = PREFIX_DS_; + goto do_next_byte; + +/* lock is not a valid prefix for FPU instructions, + let the cpu handle it to generate a SIGILL. */ +/* case PREFIX_LOCK: */ + + /* rep.. prefixes have no meaning for FPU instructions */ + case PREFIX_REPE: + case PREFIX_REPNE: + + do_next_byte: + ip++; + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(byte, ip); + RE_ENTRANT_CHECK_ON; + break; + case FWAIT_OPCODE: + *Byte = byte; + return 1; + default: + if ( (byte & 0xf8) == 0xd8 ) + { + *Byte = byte; + *fpu_eip = ip; + return 1; + } + else + { + /* Not a valid sequence of prefix bytes followed by + an FPU instruction. */ + *Byte = byte; /* Needed for error message. */ + return 0; + } + } + } +} + + +void math_abort(struct info * info, unsigned int signal) +{ + FPU_EIP = FPU_ORIG_EIP; + current->thread.trap_no = 16; + current->thread.error_code = 0; + send_sig(signal,current,1); + RE_ENTRANT_CHECK_OFF; + __asm__("movl %0,%%esp ; ret": :"g" (((long) info)-4)); +#ifdef PARANOID + printk("ERROR: wm-FPU-emu math_abort failed!\n"); +#endif /* PARANOID */ +} + + + +#define S387 ((struct i387_soft_struct *)s387) +#define sstatus_word() \ + ((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top)) + +int restore_i387_soft(void *s387, struct _fpstate __user *buf) +{ + u_char __user *d = (u_char __user *)buf; + int offset, other, i, tags, regnr, tag, newtop; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, d, 7*4 + 8*10); + if (__copy_from_user(&S387->cwd, d, 7*4)) + return -1; + RE_ENTRANT_CHECK_ON; + + d += 7*4; + + S387->ftop = (S387->swd >> SW_Top_Shift) & 7; + offset = (S387->ftop & 7) * 10; + other = 80 - offset; + + RE_ENTRANT_CHECK_OFF; + /* Copy all registers in stack order. */ + if (__copy_from_user(((u_char *)&S387->st_space)+offset, d, other)) + return -1; + if ( offset ) + if (__copy_from_user((u_char *)&S387->st_space, d+other, offset)) + return -1; + RE_ENTRANT_CHECK_ON; + + /* The tags may need to be corrected now. */ + tags = S387->twd; + newtop = S387->ftop; + for ( i = 0; i < 8; i++ ) + { + regnr = (i+newtop) & 7; + if ( ((tags >> ((regnr & 7)*2)) & 3) != TAG_Empty ) + { + /* The loaded data over-rides all other cases. */ + tag = FPU_tagof((FPU_REG *)((u_char *)S387->st_space + 10*regnr)); + tags &= ~(3 << (regnr*2)); + tags |= (tag & 3) << (regnr*2); + } + } + S387->twd = tags; + + return 0; +} + + +int save_i387_soft(void *s387, struct _fpstate __user * buf) +{ + u_char __user *d = (u_char __user *)buf; + int offset = (S387->ftop & 7) * 10, other = 80 - offset; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, d, 7*4 + 8*10); +#ifdef PECULIAR_486 + S387->cwd &= ~0xe080; + /* An 80486 sets nearly all of the reserved bits to 1. */ + S387->cwd |= 0xffff0040; + S387->swd = sstatus_word() | 0xffff0000; + S387->twd |= 0xffff0000; + S387->fcs &= ~0xf8000000; + S387->fos |= 0xffff0000; +#endif /* PECULIAR_486 */ + if (__copy_to_user(d, &S387->cwd, 7*4)) + return -1; + RE_ENTRANT_CHECK_ON; + + d += 7*4; + + RE_ENTRANT_CHECK_OFF; + /* Copy all registers in stack order. */ + if (__copy_to_user(d, ((u_char *)&S387->st_space)+offset, other)) + return -1; + if ( offset ) + if (__copy_to_user(d+other, (u_char *)&S387->st_space, offset)) + return -1; + RE_ENTRANT_CHECK_ON; + + return 1; +} diff --git a/arch/x86/math-emu/fpu_etc.c b/arch/x86/math-emu/fpu_etc.c new file mode 100644 index 00000000000..e3b5d465587 --- /dev/null +++ b/arch/x86/math-emu/fpu_etc.c @@ -0,0 +1,143 @@ +/*---------------------------------------------------------------------------+ + | fpu_etc.c | + | | + | Implement a few FPU instructions. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "reg_constant.h" + + +static void fchs(FPU_REG *st0_ptr, u_char st0tag) +{ + if ( st0tag ^ TAG_Empty ) + { + signbyte(st0_ptr) ^= SIGN_NEG; + clear_C1(); + } + else + FPU_stack_underflow(); +} + + +static void fabs(FPU_REG *st0_ptr, u_char st0tag) +{ + if ( st0tag ^ TAG_Empty ) + { + setpositive(st0_ptr); + clear_C1(); + } + else + FPU_stack_underflow(); +} + + +static void ftst_(FPU_REG *st0_ptr, u_char st0tag) +{ + switch (st0tag) + { + case TAG_Zero: + setcc(SW_C3); + break; + case TAG_Valid: + if (getsign(st0_ptr) == SIGN_POS) + setcc(0); + else + setcc(SW_C0); + break; + case TAG_Special: + switch ( FPU_Special(st0_ptr) ) + { + case TW_Denormal: + if (getsign(st0_ptr) == SIGN_POS) + setcc(0); + else + setcc(SW_C0); + if ( denormal_operand() < 0 ) + { +#ifdef PECULIAR_486 + /* This is weird! */ + if (getsign(st0_ptr) == SIGN_POS) + setcc(SW_C3); +#endif /* PECULIAR_486 */ + return; + } + break; + case TW_NaN: + setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */ + EXCEPTION(EX_Invalid); + break; + case TW_Infinity: + if (getsign(st0_ptr) == SIGN_POS) + setcc(0); + else + setcc(SW_C0); + break; + default: + setcc(SW_C0|SW_C2|SW_C3); /* Operand is not comparable */ + EXCEPTION(EX_INTERNAL|0x14); + break; + } + break; + case TAG_Empty: + setcc(SW_C0|SW_C2|SW_C3); + EXCEPTION(EX_StackUnder); + break; + } +} + + +static void fxam(FPU_REG *st0_ptr, u_char st0tag) +{ + int c = 0; + switch (st0tag) + { + case TAG_Empty: + c = SW_C3|SW_C0; + break; + case TAG_Zero: + c = SW_C3; + break; + case TAG_Valid: + c = SW_C2; + break; + case TAG_Special: + switch ( FPU_Special(st0_ptr) ) + { + case TW_Denormal: + c = SW_C2|SW_C3; /* Denormal */ + break; + case TW_NaN: + /* We also use NaN for unsupported types. */ + if ( (st0_ptr->sigh & 0x80000000) && (exponent(st0_ptr) == EXP_OVER) ) + c = SW_C0; + break; + case TW_Infinity: + c = SW_C2|SW_C0; + break; + } + } + if ( getsign(st0_ptr) == SIGN_NEG ) + c |= SW_C1; + setcc(c); +} + + +static FUNC_ST0 const fp_etc_table[] = { + fchs, fabs, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal, + ftst_, fxam, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal +}; + +void FPU_etc(void) +{ + (fp_etc_table[FPU_rm])(&st(0), FPU_gettag0()); +} diff --git a/arch/x86/math-emu/fpu_proto.h b/arch/x86/math-emu/fpu_proto.h new file mode 100644 index 00000000000..37a8a7fe7e2 --- /dev/null +++ b/arch/x86/math-emu/fpu_proto.h @@ -0,0 +1,140 @@ +#ifndef _FPU_PROTO_H +#define _FPU_PROTO_H + +/* errors.c */ +extern void FPU_illegal(void); +extern void FPU_printall(void); +asmlinkage void FPU_exception(int n); +extern int real_1op_NaN(FPU_REG *a); +extern int real_2op_NaN(FPU_REG const *b, u_char tagb, int deststnr, + FPU_REG const *defaultNaN); +asmlinkage int arith_invalid(int deststnr); +asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign); +extern int set_precision_flag(int flags); +asmlinkage void set_precision_flag_up(void); +asmlinkage void set_precision_flag_down(void); +asmlinkage int denormal_operand(void); +asmlinkage int arith_overflow(FPU_REG *dest); +asmlinkage int arith_underflow(FPU_REG *dest); +extern void FPU_stack_overflow(void); +extern void FPU_stack_underflow(void); +extern void FPU_stack_underflow_i(int i); +extern void FPU_stack_underflow_pop(int i); +/* fpu_arith.c */ +extern void fadd__(void); +extern void fmul__(void); +extern void fsub__(void); +extern void fsubr_(void); +extern void fdiv__(void); +extern void fdivr_(void); +extern void fadd_i(void); +extern void fmul_i(void); +extern void fsubri(void); +extern void fsub_i(void); +extern void fdivri(void); +extern void fdiv_i(void); +extern void faddp_(void); +extern void fmulp_(void); +extern void fsubrp(void); +extern void fsubp_(void); +extern void fdivrp(void); +extern void fdivp_(void); +/* fpu_aux.c */ +extern void finit(void); +extern void finit_(void); +extern void fstsw_(void); +extern void fp_nop(void); +extern void fld_i_(void); +extern void fxch_i(void); +extern void ffree_(void); +extern void ffreep(void); +extern void fst_i_(void); +extern void fstp_i(void); +/* fpu_entry.c */ +asmlinkage extern void math_emulate(long arg); +extern void math_abort(struct info *info, unsigned int signal); +/* fpu_etc.c */ +extern void FPU_etc(void); +/* fpu_tags.c */ +extern int FPU_gettag0(void); +extern int FPU_gettagi(int stnr); +extern int FPU_gettag(int regnr); +extern void FPU_settag0(int tag); +extern void FPU_settagi(int stnr, int tag); +extern void FPU_settag(int regnr, int tag); +extern int FPU_Special(FPU_REG const *ptr); +extern int isNaN(FPU_REG const *ptr); +extern void FPU_pop(void); +extern int FPU_empty_i(int stnr); +extern int FPU_stackoverflow(FPU_REG **st_new_ptr); +extern void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr); +extern void FPU_copy_to_reg1(FPU_REG const *r, u_char tag); +extern void FPU_copy_to_reg0(FPU_REG const *r, u_char tag); +/* fpu_trig.c */ +extern void FPU_triga(void); +extern void FPU_trigb(void); +/* get_address.c */ +extern void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip, + struct address *addr, fpu_addr_modes addr_modes); +extern void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip, + struct address *addr, fpu_addr_modes addr_modes); +/* load_store.c */ +extern int FPU_load_store(u_char type, fpu_addr_modes addr_modes, + void __user *data_address); +/* poly_2xm1.c */ +extern int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result); +/* poly_atan.c */ +extern void poly_atan(FPU_REG *st0_ptr, u_char st0_tag, FPU_REG *st1_ptr, + u_char st1_tag); +/* poly_l2.c */ +extern void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign); +extern int poly_l2p1(u_char s0, u_char s1, FPU_REG *r0, FPU_REG *r1, + FPU_REG *d); +/* poly_sin.c */ +extern void poly_sine(FPU_REG *st0_ptr); +extern void poly_cos(FPU_REG *st0_ptr); +/* poly_tan.c */ +extern void poly_tan(FPU_REG *st0_ptr); +/* reg_add_sub.c */ +extern int FPU_add(FPU_REG const *b, u_char tagb, int destrnr, int control_w); +extern int FPU_sub(int flags, int rm, int control_w); +/* reg_compare.c */ +extern int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag); +extern void fcom_st(void); +extern void fcompst(void); +extern void fcompp(void); +extern void fucom_(void); +extern void fucomp(void); +extern void fucompp(void); +/* reg_constant.c */ +extern void fconst(void); +/* reg_ld_str.c */ +extern int FPU_load_extended(long double __user *s, int stnr); +extern int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data); +extern int FPU_load_single(float __user *single, FPU_REG *loaded_data); +extern int FPU_load_int64(long long __user *_s); +extern int FPU_load_int32(long __user *_s, FPU_REG *loaded_data); +extern int FPU_load_int16(short __user *_s, FPU_REG *loaded_data); +extern int FPU_load_bcd(u_char __user *s); +extern int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag, + long double __user *d); +extern int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat); +extern int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single); +extern int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d); +extern int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d); +extern int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d); +extern int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d); +extern int FPU_round_to_int(FPU_REG *r, u_char tag); +extern u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s); +extern void frstor(fpu_addr_modes addr_modes, u_char __user *data_address); +extern u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d); +extern void fsave(fpu_addr_modes addr_modes, u_char __user *data_address); +extern int FPU_tagof(FPU_REG *ptr); +/* reg_mul.c */ +extern int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w); + +extern int FPU_div(int flags, int regrm, int control_w); +/* reg_convert.c */ +extern int FPU_to_exp16(FPU_REG const *a, FPU_REG *x); +#endif /* _FPU_PROTO_H */ + diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h new file mode 100644 index 00000000000..a3ae28c49dd --- /dev/null +++ b/arch/x86/math-emu/fpu_system.h @@ -0,0 +1,90 @@ +/*---------------------------------------------------------------------------+ + | fpu_system.h | + | | + | Copyright (C) 1992,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _FPU_SYSTEM_H +#define _FPU_SYSTEM_H + +/* system dependent definitions */ + +#include +#include +#include + +/* This sets the pointer FPU_info to point to the argument part + of the stack frame of math_emulate() */ +#define SETUP_DATA_AREA(arg) FPU_info = (struct info *) &arg + +/* s is always from a cpu register, and the cpu does bounds checking + * during register load --> no further bounds checks needed */ +#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.ldt)[(s) >> 3]) +#define SEG_D_SIZE(x) ((x).b & (3 << 21)) +#define SEG_G_BIT(x) ((x).b & (1 << 23)) +#define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1) +#define SEG_286_MODE(x) ((x).b & ( 0xff000000 | 0xf0000 | (1 << 23))) +#define SEG_BASE_ADDR(s) (((s).b & 0xff000000) \ + | (((s).b & 0xff) << 16) | ((s).a >> 16)) +#define SEG_LIMIT(s) (((s).b & 0xff0000) | ((s).a & 0xffff)) +#define SEG_EXECUTE_ONLY(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 11)) +#define SEG_WRITE_PERM(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 9)) +#define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ + == (1 << 10)) + +#define I387 (current->thread.i387) +#define FPU_info (I387.soft.info) + +#define FPU_CS (*(unsigned short *) &(FPU_info->___cs)) +#define FPU_SS (*(unsigned short *) &(FPU_info->___ss)) +#define FPU_DS (*(unsigned short *) &(FPU_info->___ds)) +#define FPU_EAX (FPU_info->___eax) +#define FPU_EFLAGS (FPU_info->___eflags) +#define FPU_EIP (FPU_info->___eip) +#define FPU_ORIG_EIP (FPU_info->___orig_eip) + +#define FPU_lookahead (I387.soft.lookahead) + +/* nz if ip_offset and cs_selector are not to be set for the current + instruction. */ +#define no_ip_update (*(u_char *)&(I387.soft.no_update)) +#define FPU_rm (*(u_char *)&(I387.soft.rm)) + +/* Number of bytes of data which can be legally accessed by the current + instruction. This only needs to hold a number <= 108, so a byte will do. */ +#define access_limit (*(u_char *)&(I387.soft.alimit)) + +#define partial_status (I387.soft.swd) +#define control_word (I387.soft.cwd) +#define fpu_tag_word (I387.soft.twd) +#define registers (I387.soft.st_space) +#define top (I387.soft.ftop) + +#define instruction_address (*(struct address *)&I387.soft.fip) +#define operand_address (*(struct address *)&I387.soft.foo) + +#define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \ + math_abort(FPU_info,SIGSEGV) +#define FPU_abort math_abort(FPU_info, SIGSEGV) + +#undef FPU_IGNORE_CODE_SEGV +#ifdef FPU_IGNORE_CODE_SEGV +/* access_ok() is very expensive, and causes the emulator to run + about 20% slower if applied to the code. Anyway, errors due to bad + code addresses should be much rarer than errors due to bad data + addresses. */ +#define FPU_code_access_ok(z) +#else +/* A simpler test than access_ok() can probably be done for + FPU_code_access_ok() because the only possible error is to step + past the upper boundary of a legal code area. */ +#define FPU_code_access_ok(z) FPU_access_ok(VERIFY_READ,(void __user *)FPU_EIP,z) +#endif + +#define FPU_get_user(x,y) get_user((x),(y)) +#define FPU_put_user(x,y) put_user((x),(y)) + +#endif diff --git a/arch/x86/math-emu/fpu_tags.c b/arch/x86/math-emu/fpu_tags.c new file mode 100644 index 00000000000..cb436fe20e4 --- /dev/null +++ b/arch/x86/math-emu/fpu_tags.c @@ -0,0 +1,127 @@ +/*---------------------------------------------------------------------------+ + | fpu_tags.c | + | | + | Set FPU register tags. | + | | + | Copyright (C) 1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@jacobi.maths.monash.edu.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" +#include "fpu_system.h" +#include "exception.h" + + +void FPU_pop(void) +{ + fpu_tag_word |= 3 << ((top & 7)*2); + top++; +} + + +int FPU_gettag0(void) +{ + return (fpu_tag_word >> ((top & 7)*2)) & 3; +} + + +int FPU_gettagi(int stnr) +{ + return (fpu_tag_word >> (((top+stnr) & 7)*2)) & 3; +} + + +int FPU_gettag(int regnr) +{ + return (fpu_tag_word >> ((regnr & 7)*2)) & 3; +} + + +void FPU_settag0(int tag) +{ + int regnr = top; + regnr &= 7; + fpu_tag_word &= ~(3 << (regnr*2)); + fpu_tag_word |= (tag & 3) << (regnr*2); +} + + +void FPU_settagi(int stnr, int tag) +{ + int regnr = stnr+top; + regnr &= 7; + fpu_tag_word &= ~(3 << (regnr*2)); + fpu_tag_word |= (tag & 3) << (regnr*2); +} + + +void FPU_settag(int regnr, int tag) +{ + regnr &= 7; + fpu_tag_word &= ~(3 << (regnr*2)); + fpu_tag_word |= (tag & 3) << (regnr*2); +} + + +int FPU_Special(FPU_REG const *ptr) +{ + int exp = exponent(ptr); + + if ( exp == EXP_BIAS+EXP_UNDER ) + return TW_Denormal; + else if ( exp != EXP_BIAS+EXP_OVER ) + return TW_NaN; + else if ( (ptr->sigh == 0x80000000) && (ptr->sigl == 0) ) + return TW_Infinity; + return TW_NaN; +} + + +int isNaN(FPU_REG const *ptr) +{ + return ( (exponent(ptr) == EXP_BIAS+EXP_OVER) + && !((ptr->sigh == 0x80000000) && (ptr->sigl == 0)) ); +} + + +int FPU_empty_i(int stnr) +{ + int regnr = (top+stnr) & 7; + + return ((fpu_tag_word >> (regnr*2)) & 3) == TAG_Empty; +} + + +int FPU_stackoverflow(FPU_REG **st_new_ptr) +{ + *st_new_ptr = &st(-1); + + return ((fpu_tag_word >> (((top - 1) & 7)*2)) & 3) != TAG_Empty; +} + + +void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr) +{ + reg_copy(r, &st(stnr)); + FPU_settagi(stnr, tag); +} + +void FPU_copy_to_reg1(FPU_REG const *r, u_char tag) +{ + reg_copy(r, &st(1)); + FPU_settagi(1, tag); +} + +void FPU_copy_to_reg0(FPU_REG const *r, u_char tag) +{ + int regnr = top; + regnr &= 7; + + reg_copy(r, &st(0)); + + fpu_tag_word &= ~(3 << (regnr*2)); + fpu_tag_word |= (tag & 3) << (regnr*2); +} diff --git a/arch/x86/math-emu/fpu_trig.c b/arch/x86/math-emu/fpu_trig.c new file mode 100644 index 00000000000..403cbde1d42 --- /dev/null +++ b/arch/x86/math-emu/fpu_trig.c @@ -0,0 +1,1845 @@ +/*---------------------------------------------------------------------------+ + | fpu_trig.c | + | | + | Implementation of the FPU "transcendental" functions. | + | | + | Copyright (C) 1992,1993,1994,1997,1999 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@melbpc.org.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "control_w.h" +#include "reg_constant.h" + +static void rem_kernel(unsigned long long st0, unsigned long long *y, + unsigned long long st1, + unsigned long long q, int n); + +#define BETTER_THAN_486 + +#define FCOS 4 + +/* Used only by fptan, fsin, fcos, and fsincos. */ +/* This routine produces very accurate results, similar to + using a value of pi with more than 128 bits precision. */ +/* Limited measurements show no results worse than 64 bit precision + except for the results for arguments close to 2^63, where the + precision of the result sometimes degrades to about 63.9 bits */ +static int trig_arg(FPU_REG *st0_ptr, int even) +{ + FPU_REG tmp; + u_char tmptag; + unsigned long long q; + int old_cw = control_word, saved_status = partial_status; + int tag, st0_tag = TAG_Valid; + + if ( exponent(st0_ptr) >= 63 ) + { + partial_status |= SW_C2; /* Reduction incomplete. */ + return -1; + } + + control_word &= ~CW_RC; + control_word |= RC_CHOP; + + setpositive(st0_ptr); + tag = FPU_u_div(st0_ptr, &CONST_PI2, &tmp, PR_64_BITS | RC_CHOP | 0x3f, + SIGN_POS); + + FPU_round_to_int(&tmp, tag); /* Fortunately, this can't overflow + to 2^64 */ + q = significand(&tmp); + if ( q ) + { + rem_kernel(significand(st0_ptr), + &significand(&tmp), + significand(&CONST_PI2), + q, exponent(st0_ptr) - exponent(&CONST_PI2)); + setexponent16(&tmp, exponent(&CONST_PI2)); + st0_tag = FPU_normalize(&tmp); + FPU_copy_to_reg0(&tmp, st0_tag); + } + + if ( (even && !(q & 1)) || (!even && (q & 1)) ) + { + st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2, FULL_PRECISION); + +#ifdef BETTER_THAN_486 + /* So far, the results are exact but based upon a 64 bit + precision approximation to pi/2. The technique used + now is equivalent to using an approximation to pi/2 which + is accurate to about 128 bits. */ + if ( (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64) || (q > 1) ) + { + /* This code gives the effect of having pi/2 to better than + 128 bits precision. */ + + significand(&tmp) = q + 1; + setexponent16(&tmp, 63); + FPU_normalize(&tmp); + tmptag = + FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION, SIGN_POS, + exponent(&CONST_PI2extra) + exponent(&tmp)); + setsign(&tmp, getsign(&CONST_PI2extra)); + st0_tag = FPU_add(&tmp, tmptag, 0, FULL_PRECISION); + if ( signnegative(st0_ptr) ) + { + /* CONST_PI2extra is negative, so the result of the addition + can be negative. This means that the argument is actually + in a different quadrant. The correction is always < pi/2, + so it can't overflow into yet another quadrant. */ + setpositive(st0_ptr); + q++; + } + } +#endif /* BETTER_THAN_486 */ + } +#ifdef BETTER_THAN_486 + else + { + /* So far, the results are exact but based upon a 64 bit + precision approximation to pi/2. The technique used + now is equivalent to using an approximation to pi/2 which + is accurate to about 128 bits. */ + if ( ((q > 0) && (exponent(st0_ptr) <= exponent(&CONST_PI2extra) + 64)) + || (q > 1) ) + { + /* This code gives the effect of having p/2 to better than + 128 bits precision. */ + + significand(&tmp) = q; + setexponent16(&tmp, 63); + FPU_normalize(&tmp); /* This must return TAG_Valid */ + tmptag = FPU_u_mul(&CONST_PI2extra, &tmp, &tmp, FULL_PRECISION, + SIGN_POS, + exponent(&CONST_PI2extra) + exponent(&tmp)); + setsign(&tmp, getsign(&CONST_PI2extra)); + st0_tag = FPU_sub(LOADED|(tmptag & 0x0f), (int)&tmp, + FULL_PRECISION); + if ( (exponent(st0_ptr) == exponent(&CONST_PI2)) && + ((st0_ptr->sigh > CONST_PI2.sigh) + || ((st0_ptr->sigh == CONST_PI2.sigh) + && (st0_ptr->sigl > CONST_PI2.sigl))) ) + { + /* CONST_PI2extra is negative, so the result of the + subtraction can be larger than pi/2. This means + that the argument is actually in a different quadrant. + The correction is always < pi/2, so it can't overflow + into yet another quadrant. */ + st0_tag = FPU_sub(REV|LOADED|TAG_Valid, (int)&CONST_PI2, + FULL_PRECISION); + q++; + } + } + } +#endif /* BETTER_THAN_486 */ + + FPU_settag0(st0_tag); + control_word = old_cw; + partial_status = saved_status & ~SW_C2; /* Reduction complete. */ + + return (q & 3) | even; +} + + +/* Convert a long to register */ +static void convert_l2reg(long const *arg, int deststnr) +{ + int tag; + long num = *arg; + u_char sign; + FPU_REG *dest = &st(deststnr); + + if (num == 0) + { + FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); + return; + } + + if (num > 0) + { sign = SIGN_POS; } + else + { num = -num; sign = SIGN_NEG; } + + dest->sigh = num; + dest->sigl = 0; + setexponent16(dest, 31); + tag = FPU_normalize(dest); + FPU_settagi(deststnr, tag); + setsign(dest, sign); + return; +} + + +static void single_arg_error(FPU_REG *st0_ptr, u_char st0_tag) +{ + if ( st0_tag == TAG_Empty ) + FPU_stack_underflow(); /* Puts a QNaN in st(0) */ + else if ( st0_tag == TW_NaN ) + real_1op_NaN(st0_ptr); /* return with a NaN in st(0) */ +#ifdef PARANOID + else + EXCEPTION(EX_INTERNAL|0x0112); +#endif /* PARANOID */ +} + + +static void single_arg_2_error(FPU_REG *st0_ptr, u_char st0_tag) +{ + int isNaN; + + switch ( st0_tag ) + { + case TW_NaN: + isNaN = (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000); + if ( isNaN && !(st0_ptr->sigh & 0x40000000) ) /* Signaling ? */ + { + EXCEPTION(EX_Invalid); + if ( control_word & CW_Invalid ) + { + /* The masked response */ + /* Convert to a QNaN */ + st0_ptr->sigh |= 0x40000000; + push(); + FPU_copy_to_reg0(st0_ptr, TAG_Special); + } + } + else if ( isNaN ) + { + /* A QNaN */ + push(); + FPU_copy_to_reg0(st0_ptr, TAG_Special); + } + else + { + /* pseudoNaN or other unsupported */ + EXCEPTION(EX_Invalid); + if ( control_word & CW_Invalid ) + { + /* The masked response */ + FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); + push(); + FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); + } + } + break; /* return with a NaN in st(0) */ +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL|0x0112); +#endif /* PARANOID */ + } +} + + +/*---------------------------------------------------------------------------*/ + +static void f2xm1(FPU_REG *st0_ptr, u_char tag) +{ + FPU_REG a; + + clear_C1(); + + if ( tag == TAG_Valid ) + { + /* For an 80486 FPU, the result is undefined if the arg is >= 1.0 */ + if ( exponent(st0_ptr) < 0 ) + { + denormal_arg: + + FPU_to_exp16(st0_ptr, &a); + + /* poly_2xm1(x) requires 0 < st(0) < 1. */ + poly_2xm1(getsign(st0_ptr), &a, st0_ptr); + } + set_precision_flag_up(); /* 80486 appears to always do this */ + return; + } + + if ( tag == TAG_Zero ) + return; + + if ( tag == TAG_Special ) + tag = FPU_Special(st0_ptr); + + switch ( tag ) + { + case TW_Denormal: + if ( denormal_operand() < 0 ) + return; + goto denormal_arg; + case TW_Infinity: + if ( signnegative(st0_ptr) ) + { + /* -infinity gives -1 (p16-10) */ + FPU_copy_to_reg0(&CONST_1, TAG_Valid); + setnegative(st0_ptr); + } + return; + default: + single_arg_error(st0_ptr, tag); + } +} + + +static void fptan(FPU_REG *st0_ptr, u_char st0_tag) +{ + FPU_REG *st_new_ptr; + int q; + u_char arg_sign = getsign(st0_ptr); + + /* Stack underflow has higher priority */ + if ( st0_tag == TAG_Empty ) + { + FPU_stack_underflow(); /* Puts a QNaN in st(0) */ + if ( control_word & CW_Invalid ) + { + st_new_ptr = &st(-1); + push(); + FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */ + } + return; + } + + if ( STACK_OVERFLOW ) + { FPU_stack_overflow(); return; } + + if ( st0_tag == TAG_Valid ) + { + if ( exponent(st0_ptr) > -40 ) + { + if ( (q = trig_arg(st0_ptr, 0)) == -1 ) + { + /* Operand is out of range */ + return; + } + + poly_tan(st0_ptr); + setsign(st0_ptr, (q & 1) ^ (arg_sign != 0)); + set_precision_flag_up(); /* We do not really know if up or down */ + } + else + { + /* For a small arg, the result == the argument */ + /* Underflow may happen */ + + denormal_arg: + + FPU_to_exp16(st0_ptr, st0_ptr); + + st0_tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign); + FPU_settag0(st0_tag); + } + push(); + FPU_copy_to_reg0(&CONST_1, TAG_Valid); + return; + } + + if ( st0_tag == TAG_Zero ) + { + push(); + FPU_copy_to_reg0(&CONST_1, TAG_Valid); + setcc(0); + return; + } + + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + + if ( st0_tag == TW_Denormal ) + { + if ( denormal_operand() < 0 ) + return; + + goto denormal_arg; + } + + if ( st0_tag == TW_Infinity ) + { + /* The 80486 treats infinity as an invalid operand */ + if ( arith_invalid(0) >= 0 ) + { + st_new_ptr = &st(-1); + push(); + arith_invalid(0); + } + return; + } + + single_arg_2_error(st0_ptr, st0_tag); +} + + +static void fxtract(FPU_REG *st0_ptr, u_char st0_tag) +{ + FPU_REG *st_new_ptr; + u_char sign; + register FPU_REG *st1_ptr = st0_ptr; /* anticipate */ + + if ( STACK_OVERFLOW ) + { FPU_stack_overflow(); return; } + + clear_C1(); + + if ( st0_tag == TAG_Valid ) + { + long e; + + push(); + sign = getsign(st1_ptr); + reg_copy(st1_ptr, st_new_ptr); + setexponent16(st_new_ptr, exponent(st_new_ptr)); + + denormal_arg: + + e = exponent16(st_new_ptr); + convert_l2reg(&e, 1); + setexponentpos(st_new_ptr, 0); + setsign(st_new_ptr, sign); + FPU_settag0(TAG_Valid); /* Needed if arg was a denormal */ + return; + } + else if ( st0_tag == TAG_Zero ) + { + sign = getsign(st0_ptr); + + if ( FPU_divide_by_zero(0, SIGN_NEG) < 0 ) + return; + + push(); + FPU_copy_to_reg0(&CONST_Z, TAG_Zero); + setsign(st_new_ptr, sign); + return; + } + + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + + if ( st0_tag == TW_Denormal ) + { + if (denormal_operand() < 0 ) + return; + + push(); + sign = getsign(st1_ptr); + FPU_to_exp16(st1_ptr, st_new_ptr); + goto denormal_arg; + } + else if ( st0_tag == TW_Infinity ) + { + sign = getsign(st0_ptr); + setpositive(st0_ptr); + push(); + FPU_copy_to_reg0(&CONST_INF, TAG_Special); + setsign(st_new_ptr, sign); + return; + } + else if ( st0_tag == TW_NaN ) + { + if ( real_1op_NaN(st0_ptr) < 0 ) + return; + + push(); + FPU_copy_to_reg0(st0_ptr, TAG_Special); + return; + } + else if ( st0_tag == TAG_Empty ) + { + /* Is this the correct behaviour? */ + if ( control_word & EX_Invalid ) + { + FPU_stack_underflow(); + push(); + FPU_stack_underflow(); + } + else + EXCEPTION(EX_StackUnder); + } +#ifdef PARANOID + else + EXCEPTION(EX_INTERNAL | 0x119); +#endif /* PARANOID */ +} + + +static void fdecstp(void) +{ + clear_C1(); + top--; +} + +static void fincstp(void) +{ + clear_C1(); + top++; +} + + +static void fsqrt_(FPU_REG *st0_ptr, u_char st0_tag) +{ + int expon; + + clear_C1(); + + if ( st0_tag == TAG_Valid ) + { + u_char tag; + + if (signnegative(st0_ptr)) + { + arith_invalid(0); /* sqrt(negative) is invalid */ + return; + } + + /* make st(0) in [1.0 .. 4.0) */ + expon = exponent(st0_ptr); + + denormal_arg: + + setexponent16(st0_ptr, (expon & 1)); + + /* Do the computation, the sign of the result will be positive. */ + tag = wm_sqrt(st0_ptr, 0, 0, control_word, SIGN_POS); + addexponent(st0_ptr, expon >> 1); + FPU_settag0(tag); + return; + } + + if ( st0_tag == TAG_Zero ) + return; + + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + + if ( st0_tag == TW_Infinity ) + { + if ( signnegative(st0_ptr) ) + arith_invalid(0); /* sqrt(-Infinity) is invalid */ + return; + } + else if ( st0_tag == TW_Denormal ) + { + if (signnegative(st0_ptr)) + { + arith_invalid(0); /* sqrt(negative) is invalid */ + return; + } + + if ( denormal_operand() < 0 ) + return; + + FPU_to_exp16(st0_ptr, st0_ptr); + + expon = exponent16(st0_ptr); + + goto denormal_arg; + } + + single_arg_error(st0_ptr, st0_tag); + +} + + +static void frndint_(FPU_REG *st0_ptr, u_char st0_tag) +{ + int flags, tag; + + if ( st0_tag == TAG_Valid ) + { + u_char sign; + + denormal_arg: + + sign = getsign(st0_ptr); + + if (exponent(st0_ptr) > 63) + return; + + if ( st0_tag == TW_Denormal ) + { + if (denormal_operand() < 0 ) + return; + } + + /* Fortunately, this can't overflow to 2^64 */ + if ( (flags = FPU_round_to_int(st0_ptr, st0_tag)) ) + set_precision_flag(flags); + + setexponent16(st0_ptr, 63); + tag = FPU_normalize(st0_ptr); + setsign(st0_ptr, sign); + FPU_settag0(tag); + return; + } + + if ( st0_tag == TAG_Zero ) + return; + + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + + if ( st0_tag == TW_Denormal ) + goto denormal_arg; + else if ( st0_tag == TW_Infinity ) + return; + else + single_arg_error(st0_ptr, st0_tag); +} + + +static int fsin(FPU_REG *st0_ptr, u_char tag) +{ + u_char arg_sign = getsign(st0_ptr); + + if ( tag == TAG_Valid ) + { + int q; + + if ( exponent(st0_ptr) > -40 ) + { + if ( (q = trig_arg(st0_ptr, 0)) == -1 ) + { + /* Operand is out of range */ + return 1; + } + + poly_sine(st0_ptr); + + if (q & 2) + changesign(st0_ptr); + + setsign(st0_ptr, getsign(st0_ptr) ^ arg_sign); + + /* We do not really know if up or down */ + set_precision_flag_up(); + return 0; + } + else + { + /* For a small arg, the result == the argument */ + set_precision_flag_up(); /* Must be up. */ + return 0; + } + } + + if ( tag == TAG_Zero ) + { + setcc(0); + return 0; + } + + if ( tag == TAG_Special ) + tag = FPU_Special(st0_ptr); + + if ( tag == TW_Denormal ) + { + if ( denormal_operand() < 0 ) + return 1; + + /* For a small arg, the result == the argument */ + /* Underflow may happen */ + FPU_to_exp16(st0_ptr, st0_ptr); + + tag = FPU_round(st0_ptr, 1, 0, FULL_PRECISION, arg_sign); + + FPU_settag0(tag); + + return 0; + } + else if ( tag == TW_Infinity ) + { + /* The 80486 treats infinity as an invalid operand */ + arith_invalid(0); + return 1; + } + else + { + single_arg_error(st0_ptr, tag); + return 1; + } +} + + +static int f_cos(FPU_REG *st0_ptr, u_char tag) +{ + u_char st0_sign; + + st0_sign = getsign(st0_ptr); + + if ( tag == TAG_Valid ) + { + int q; + + if ( exponent(st0_ptr) > -40 ) + { + if ( (exponent(st0_ptr) < 0) + || ((exponent(st0_ptr) == 0) + && (significand(st0_ptr) <= 0xc90fdaa22168c234LL)) ) + { + poly_cos(st0_ptr); + + /* We do not really know if up or down */ + set_precision_flag_down(); + + return 0; + } + else if ( (q = trig_arg(st0_ptr, FCOS)) != -1 ) + { + poly_sine(st0_ptr); + + if ((q+1) & 2) + changesign(st0_ptr); + + /* We do not really know if up or down */ + set_precision_flag_down(); + + return 0; + } + else + { + /* Operand is out of range */ + return 1; + } + } + else + { + denormal_arg: + + setcc(0); + FPU_copy_to_reg0(&CONST_1, TAG_Valid); +#ifdef PECULIAR_486 + set_precision_flag_down(); /* 80486 appears to do this. */ +#else + set_precision_flag_up(); /* Must be up. */ +#endif /* PECULIAR_486 */ + return 0; + } + } + else if ( tag == TAG_Zero ) + { + FPU_copy_to_reg0(&CONST_1, TAG_Valid); + setcc(0); + return 0; + } + + if ( tag == TAG_Special ) + tag = FPU_Special(st0_ptr); + + if ( tag == TW_Denormal ) + { + if ( denormal_operand() < 0 ) + return 1; + + goto denormal_arg; + } + else if ( tag == TW_Infinity ) + { + /* The 80486 treats infinity as an invalid operand */ + arith_invalid(0); + return 1; + } + else + { + single_arg_error(st0_ptr, tag); /* requires st0_ptr == &st(0) */ + return 1; + } +} + + +static void fcos(FPU_REG *st0_ptr, u_char st0_tag) +{ + f_cos(st0_ptr, st0_tag); +} + + +static void fsincos(FPU_REG *st0_ptr, u_char st0_tag) +{ + FPU_REG *st_new_ptr; + FPU_REG arg; + u_char tag; + + /* Stack underflow has higher priority */ + if ( st0_tag == TAG_Empty ) + { + FPU_stack_underflow(); /* Puts a QNaN in st(0) */ + if ( control_word & CW_Invalid ) + { + st_new_ptr = &st(-1); + push(); + FPU_stack_underflow(); /* Puts a QNaN in the new st(0) */ + } + return; + } + + if ( STACK_OVERFLOW ) + { FPU_stack_overflow(); return; } + + if ( st0_tag == TAG_Special ) + tag = FPU_Special(st0_ptr); + else + tag = st0_tag; + + if ( tag == TW_NaN ) + { + single_arg_2_error(st0_ptr, TW_NaN); + return; + } + else if ( tag == TW_Infinity ) + { + /* The 80486 treats infinity as an invalid operand */ + if ( arith_invalid(0) >= 0 ) + { + /* Masked response */ + push(); + arith_invalid(0); + } + return; + } + + reg_copy(st0_ptr, &arg); + if ( !fsin(st0_ptr, st0_tag) ) + { + push(); + FPU_copy_to_reg0(&arg, st0_tag); + f_cos(&st(0), st0_tag); + } + else + { + /* An error, so restore st(0) */ + FPU_copy_to_reg0(&arg, st0_tag); + } +} + + +/*---------------------------------------------------------------------------*/ +/* The following all require two arguments: st(0) and st(1) */ + +/* A lean, mean kernel for the fprem instructions. This relies upon + the division and rounding to an integer in do_fprem giving an + exact result. Because of this, rem_kernel() needs to deal only with + the least significant 64 bits, the more significant bits of the + result must be zero. + */ +static void rem_kernel(unsigned long long st0, unsigned long long *y, + unsigned long long st1, + unsigned long long q, int n) +{ + int dummy; + unsigned long long x; + + x = st0 << n; + + /* Do the required multiplication and subtraction in the one operation */ + + /* lsw x -= lsw st1 * lsw q */ + asm volatile ("mull %4; subl %%eax,%0; sbbl %%edx,%1" + :"=m" (((unsigned *)&x)[0]), "=m" (((unsigned *)&x)[1]), + "=a" (dummy) + :"2" (((unsigned *)&st1)[0]), "m" (((unsigned *)&q)[0]) + :"%dx"); + /* msw x -= msw st1 * lsw q */ + asm volatile ("mull %3; subl %%eax,%0" + :"=m" (((unsigned *)&x)[1]), "=a" (dummy) + :"1" (((unsigned *)&st1)[1]), "m" (((unsigned *)&q)[0]) + :"%dx"); + /* msw x -= lsw st1 * msw q */ + asm volatile ("mull %3; subl %%eax,%0" + :"=m" (((unsigned *)&x)[1]), "=a" (dummy) + :"1" (((unsigned *)&st1)[0]), "m" (((unsigned *)&q)[1]) + :"%dx"); + + *y = x; +} + + +/* Remainder of st(0) / st(1) */ +/* This routine produces exact results, i.e. there is never any + rounding or truncation, etc of the result. */ +static void do_fprem(FPU_REG *st0_ptr, u_char st0_tag, int round) +{ + FPU_REG *st1_ptr = &st(1); + u_char st1_tag = FPU_gettagi(1); + + if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) ) + { + FPU_REG tmp, st0, st1; + u_char st0_sign, st1_sign; + u_char tmptag; + int tag; + int old_cw; + int expdif; + long long q; + unsigned short saved_status; + int cc; + + fprem_valid: + /* Convert registers for internal use. */ + st0_sign = FPU_to_exp16(st0_ptr, &st0); + st1_sign = FPU_to_exp16(st1_ptr, &st1); + expdif = exponent16(&st0) - exponent16(&st1); + + old_cw = control_word; + cc = 0; + + /* We want the status following the denorm tests, but don't want + the status changed by the arithmetic operations. */ + saved_status = partial_status; + control_word &= ~CW_RC; + control_word |= RC_CHOP; + + if ( expdif < 64 ) + { + /* This should be the most common case */ + + if ( expdif > -2 ) + { + u_char sign = st0_sign ^ st1_sign; + tag = FPU_u_div(&st0, &st1, &tmp, + PR_64_BITS | RC_CHOP | 0x3f, + sign); + setsign(&tmp, sign); + + if ( exponent(&tmp) >= 0 ) + { + FPU_round_to_int(&tmp, tag); /* Fortunately, this can't + overflow to 2^64 */ + q = significand(&tmp); + + rem_kernel(significand(&st0), + &significand(&tmp), + significand(&st1), + q, expdif); + + setexponent16(&tmp, exponent16(&st1)); + } + else + { + reg_copy(&st0, &tmp); + q = 0; + } + + if ( (round == RC_RND) && (tmp.sigh & 0xc0000000) ) + { + /* We may need to subtract st(1) once more, + to get a result <= 1/2 of st(1). */ + unsigned long long x; + expdif = exponent16(&st1) - exponent16(&tmp); + if ( expdif <= 1 ) + { + if ( expdif == 0 ) + x = significand(&st1) - significand(&tmp); + else /* expdif is 1 */ + x = (significand(&st1) << 1) - significand(&tmp); + if ( (x < significand(&tmp)) || + /* or equi-distant (from 0 & st(1)) and q is odd */ + ((x == significand(&tmp)) && (q & 1) ) ) + { + st0_sign = ! st0_sign; + significand(&tmp) = x; + q++; + } + } + } + + if (q & 4) cc |= SW_C0; + if (q & 2) cc |= SW_C3; + if (q & 1) cc |= SW_C1; + } + else + { + control_word = old_cw; + setcc(0); + return; + } + } + else + { + /* There is a large exponent difference ( >= 64 ) */ + /* To make much sense, the code in this section should + be done at high precision. */ + int exp_1, N; + u_char sign; + + /* prevent overflow here */ + /* N is 'a number between 32 and 63' (p26-113) */ + reg_copy(&st0, &tmp); + tmptag = st0_tag; + N = (expdif & 0x0000001f) + 32; /* This choice gives results + identical to an AMD 486 */ + setexponent16(&tmp, N); + exp_1 = exponent16(&st1); + setexponent16(&st1, 0); + expdif -= N; + + sign = getsign(&tmp) ^ st1_sign; + tag = FPU_u_div(&tmp, &st1, &tmp, PR_64_BITS | RC_CHOP | 0x3f, + sign); + setsign(&tmp, sign); + + FPU_round_to_int(&tmp, tag); /* Fortunately, this can't + overflow to 2^64 */ + + rem_kernel(significand(&st0), + &significand(&tmp), + significand(&st1), + significand(&tmp), + exponent(&tmp) + ); + setexponent16(&tmp, exp_1 + expdif); + + /* It is possible for the operation to be complete here. + What does the IEEE standard say? The Intel 80486 manual + implies that the operation will never be completed at this + point, and the behaviour of a real 80486 confirms this. + */ + if ( !(tmp.sigh | tmp.sigl) ) + { + /* The result is zero */ + control_word = old_cw; + partial_status = saved_status; + FPU_copy_to_reg0(&CONST_Z, TAG_Zero); + setsign(&st0, st0_sign); +#ifdef PECULIAR_486 + setcc(SW_C2); +#else + setcc(0); +#endif /* PECULIAR_486 */ + return; + } + cc = SW_C2; + } + + control_word = old_cw; + partial_status = saved_status; + tag = FPU_normalize_nuo(&tmp); + reg_copy(&tmp, st0_ptr); + + /* The only condition to be looked for is underflow, + and it can occur here only if underflow is unmasked. */ + if ( (exponent16(&tmp) <= EXP_UNDER) && (tag != TAG_Zero) + && !(control_word & CW_Underflow) ) + { + setcc(cc); + tag = arith_underflow(st0_ptr); + setsign(st0_ptr, st0_sign); + FPU_settag0(tag); + return; + } + else if ( (exponent16(&tmp) > EXP_UNDER) || (tag == TAG_Zero) ) + { + stdexp(st0_ptr); + setsign(st0_ptr, st0_sign); + } + else + { + tag = FPU_round(st0_ptr, 0, 0, FULL_PRECISION, st0_sign); + } + FPU_settag0(tag); + setcc(cc); + + return; + } + + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + if ( st1_tag == TAG_Special ) + st1_tag = FPU_Special(st1_ptr); + + if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal)) + || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid)) + || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) ) + { + if ( denormal_operand() < 0 ) + return; + goto fprem_valid; + } + else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) ) + { + FPU_stack_underflow(); + return; + } + else if ( st0_tag == TAG_Zero ) + { + if ( st1_tag == TAG_Valid ) + { + setcc(0); return; + } + else if ( st1_tag == TW_Denormal ) + { + if ( denormal_operand() < 0 ) + return; + setcc(0); return; + } + else if ( st1_tag == TAG_Zero ) + { arith_invalid(0); return; } /* fprem(?,0) always invalid */ + else if ( st1_tag == TW_Infinity ) + { setcc(0); return; } + } + else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) ) + { + if ( st1_tag == TAG_Zero ) + { + arith_invalid(0); /* fprem(Valid,Zero) is invalid */ + return; + } + else if ( st1_tag != TW_NaN ) + { + if ( ((st0_tag == TW_Denormal) || (st1_tag == TW_Denormal)) + && (denormal_operand() < 0) ) + return; + + if ( st1_tag == TW_Infinity ) + { + /* fprem(Valid,Infinity) is o.k. */ + setcc(0); return; + } + } + } + else if ( st0_tag == TW_Infinity ) + { + if ( st1_tag != TW_NaN ) + { + arith_invalid(0); /* fprem(Infinity,?) is invalid */ + return; + } + } + + /* One of the registers must contain a NaN if we got here. */ + +#ifdef PARANOID + if ( (st0_tag != TW_NaN) && (st1_tag != TW_NaN) ) + EXCEPTION(EX_INTERNAL | 0x118); +#endif /* PARANOID */ + + real_2op_NaN(st1_ptr, st1_tag, 0, st1_ptr); + +} + + +/* ST(1) <- ST(1) * log ST; pop ST */ +static void fyl2x(FPU_REG *st0_ptr, u_char st0_tag) +{ + FPU_REG *st1_ptr = &st(1), exponent; + u_char st1_tag = FPU_gettagi(1); + u_char sign; + int e, tag; + + clear_C1(); + + if ( (st0_tag == TAG_Valid) && (st1_tag == TAG_Valid) ) + { + both_valid: + /* Both regs are Valid or Denormal */ + if ( signpositive(st0_ptr) ) + { + if ( st0_tag == TW_Denormal ) + FPU_to_exp16(st0_ptr, st0_ptr); + else + /* Convert st(0) for internal use. */ + setexponent16(st0_ptr, exponent(st0_ptr)); + + if ( (st0_ptr->sigh == 0x80000000) && (st0_ptr->sigl == 0) ) + { + /* Special case. The result can be precise. */ + u_char esign; + e = exponent16(st0_ptr); + if ( e >= 0 ) + { + exponent.sigh = e; + esign = SIGN_POS; + } + else + { + exponent.sigh = -e; + esign = SIGN_NEG; + } + exponent.sigl = 0; + setexponent16(&exponent, 31); + tag = FPU_normalize_nuo(&exponent); + stdexp(&exponent); + setsign(&exponent, esign); + tag = FPU_mul(&exponent, tag, 1, FULL_PRECISION); + if ( tag >= 0 ) + FPU_settagi(1, tag); + } + else + { + /* The usual case */ + sign = getsign(st1_ptr); + if ( st1_tag == TW_Denormal ) + FPU_to_exp16(st1_ptr, st1_ptr); + else + /* Convert st(1) for internal use. */ + setexponent16(st1_ptr, exponent(st1_ptr)); + poly_l2(st0_ptr, st1_ptr, sign); + } + } + else + { + /* negative */ + if ( arith_invalid(1) < 0 ) + return; + } + + FPU_pop(); + + return; + } + + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + if ( st1_tag == TAG_Special ) + st1_tag = FPU_Special(st1_ptr); + + if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) ) + { + FPU_stack_underflow_pop(1); + return; + } + else if ( (st0_tag <= TW_Denormal) && (st1_tag <= TW_Denormal) ) + { + if ( st0_tag == TAG_Zero ) + { + if ( st1_tag == TAG_Zero ) + { + /* Both args zero is invalid */ + if ( arith_invalid(1) < 0 ) + return; + } + else + { + u_char sign; + sign = getsign(st1_ptr)^SIGN_NEG; + if ( FPU_divide_by_zero(1, sign) < 0 ) + return; + + setsign(st1_ptr, sign); + } + } + else if ( st1_tag == TAG_Zero ) + { + /* st(1) contains zero, st(0) valid <> 0 */ + /* Zero is the valid answer */ + sign = getsign(st1_ptr); + + if ( signnegative(st0_ptr) ) + { + /* log(negative) */ + if ( arith_invalid(1) < 0 ) + return; + } + else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + else + { + if ( exponent(st0_ptr) < 0 ) + sign ^= SIGN_NEG; + + FPU_copy_to_reg1(&CONST_Z, TAG_Zero); + setsign(st1_ptr, sign); + } + } + else + { + /* One or both operands are denormals. */ + if ( denormal_operand() < 0 ) + return; + goto both_valid; + } + } + else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) ) + { + if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) + return; + } + /* One or both arg must be an infinity */ + else if ( st0_tag == TW_Infinity ) + { + if ( (signnegative(st0_ptr)) || (st1_tag == TAG_Zero) ) + { + /* log(-infinity) or 0*log(infinity) */ + if ( arith_invalid(1) < 0 ) + return; + } + else + { + u_char sign = getsign(st1_ptr); + + if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + + FPU_copy_to_reg1(&CONST_INF, TAG_Special); + setsign(st1_ptr, sign); + } + } + /* st(1) must be infinity here */ + else if ( ((st0_tag == TAG_Valid) || (st0_tag == TW_Denormal)) + && ( signpositive(st0_ptr) ) ) + { + if ( exponent(st0_ptr) >= 0 ) + { + if ( (exponent(st0_ptr) == 0) && + (st0_ptr->sigh == 0x80000000) && + (st0_ptr->sigl == 0) ) + { + /* st(0) holds 1.0 */ + /* infinity*log(1) */ + if ( arith_invalid(1) < 0 ) + return; + } + /* else st(0) is positive and > 1.0 */ + } + else + { + /* st(0) is positive and < 1.0 */ + + if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + + changesign(st1_ptr); + } + } + else + { + /* st(0) must be zero or negative */ + if ( st0_tag == TAG_Zero ) + { + /* This should be invalid, but a real 80486 is happy with it. */ + +#ifndef PECULIAR_486 + sign = getsign(st1_ptr); + if ( FPU_divide_by_zero(1, sign) < 0 ) + return; +#endif /* PECULIAR_486 */ + + changesign(st1_ptr); + } + else if ( arith_invalid(1) < 0 ) /* log(negative) */ + return; + } + + FPU_pop(); +} + + +static void fpatan(FPU_REG *st0_ptr, u_char st0_tag) +{ + FPU_REG *st1_ptr = &st(1); + u_char st1_tag = FPU_gettagi(1); + int tag; + + clear_C1(); + if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) ) + { + valid_atan: + + poly_atan(st0_ptr, st0_tag, st1_ptr, st1_tag); + + FPU_pop(); + + return; + } + + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + if ( st1_tag == TAG_Special ) + st1_tag = FPU_Special(st1_ptr); + + if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal)) + || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid)) + || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) ) + { + if ( denormal_operand() < 0 ) + return; + + goto valid_atan; + } + else if ( (st0_tag == TAG_Empty) || (st1_tag == TAG_Empty) ) + { + FPU_stack_underflow_pop(1); + return; + } + else if ( (st0_tag == TW_NaN) || (st1_tag == TW_NaN) ) + { + if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) >= 0 ) + FPU_pop(); + return; + } + else if ( (st0_tag == TW_Infinity) || (st1_tag == TW_Infinity) ) + { + u_char sign = getsign(st1_ptr); + if ( st0_tag == TW_Infinity ) + { + if ( st1_tag == TW_Infinity ) + { + if ( signpositive(st0_ptr) ) + { + FPU_copy_to_reg1(&CONST_PI4, TAG_Valid); + } + else + { + setpositive(st1_ptr); + tag = FPU_u_add(&CONST_PI4, &CONST_PI2, st1_ptr, + FULL_PRECISION, SIGN_POS, + exponent(&CONST_PI4), exponent(&CONST_PI2)); + if ( tag >= 0 ) + FPU_settagi(1, tag); + } + } + else + { + if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + + if ( signpositive(st0_ptr) ) + { + FPU_copy_to_reg1(&CONST_Z, TAG_Zero); + setsign(st1_ptr, sign); /* An 80486 preserves the sign */ + FPU_pop(); + return; + } + else + { + FPU_copy_to_reg1(&CONST_PI, TAG_Valid); + } + } + } + else + { + /* st(1) is infinity, st(0) not infinity */ + if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + + FPU_copy_to_reg1(&CONST_PI2, TAG_Valid); + } + setsign(st1_ptr, sign); + } + else if ( st1_tag == TAG_Zero ) + { + /* st(0) must be valid or zero */ + u_char sign = getsign(st1_ptr); + + if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + + if ( signpositive(st0_ptr) ) + { + /* An 80486 preserves the sign */ + FPU_pop(); + return; + } + + FPU_copy_to_reg1(&CONST_PI, TAG_Valid); + setsign(st1_ptr, sign); + } + else if ( st0_tag == TAG_Zero ) + { + /* st(1) must be TAG_Valid here */ + u_char sign = getsign(st1_ptr); + + if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + + FPU_copy_to_reg1(&CONST_PI2, TAG_Valid); + setsign(st1_ptr, sign); + } +#ifdef PARANOID + else + EXCEPTION(EX_INTERNAL | 0x125); +#endif /* PARANOID */ + + FPU_pop(); + set_precision_flag_up(); /* We do not really know if up or down */ +} + + +static void fprem(FPU_REG *st0_ptr, u_char st0_tag) +{ + do_fprem(st0_ptr, st0_tag, RC_CHOP); +} + + +static void fprem1(FPU_REG *st0_ptr, u_char st0_tag) +{ + do_fprem(st0_ptr, st0_tag, RC_RND); +} + + +static void fyl2xp1(FPU_REG *st0_ptr, u_char st0_tag) +{ + u_char sign, sign1; + FPU_REG *st1_ptr = &st(1), a, b; + u_char st1_tag = FPU_gettagi(1); + + clear_C1(); + if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) ) + { + valid_yl2xp1: + + sign = getsign(st0_ptr); + sign1 = getsign(st1_ptr); + + FPU_to_exp16(st0_ptr, &a); + FPU_to_exp16(st1_ptr, &b); + + if ( poly_l2p1(sign, sign1, &a, &b, st1_ptr) ) + return; + + FPU_pop(); + return; + } + + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + if ( st1_tag == TAG_Special ) + st1_tag = FPU_Special(st1_ptr); + + if ( ((st0_tag == TAG_Valid) && (st1_tag == TW_Denormal)) + || ((st0_tag == TW_Denormal) && (st1_tag == TAG_Valid)) + || ((st0_tag == TW_Denormal) && (st1_tag == TW_Denormal)) ) + { + if ( denormal_operand() < 0 ) + return; + + goto valid_yl2xp1; + } + else if ( (st0_tag == TAG_Empty) | (st1_tag == TAG_Empty) ) + { + FPU_stack_underflow_pop(1); + return; + } + else if ( st0_tag == TAG_Zero ) + { + switch ( st1_tag ) + { + case TW_Denormal: + if ( denormal_operand() < 0 ) + return; + + case TAG_Zero: + case TAG_Valid: + setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr)); + FPU_copy_to_reg1(st0_ptr, st0_tag); + break; + + case TW_Infinity: + /* Infinity*log(1) */ + if ( arith_invalid(1) < 0 ) + return; + break; + + case TW_NaN: + if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) + return; + break; + + default: +#ifdef PARANOID + EXCEPTION(EX_INTERNAL | 0x116); + return; +#endif /* PARANOID */ + break; + } + } + else if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) ) + { + switch ( st1_tag ) + { + case TAG_Zero: + if ( signnegative(st0_ptr) ) + { + if ( exponent(st0_ptr) >= 0 ) + { + /* st(0) holds <= -1.0 */ +#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ + changesign(st1_ptr); +#else + if ( arith_invalid(1) < 0 ) + return; +#endif /* PECULIAR_486 */ + } + else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + else + changesign(st1_ptr); + } + else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + break; + + case TW_Infinity: + if ( signnegative(st0_ptr) ) + { + if ( (exponent(st0_ptr) >= 0) && + !((st0_ptr->sigh == 0x80000000) && + (st0_ptr->sigl == 0)) ) + { + /* st(0) holds < -1.0 */ +#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ + changesign(st1_ptr); +#else + if ( arith_invalid(1) < 0 ) return; +#endif /* PECULIAR_486 */ + } + else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + else + changesign(st1_ptr); + } + else if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + break; + + case TW_NaN: + if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) + return; + } + + } + else if ( st0_tag == TW_NaN ) + { + if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) + return; + } + else if ( st0_tag == TW_Infinity ) + { + if ( st1_tag == TW_NaN ) + { + if ( real_2op_NaN(st0_ptr, st0_tag, 1, st0_ptr) < 0 ) + return; + } + else if ( signnegative(st0_ptr) ) + { +#ifndef PECULIAR_486 + /* This should have higher priority than denormals, but... */ + if ( arith_invalid(1) < 0 ) /* log(-infinity) */ + return; +#endif /* PECULIAR_486 */ + if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; +#ifdef PECULIAR_486 + /* Denormal operands actually get higher priority */ + if ( arith_invalid(1) < 0 ) /* log(-infinity) */ + return; +#endif /* PECULIAR_486 */ + } + else if ( st1_tag == TAG_Zero ) + { + /* log(infinity) */ + if ( arith_invalid(1) < 0 ) + return; + } + + /* st(1) must be valid here. */ + + else if ( (st1_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + + /* The Manual says that log(Infinity) is invalid, but a real + 80486 sensibly says that it is o.k. */ + else + { + u_char sign = getsign(st1_ptr); + FPU_copy_to_reg1(&CONST_INF, TAG_Special); + setsign(st1_ptr, sign); + } + } +#ifdef PARANOID + else + { + EXCEPTION(EX_INTERNAL | 0x117); + return; + } +#endif /* PARANOID */ + + FPU_pop(); + return; + +} + + +static void fscale(FPU_REG *st0_ptr, u_char st0_tag) +{ + FPU_REG *st1_ptr = &st(1); + u_char st1_tag = FPU_gettagi(1); + int old_cw = control_word; + u_char sign = getsign(st0_ptr); + + clear_C1(); + if ( !((st0_tag ^ TAG_Valid) | (st1_tag ^ TAG_Valid)) ) + { + long scale; + FPU_REG tmp; + + /* Convert register for internal use. */ + setexponent16(st0_ptr, exponent(st0_ptr)); + + valid_scale: + + if ( exponent(st1_ptr) > 30 ) + { + /* 2^31 is far too large, would require 2^(2^30) or 2^(-2^30) */ + + if ( signpositive(st1_ptr) ) + { + EXCEPTION(EX_Overflow); + FPU_copy_to_reg0(&CONST_INF, TAG_Special); + } + else + { + EXCEPTION(EX_Underflow); + FPU_copy_to_reg0(&CONST_Z, TAG_Zero); + } + setsign(st0_ptr, sign); + return; + } + + control_word &= ~CW_RC; + control_word |= RC_CHOP; + reg_copy(st1_ptr, &tmp); + FPU_round_to_int(&tmp, st1_tag); /* This can never overflow here */ + control_word = old_cw; + scale = signnegative(st1_ptr) ? -tmp.sigl : tmp.sigl; + scale += exponent16(st0_ptr); + + setexponent16(st0_ptr, scale); + + /* Use FPU_round() to properly detect under/overflow etc */ + FPU_round(st0_ptr, 0, 0, control_word, sign); + + return; + } + + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + if ( st1_tag == TAG_Special ) + st1_tag = FPU_Special(st1_ptr); + + if ( (st0_tag == TAG_Valid) || (st0_tag == TW_Denormal) ) + { + switch ( st1_tag ) + { + case TAG_Valid: + /* st(0) must be a denormal */ + if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + + FPU_to_exp16(st0_ptr, st0_ptr); /* Will not be left on stack */ + goto valid_scale; + + case TAG_Zero: + if ( st0_tag == TW_Denormal ) + denormal_operand(); + return; + + case TW_Denormal: + denormal_operand(); + return; + + case TW_Infinity: + if ( (st0_tag == TW_Denormal) && (denormal_operand() < 0) ) + return; + + if ( signpositive(st1_ptr) ) + FPU_copy_to_reg0(&CONST_INF, TAG_Special); + else + FPU_copy_to_reg0(&CONST_Z, TAG_Zero); + setsign(st0_ptr, sign); + return; + + case TW_NaN: + real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); + return; + } + } + else if ( st0_tag == TAG_Zero ) + { + switch ( st1_tag ) + { + case TAG_Valid: + case TAG_Zero: + return; + + case TW_Denormal: + denormal_operand(); + return; + + case TW_Infinity: + if ( signpositive(st1_ptr) ) + arith_invalid(0); /* Zero scaled by +Infinity */ + return; + + case TW_NaN: + real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); + return; + } + } + else if ( st0_tag == TW_Infinity ) + { + switch ( st1_tag ) + { + case TAG_Valid: + case TAG_Zero: + return; + + case TW_Denormal: + denormal_operand(); + return; + + case TW_Infinity: + if ( signnegative(st1_ptr) ) + arith_invalid(0); /* Infinity scaled by -Infinity */ + return; + + case TW_NaN: + real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); + return; + } + } + else if ( st0_tag == TW_NaN ) + { + if ( st1_tag != TAG_Empty ) + { real_2op_NaN(st1_ptr, st1_tag, 0, st0_ptr); return; } + } + +#ifdef PARANOID + if ( !((st0_tag == TAG_Empty) || (st1_tag == TAG_Empty)) ) + { + EXCEPTION(EX_INTERNAL | 0x115); + return; + } +#endif + + /* At least one of st(0), st(1) must be empty */ + FPU_stack_underflow(); + +} + + +/*---------------------------------------------------------------------------*/ + +static FUNC_ST0 const trig_table_a[] = { + f2xm1, fyl2x, fptan, fpatan, + fxtract, fprem1, (FUNC_ST0)fdecstp, (FUNC_ST0)fincstp +}; + +void FPU_triga(void) +{ + (trig_table_a[FPU_rm])(&st(0), FPU_gettag0()); +} + + +static FUNC_ST0 const trig_table_b[] = + { + fprem, fyl2xp1, fsqrt_, fsincos, frndint_, fscale, (FUNC_ST0)fsin, fcos + }; + +void FPU_trigb(void) +{ + (trig_table_b[FPU_rm])(&st(0), FPU_gettag0()); +} diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c new file mode 100644 index 00000000000..2e2c51a8bd3 --- /dev/null +++ b/arch/x86/math-emu/get_address.c @@ -0,0 +1,438 @@ +/*---------------------------------------------------------------------------+ + | get_address.c | + | | + | Get the effective address from an FPU instruction. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + + +#include + +#include +#include + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" + + +#define FPU_WRITE_BIT 0x10 + +static int reg_offset[] = { + offsetof(struct info,___eax), + offsetof(struct info,___ecx), + offsetof(struct info,___edx), + offsetof(struct info,___ebx), + offsetof(struct info,___esp), + offsetof(struct info,___ebp), + offsetof(struct info,___esi), + offsetof(struct info,___edi) +}; + +#define REG_(x) (*(long *)(reg_offset[(x)]+(u_char *) FPU_info)) + +static int reg_offset_vm86[] = { + offsetof(struct info,___cs), + offsetof(struct info,___vm86_ds), + offsetof(struct info,___vm86_es), + offsetof(struct info,___vm86_fs), + offsetof(struct info,___vm86_gs), + offsetof(struct info,___ss), + offsetof(struct info,___vm86_ds) + }; + +#define VM86_REG_(x) (*(unsigned short *) \ + (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info)) + +/* This dummy, gs is not saved on the stack. */ +#define ___GS ___ds + +static int reg_offset_pm[] = { + offsetof(struct info,___cs), + offsetof(struct info,___ds), + offsetof(struct info,___es), + offsetof(struct info,___fs), + offsetof(struct info,___GS), + offsetof(struct info,___ss), + offsetof(struct info,___ds) + }; + +#define PM_REG_(x) (*(unsigned short *) \ + (reg_offset_pm[((unsigned)x)]+(u_char *) FPU_info)) + + +/* Decode the SIB byte. This function assumes mod != 0 */ +static int sib(int mod, unsigned long *fpu_eip) +{ + u_char ss,index,base; + long offset; + + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(base, (u_char __user *) (*fpu_eip)); /* The SIB byte */ + RE_ENTRANT_CHECK_ON; + (*fpu_eip)++; + ss = base >> 6; + index = (base >> 3) & 7; + base &= 7; + + if ((mod == 0) && (base == 5)) + offset = 0; /* No base register */ + else + offset = REG_(base); + + if (index == 4) + { + /* No index register */ + /* A non-zero ss is illegal */ + if ( ss ) + EXCEPTION(EX_Invalid); + } + else + { + offset += (REG_(index)) << ss; + } + + if (mod == 1) + { + /* 8 bit signed displacement */ + long displacement; + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(displacement, (signed char __user *) (*fpu_eip)); + offset += displacement; + RE_ENTRANT_CHECK_ON; + (*fpu_eip)++; + } + else if (mod == 2 || base == 5) /* The second condition also has mod==0 */ + { + /* 32 bit displacement */ + long displacement; + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(4); + FPU_get_user(displacement, (long __user *) (*fpu_eip)); + offset += displacement; + RE_ENTRANT_CHECK_ON; + (*fpu_eip) += 4; + } + + return offset; +} + + +static unsigned long vm86_segment(u_char segment, + struct address *addr) +{ + segment--; +#ifdef PARANOID + if ( segment > PREFIX_SS_ ) + { + EXCEPTION(EX_INTERNAL|0x130); + math_abort(FPU_info,SIGSEGV); + } +#endif /* PARANOID */ + addr->selector = VM86_REG_(segment); + return (unsigned long)VM86_REG_(segment) << 4; +} + + +/* This should work for 16 and 32 bit protected mode. */ +static long pm_address(u_char FPU_modrm, u_char segment, + struct address *addr, long offset) +{ + struct desc_struct descriptor; + unsigned long base_address, limit, address, seg_top; + + segment--; + +#ifdef PARANOID + /* segment is unsigned, so this also detects if segment was 0: */ + if ( segment > PREFIX_SS_ ) + { + EXCEPTION(EX_INTERNAL|0x132); + math_abort(FPU_info,SIGSEGV); + } +#endif /* PARANOID */ + + switch ( segment ) + { + /* gs isn't used by the kernel, so it still has its + user-space value. */ + case PREFIX_GS_-1: + /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */ + savesegment(gs, addr->selector); + break; + default: + addr->selector = PM_REG_(segment); + } + + descriptor = LDT_DESCRIPTOR(PM_REG_(segment)); + base_address = SEG_BASE_ADDR(descriptor); + address = base_address + offset; + limit = base_address + + (SEG_LIMIT(descriptor)+1) * SEG_GRANULARITY(descriptor) - 1; + if ( limit < base_address ) limit = 0xffffffff; + + if ( SEG_EXPAND_DOWN(descriptor) ) + { + if ( SEG_G_BIT(descriptor) ) + seg_top = 0xffffffff; + else + { + seg_top = base_address + (1 << 20); + if ( seg_top < base_address ) seg_top = 0xffffffff; + } + access_limit = + (address <= limit) || (address >= seg_top) ? 0 : + ((seg_top-address) >= 255 ? 255 : seg_top-address); + } + else + { + access_limit = + (address > limit) || (address < base_address) ? 0 : + ((limit-address) >= 254 ? 255 : limit-address+1); + } + if ( SEG_EXECUTE_ONLY(descriptor) || + (!SEG_WRITE_PERM(descriptor) && (FPU_modrm & FPU_WRITE_BIT)) ) + { + access_limit = 0; + } + return address; +} + + +/* + MOD R/M byte: MOD == 3 has a special use for the FPU + SIB byte used iff R/M = 100b + + 7 6 5 4 3 2 1 0 + ..... ......... ......... + MOD OPCODE(2) R/M + + + SIB byte + + 7 6 5 4 3 2 1 0 + ..... ......... ......... + SS INDEX BASE + +*/ + +void __user *FPU_get_address(u_char FPU_modrm, unsigned long *fpu_eip, + struct address *addr, + fpu_addr_modes addr_modes) +{ + u_char mod; + unsigned rm = FPU_modrm & 7; + long *cpu_reg_ptr; + int address = 0; /* Initialized just to stop compiler warnings. */ + + /* Memory accessed via the cs selector is write protected + in `non-segmented' 32 bit protected mode. */ + if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT) + && (addr_modes.override.segment == PREFIX_CS_) ) + { + math_abort(FPU_info,SIGSEGV); + } + + addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */ + + mod = (FPU_modrm >> 6) & 3; + + if (rm == 4 && mod != 3) + { + address = sib(mod, fpu_eip); + } + else + { + cpu_reg_ptr = & REG_(rm); + switch (mod) + { + case 0: + if (rm == 5) + { + /* Special case: disp32 */ + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(4); + FPU_get_user(address, (unsigned long __user *) (*fpu_eip)); + (*fpu_eip) += 4; + RE_ENTRANT_CHECK_ON; + addr->offset = address; + return (void __user *) address; + } + else + { + address = *cpu_reg_ptr; /* Just return the contents + of the cpu register */ + addr->offset = address; + return (void __user *) address; + } + case 1: + /* 8 bit signed displacement */ + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(address, (signed char __user *) (*fpu_eip)); + RE_ENTRANT_CHECK_ON; + (*fpu_eip)++; + break; + case 2: + /* 32 bit displacement */ + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(4); + FPU_get_user(address, (long __user *) (*fpu_eip)); + (*fpu_eip) += 4; + RE_ENTRANT_CHECK_ON; + break; + case 3: + /* Not legal for the FPU */ + EXCEPTION(EX_Invalid); + } + address += *cpu_reg_ptr; + } + + addr->offset = address; + + switch ( addr_modes.default_mode ) + { + case 0: + break; + case VM86: + address += vm86_segment(addr_modes.override.segment, addr); + break; + case PM16: + case SEG32: + address = pm_address(FPU_modrm, addr_modes.override.segment, + addr, address); + break; + default: + EXCEPTION(EX_INTERNAL|0x133); + } + + return (void __user *)address; +} + + +void __user *FPU_get_address_16(u_char FPU_modrm, unsigned long *fpu_eip, + struct address *addr, + fpu_addr_modes addr_modes) +{ + u_char mod; + unsigned rm = FPU_modrm & 7; + int address = 0; /* Default used for mod == 0 */ + + /* Memory accessed via the cs selector is write protected + in `non-segmented' 32 bit protected mode. */ + if ( !addr_modes.default_mode && (FPU_modrm & FPU_WRITE_BIT) + && (addr_modes.override.segment == PREFIX_CS_) ) + { + math_abort(FPU_info,SIGSEGV); + } + + addr->selector = FPU_DS; /* Default, for 32 bit non-segmented mode. */ + + mod = (FPU_modrm >> 6) & 3; + + switch (mod) + { + case 0: + if (rm == 6) + { + /* Special case: disp16 */ + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(2); + FPU_get_user(address, (unsigned short __user *) (*fpu_eip)); + (*fpu_eip) += 2; + RE_ENTRANT_CHECK_ON; + goto add_segment; + } + break; + case 1: + /* 8 bit signed displacement */ + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(1); + FPU_get_user(address, (signed char __user *) (*fpu_eip)); + RE_ENTRANT_CHECK_ON; + (*fpu_eip)++; + break; + case 2: + /* 16 bit displacement */ + RE_ENTRANT_CHECK_OFF; + FPU_code_access_ok(2); + FPU_get_user(address, (unsigned short __user *) (*fpu_eip)); + (*fpu_eip) += 2; + RE_ENTRANT_CHECK_ON; + break; + case 3: + /* Not legal for the FPU */ + EXCEPTION(EX_Invalid); + break; + } + switch ( rm ) + { + case 0: + address += FPU_info->___ebx + FPU_info->___esi; + break; + case 1: + address += FPU_info->___ebx + FPU_info->___edi; + break; + case 2: + address += FPU_info->___ebp + FPU_info->___esi; + if ( addr_modes.override.segment == PREFIX_DEFAULT ) + addr_modes.override.segment = PREFIX_SS_; + break; + case 3: + address += FPU_info->___ebp + FPU_info->___edi; + if ( addr_modes.override.segment == PREFIX_DEFAULT ) + addr_modes.override.segment = PREFIX_SS_; + break; + case 4: + address += FPU_info->___esi; + break; + case 5: + address += FPU_info->___edi; + break; + case 6: + address += FPU_info->___ebp; + if ( addr_modes.override.segment == PREFIX_DEFAULT ) + addr_modes.override.segment = PREFIX_SS_; + break; + case 7: + address += FPU_info->___ebx; + break; + } + + add_segment: + address &= 0xffff; + + addr->offset = address; + + switch ( addr_modes.default_mode ) + { + case 0: + break; + case VM86: + address += vm86_segment(addr_modes.override.segment, addr); + break; + case PM16: + case SEG32: + address = pm_address(FPU_modrm, addr_modes.override.segment, + addr, address); + break; + default: + EXCEPTION(EX_INTERNAL|0x131); + } + + return (void __user *)address ; +} diff --git a/arch/x86/math-emu/load_store.c b/arch/x86/math-emu/load_store.c new file mode 100644 index 00000000000..eebd6fb1c8a --- /dev/null +++ b/arch/x86/math-emu/load_store.c @@ -0,0 +1,272 @@ +/*---------------------------------------------------------------------------+ + | load_store.c | + | | + | This file contains most of the code to interpret the FPU instructions | + | which load and store from user memory. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + +#include + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "control_w.h" + + +#define _NONE_ 0 /* st0_ptr etc not needed */ +#define _REG0_ 1 /* Will be storing st(0) */ +#define _PUSH_ 3 /* Need to check for space to push onto stack */ +#define _null_ 4 /* Function illegal or not implemented */ + +#define pop_0() { FPU_settag0(TAG_Empty); top++; } + + +static u_char const type_table[32] = { + _PUSH_, _PUSH_, _PUSH_, _PUSH_, + _null_, _null_, _null_, _null_, + _REG0_, _REG0_, _REG0_, _REG0_, + _REG0_, _REG0_, _REG0_, _REG0_, + _NONE_, _null_, _NONE_, _PUSH_, + _NONE_, _PUSH_, _null_, _PUSH_, + _NONE_, _null_, _NONE_, _REG0_, + _NONE_, _REG0_, _NONE_, _REG0_ + }; + +u_char const data_sizes_16[32] = { + 4, 4, 8, 2, 0, 0, 0, 0, + 4, 4, 8, 2, 4, 4, 8, 2, + 14, 0, 94, 10, 2, 10, 0, 8, + 14, 0, 94, 10, 2, 10, 2, 8 +}; + +static u_char const data_sizes_32[32] = { + 4, 4, 8, 2, 0, 0, 0, 0, + 4, 4, 8, 2, 4, 4, 8, 2, + 28, 0,108, 10, 2, 10, 0, 8, + 28, 0,108, 10, 2, 10, 2, 8 +}; + +int FPU_load_store(u_char type, fpu_addr_modes addr_modes, + void __user *data_address) +{ + FPU_REG loaded_data; + FPU_REG *st0_ptr; + u_char st0_tag = TAG_Empty; /* This is just to stop a gcc warning. */ + u_char loaded_tag; + + st0_ptr = NULL; /* Initialized just to stop compiler warnings. */ + + if ( addr_modes.default_mode & PROTECTED ) + { + if ( addr_modes.default_mode == SEG32 ) + { + if ( access_limit < data_sizes_32[type] ) + math_abort(FPU_info,SIGSEGV); + } + else if ( addr_modes.default_mode == PM16 ) + { + if ( access_limit < data_sizes_16[type] ) + math_abort(FPU_info,SIGSEGV); + } +#ifdef PARANOID + else + EXCEPTION(EX_INTERNAL|0x140); +#endif /* PARANOID */ + } + + switch ( type_table[type] ) + { + case _NONE_: + break; + case _REG0_: + st0_ptr = &st(0); /* Some of these instructions pop after + storing */ + st0_tag = FPU_gettag0(); + break; + case _PUSH_: + { + if ( FPU_gettagi(-1) != TAG_Empty ) + { FPU_stack_overflow(); return 0; } + top--; + st0_ptr = &st(0); + } + break; + case _null_: + FPU_illegal(); + return 0; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL|0x141); + return 0; +#endif /* PARANOID */ + } + + switch ( type ) + { + case 000: /* fld m32real */ + clear_C1(); + loaded_tag = FPU_load_single((float __user *)data_address, &loaded_data); + if ( (loaded_tag == TAG_Special) + && isNaN(&loaded_data) + && (real_1op_NaN(&loaded_data) < 0) ) + { + top++; + break; + } + FPU_copy_to_reg0(&loaded_data, loaded_tag); + break; + case 001: /* fild m32int */ + clear_C1(); + loaded_tag = FPU_load_int32((long __user *)data_address, &loaded_data); + FPU_copy_to_reg0(&loaded_data, loaded_tag); + break; + case 002: /* fld m64real */ + clear_C1(); + loaded_tag = FPU_load_double((double __user *)data_address, &loaded_data); + if ( (loaded_tag == TAG_Special) + && isNaN(&loaded_data) + && (real_1op_NaN(&loaded_data) < 0) ) + { + top++; + break; + } + FPU_copy_to_reg0(&loaded_data, loaded_tag); + break; + case 003: /* fild m16int */ + clear_C1(); + loaded_tag = FPU_load_int16((short __user *)data_address, &loaded_data); + FPU_copy_to_reg0(&loaded_data, loaded_tag); + break; + case 010: /* fst m32real */ + clear_C1(); + FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address); + break; + case 011: /* fist m32int */ + clear_C1(); + FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address); + break; + case 012: /* fst m64real */ + clear_C1(); + FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address); + break; + case 013: /* fist m16int */ + clear_C1(); + FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address); + break; + case 014: /* fstp m32real */ + clear_C1(); + if ( FPU_store_single(st0_ptr, st0_tag, (float __user *)data_address) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 015: /* fistp m32int */ + clear_C1(); + if ( FPU_store_int32(st0_ptr, st0_tag, (long __user *)data_address) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 016: /* fstp m64real */ + clear_C1(); + if ( FPU_store_double(st0_ptr, st0_tag, (double __user *)data_address) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 017: /* fistp m16int */ + clear_C1(); + if ( FPU_store_int16(st0_ptr, st0_tag, (short __user *)data_address) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 020: /* fldenv m14/28byte */ + fldenv(addr_modes, (u_char __user *)data_address); + /* Ensure that the values just loaded are not changed by + fix-up operations. */ + return 1; + case 022: /* frstor m94/108byte */ + frstor(addr_modes, (u_char __user *)data_address); + /* Ensure that the values just loaded are not changed by + fix-up operations. */ + return 1; + case 023: /* fbld m80dec */ + clear_C1(); + loaded_tag = FPU_load_bcd((u_char __user *)data_address); + FPU_settag0(loaded_tag); + break; + case 024: /* fldcw */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, data_address, 2); + FPU_get_user(control_word, (unsigned short __user *) data_address); + RE_ENTRANT_CHECK_ON; + if ( partial_status & ~control_word & CW_Exceptions ) + partial_status |= (SW_Summary | SW_Backward); + else + partial_status &= ~(SW_Summary | SW_Backward); +#ifdef PECULIAR_486 + control_word |= 0x40; /* An 80486 appears to always set this bit */ +#endif /* PECULIAR_486 */ + return 1; + case 025: /* fld m80real */ + clear_C1(); + loaded_tag = FPU_load_extended((long double __user *)data_address, 0); + FPU_settag0(loaded_tag); + break; + case 027: /* fild m64int */ + clear_C1(); + loaded_tag = FPU_load_int64((long long __user *)data_address); + if (loaded_tag == TAG_Error) + return 0; + FPU_settag0(loaded_tag); + break; + case 030: /* fstenv m14/28byte */ + fstenv(addr_modes, (u_char __user *)data_address); + return 1; + case 032: /* fsave */ + fsave(addr_modes, (u_char __user *)data_address); + return 1; + case 033: /* fbstp m80dec */ + clear_C1(); + if ( FPU_store_bcd(st0_ptr, st0_tag, (u_char __user *)data_address) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 034: /* fstcw m16int */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,data_address,2); + FPU_put_user(control_word, (unsigned short __user *) data_address); + RE_ENTRANT_CHECK_ON; + return 1; + case 035: /* fstp m80real */ + clear_C1(); + if ( FPU_store_extended(st0_ptr, st0_tag, (long double __user *)data_address) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + case 036: /* fstsw m2byte */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,data_address,2); + FPU_put_user(status_word(),(unsigned short __user *) data_address); + RE_ENTRANT_CHECK_ON; + return 1; + case 037: /* fistp m64int */ + clear_C1(); + if ( FPU_store_int64(st0_ptr, st0_tag, (long long __user *)data_address) ) + pop_0(); /* pop only if the number was actually stored + (see the 80486 manual p16-28) */ + break; + } + return 0; +} diff --git a/arch/x86/math-emu/mul_Xsig.S b/arch/x86/math-emu/mul_Xsig.S new file mode 100644 index 00000000000..717785a53eb --- /dev/null +++ b/arch/x86/math-emu/mul_Xsig.S @@ -0,0 +1,176 @@ +/*---------------------------------------------------------------------------+ + | mul_Xsig.S | + | | + | Multiply a 12 byte fixed point number by another fixed point number. | + | | + | Copyright (C) 1992,1994,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | Call from C as: | + | void mul32_Xsig(Xsig *x, unsigned b) | + | | + | void mul64_Xsig(Xsig *x, unsigned long long *b) | + | | + | void mul_Xsig_Xsig(Xsig *x, unsigned *b) | + | | + | The result is neither rounded nor normalized, and the ls bit or so may | + | be wrong. | + | | + +---------------------------------------------------------------------------*/ + .file "mul_Xsig.S" + + +#include "fpu_emu.h" + +.text +ENTRY(mul32_Xsig) + pushl %ebp + movl %esp,%ebp + subl $16,%esp + pushl %esi + + movl PARAM1,%esi + movl PARAM2,%ecx + + xor %eax,%eax + movl %eax,-4(%ebp) + movl %eax,-8(%ebp) + + movl (%esi),%eax /* lsl of Xsig */ + mull %ecx /* msl of b */ + movl %edx,-12(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull %ecx /* msl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull %ecx /* msl of b */ + addl %eax,-8(%ebp) + adcl %edx,-4(%ebp) + + movl -12(%ebp),%eax + movl %eax,(%esi) + movl -8(%ebp),%eax + movl %eax,4(%esi) + movl -4(%ebp),%eax + movl %eax,8(%esi) + + popl %esi + leave + ret + + +ENTRY(mul64_Xsig) + pushl %ebp + movl %esp,%ebp + subl $16,%esp + pushl %esi + + movl PARAM1,%esi + movl PARAM2,%ecx + + xor %eax,%eax + movl %eax,-4(%ebp) + movl %eax,-8(%ebp) + + movl (%esi),%eax /* lsl of Xsig */ + mull 4(%ecx) /* msl of b */ + movl %edx,-12(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull (%ecx) /* lsl of b */ + addl %edx,-12(%ebp) + adcl $0,-8(%ebp) + adcl $0,-4(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull 4(%ecx) /* msl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull (%ecx) /* lsl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull 4(%ecx) /* msl of b */ + addl %eax,-8(%ebp) + adcl %edx,-4(%ebp) + + movl -12(%ebp),%eax + movl %eax,(%esi) + movl -8(%ebp),%eax + movl %eax,4(%esi) + movl -4(%ebp),%eax + movl %eax,8(%esi) + + popl %esi + leave + ret + + + +ENTRY(mul_Xsig_Xsig) + pushl %ebp + movl %esp,%ebp + subl $16,%esp + pushl %esi + + movl PARAM1,%esi + movl PARAM2,%ecx + + xor %eax,%eax + movl %eax,-4(%ebp) + movl %eax,-8(%ebp) + + movl (%esi),%eax /* lsl of Xsig */ + mull 8(%ecx) /* msl of b */ + movl %edx,-12(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull 4(%ecx) /* midl of b */ + addl %edx,-12(%ebp) + adcl $0,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull (%ecx) /* lsl of b */ + addl %edx,-12(%ebp) + adcl $0,-8(%ebp) + adcl $0,-4(%ebp) + + movl 4(%esi),%eax /* midl of Xsig */ + mull 8(%ecx) /* msl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull 4(%ecx) /* midl of b */ + addl %eax,-12(%ebp) + adcl %edx,-8(%ebp) + adcl $0,-4(%ebp) + + movl 8(%esi),%eax /* msl of Xsig */ + mull 8(%ecx) /* msl of b */ + addl %eax,-8(%ebp) + adcl %edx,-4(%ebp) + + movl -12(%ebp),%edx + movl %edx,(%esi) + movl -8(%ebp),%edx + movl %edx,4(%esi) + movl -4(%ebp),%edx + movl %edx,8(%esi) + + popl %esi + leave + ret + diff --git a/arch/x86/math-emu/poly.h b/arch/x86/math-emu/poly.h new file mode 100644 index 00000000000..4db79811492 --- /dev/null +++ b/arch/x86/math-emu/poly.h @@ -0,0 +1,121 @@ +/*---------------------------------------------------------------------------+ + | poly.h | + | | + | Header file for the FPU-emu poly*.c source files. | + | | + | Copyright (C) 1994,1999 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@melbpc.org.au | + | | + | Declarations and definitions for functions operating on Xsig (12-byte | + | extended-significand) quantities. | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _POLY_H +#define _POLY_H + +/* This 12-byte structure is used to improve the accuracy of computation + of transcendental functions. + Intended to be used to get results better than 8-byte computation + allows. 9-byte would probably be sufficient. + */ +typedef struct { + unsigned long lsw; + unsigned long midw; + unsigned long msw; +} Xsig; + +asmlinkage void mul64(unsigned long long const *a, unsigned long long const *b, + unsigned long long *result); +asmlinkage void polynomial_Xsig(Xsig *, const unsigned long long *x, + const unsigned long long terms[], const int n); + +asmlinkage void mul32_Xsig(Xsig *, const unsigned long mult); +asmlinkage void mul64_Xsig(Xsig *, const unsigned long long *mult); +asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult); + +asmlinkage void shr_Xsig(Xsig *, const int n); +asmlinkage int round_Xsig(Xsig *); +asmlinkage int norm_Xsig(Xsig *); +asmlinkage void div_Xsig(Xsig *x1, const Xsig *x2, const Xsig *dest); + +/* Macro to extract the most significant 32 bits from a long long */ +#define LL_MSW(x) (((unsigned long *)&x)[1]) + +/* Macro to initialize an Xsig struct */ +#define MK_XSIG(a,b,c) { c, b, a } + +/* Macro to access the 8 ms bytes of an Xsig as a long long */ +#define XSIG_LL(x) (*(unsigned long long *)&x.midw) + + +/* + Need to run gcc with optimizations on to get these to + actually be in-line. + */ + +/* Multiply two fixed-point 32 bit numbers, producing a 32 bit result. + The answer is the ms word of the product. */ +/* Some versions of gcc make it difficult to stop eax from being clobbered. + Merely specifying that it is used doesn't work... + */ +static inline unsigned long mul_32_32(const unsigned long arg1, + const unsigned long arg2) +{ + int retval; + asm volatile ("mull %2; movl %%edx,%%eax" \ + :"=a" (retval) \ + :"0" (arg1), "g" (arg2) \ + :"dx"); + return retval; +} + + +/* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */ +static inline void add_Xsig_Xsig(Xsig *dest, const Xsig *x2) +{ + asm volatile ("movl %1,%%edi; movl %2,%%esi;\n" + "movl (%%esi),%%eax; addl %%eax,(%%edi);\n" + "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi);\n" + "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);\n" + :"=g" (*dest):"g" (dest), "g" (x2) + :"ax","si","di"); +} + + +/* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */ +/* Note: the constraints in the asm statement didn't always work properly + with gcc 2.5.8. Changing from using edi to using ecx got around the + problem, but keep fingers crossed! */ +static inline void add_two_Xsig(Xsig *dest, const Xsig *x2, long int *exp) +{ + asm volatile ("movl %2,%%ecx; movl %3,%%esi;\n" + "movl (%%esi),%%eax; addl %%eax,(%%ecx);\n" + "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx);\n" + "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx);\n" + "jnc 0f;\n" + "rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx)\n" + "movl %4,%%ecx; incl (%%ecx)\n" + "movl $1,%%eax; jmp 1f;\n" + "0: xorl %%eax,%%eax;\n" + "1:\n" + :"=g" (*exp), "=g" (*dest) + :"g" (dest), "g" (x2), "g" (exp) + :"cx","si","ax"); +} + + +/* Negate (subtract from 1.0) the 12 byte Xsig */ +/* This is faster in a loop on my 386 than using the "neg" instruction. */ +static inline void negate_Xsig(Xsig *x) +{ + asm volatile("movl %1,%%esi;\n" + "xorl %%ecx,%%ecx;\n" + "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi);\n" + "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi);\n" + "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi);\n" + :"=g" (*x):"g" (x):"si","ax","cx"); +} + +#endif /* _POLY_H */ diff --git a/arch/x86/math-emu/poly_2xm1.c b/arch/x86/math-emu/poly_2xm1.c new file mode 100644 index 00000000000..9766ad5e974 --- /dev/null +++ b/arch/x86/math-emu/poly_2xm1.c @@ -0,0 +1,156 @@ +/*---------------------------------------------------------------------------+ + | poly_2xm1.c | + | | + | Function to compute 2^x-1 by a polynomial approximation. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "fpu_system.h" +#include "control_w.h" +#include "poly.h" + + +#define HIPOWER 11 +static const unsigned long long lterms[HIPOWER] = +{ + 0x0000000000000000LL, /* This term done separately as 12 bytes */ + 0xf5fdeffc162c7543LL, + 0x1c6b08d704a0bfa6LL, + 0x0276556df749cc21LL, + 0x002bb0ffcf14f6b8LL, + 0x0002861225ef751cLL, + 0x00001ffcbfcd5422LL, + 0x00000162c005d5f1LL, + 0x0000000da96ccb1bLL, + 0x0000000078d1b897LL, + 0x000000000422b029LL +}; + +static const Xsig hiterm = MK_XSIG(0xb17217f7, 0xd1cf79ab, 0xc8a39194); + +/* Four slices: 0.0 : 0.25 : 0.50 : 0.75 : 1.0, + These numbers are 2^(1/4), 2^(1/2), and 2^(3/4) + */ +static const Xsig shiftterm0 = MK_XSIG(0, 0, 0); +static const Xsig shiftterm1 = MK_XSIG(0x9837f051, 0x8db8a96f, 0x46ad2318); +static const Xsig shiftterm2 = MK_XSIG(0xb504f333, 0xf9de6484, 0x597d89b3); +static const Xsig shiftterm3 = MK_XSIG(0xd744fcca, 0xd69d6af4, 0x39a68bb9); + +static const Xsig *shiftterm[] = { &shiftterm0, &shiftterm1, + &shiftterm2, &shiftterm3 }; + + +/*--- poly_2xm1() -----------------------------------------------------------+ + | Requires st(0) which is TAG_Valid and < 1. | + +---------------------------------------------------------------------------*/ +int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result) +{ + long int exponent, shift; + unsigned long long Xll; + Xsig accumulator, Denom, argSignif; + u_char tag; + + exponent = exponent16(arg); + +#ifdef PARANOID + if ( exponent >= 0 ) /* Don't want a |number| >= 1.0 */ + { + /* Number negative, too large, or not Valid. */ + EXCEPTION(EX_INTERNAL|0x127); + return 1; + } +#endif /* PARANOID */ + + argSignif.lsw = 0; + XSIG_LL(argSignif) = Xll = significand(arg); + + if ( exponent == -1 ) + { + shift = (argSignif.msw & 0x40000000) ? 3 : 2; + /* subtract 0.5 or 0.75 */ + exponent -= 2; + XSIG_LL(argSignif) <<= 2; + Xll <<= 2; + } + else if ( exponent == -2 ) + { + shift = 1; + /* subtract 0.25 */ + exponent--; + XSIG_LL(argSignif) <<= 1; + Xll <<= 1; + } + else + shift = 0; + + if ( exponent < -2 ) + { + /* Shift the argument right by the required places. */ + if ( FPU_shrx(&Xll, -2-exponent) >= 0x80000000U ) + Xll++; /* round up */ + } + + accumulator.lsw = accumulator.midw = accumulator.msw = 0; + polynomial_Xsig(&accumulator, &Xll, lterms, HIPOWER-1); + mul_Xsig_Xsig(&accumulator, &argSignif); + shr_Xsig(&accumulator, 3); + + mul_Xsig_Xsig(&argSignif, &hiterm); /* The leading term */ + add_two_Xsig(&accumulator, &argSignif, &exponent); + + if ( shift ) + { + /* The argument is large, use the identity: + f(x+a) = f(a) * (f(x) + 1) - 1; + */ + shr_Xsig(&accumulator, - exponent); + accumulator.msw |= 0x80000000; /* add 1.0 */ + mul_Xsig_Xsig(&accumulator, shiftterm[shift]); + accumulator.msw &= 0x3fffffff; /* subtract 1.0 */ + exponent = 1; + } + + if ( sign != SIGN_POS ) + { + /* The argument is negative, use the identity: + f(-x) = -f(x) / (1 + f(x)) + */ + Denom.lsw = accumulator.lsw; + XSIG_LL(Denom) = XSIG_LL(accumulator); + if ( exponent < 0 ) + shr_Xsig(&Denom, - exponent); + else if ( exponent > 0 ) + { + /* exponent must be 1 here */ + XSIG_LL(Denom) <<= 1; + if ( Denom.lsw & 0x80000000 ) + XSIG_LL(Denom) |= 1; + (Denom.lsw) <<= 1; + } + Denom.msw |= 0x80000000; /* add 1.0 */ + div_Xsig(&accumulator, &Denom, &accumulator); + } + + /* Convert to 64 bit signed-compatible */ + exponent += round_Xsig(&accumulator); + + result = &st(0); + significand(result) = XSIG_LL(accumulator); + setexponent16(result, exponent); + + tag = FPU_round(result, 1, 0, FULL_PRECISION, sign); + + setsign(result, sign); + FPU_settag0(tag); + + return 0; + +} diff --git a/arch/x86/math-emu/poly_atan.c b/arch/x86/math-emu/poly_atan.c new file mode 100644 index 00000000000..82f702952f6 --- /dev/null +++ b/arch/x86/math-emu/poly_atan.c @@ -0,0 +1,229 @@ +/*---------------------------------------------------------------------------+ + | poly_atan.c | + | | + | Compute the arctan of a FPU_REG, using a polynomial approximation. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "fpu_system.h" +#include "status_w.h" +#include "control_w.h" +#include "poly.h" + + +#define HIPOWERon 6 /* odd poly, negative terms */ +static const unsigned long long oddnegterms[HIPOWERon] = +{ + 0x0000000000000000LL, /* Dummy (not for - 1.0) */ + 0x015328437f756467LL, + 0x0005dda27b73dec6LL, + 0x0000226bf2bfb91aLL, + 0x000000ccc439c5f7LL, + 0x0000000355438407LL +} ; + +#define HIPOWERop 6 /* odd poly, positive terms */ +static const unsigned long long oddplterms[HIPOWERop] = +{ +/* 0xaaaaaaaaaaaaaaabLL, transferred to fixedpterm[] */ + 0x0db55a71875c9ac2LL, + 0x0029fce2d67880b0LL, + 0x0000dfd3908b4596LL, + 0x00000550fd61dab4LL, + 0x0000001c9422b3f9LL, + 0x000000003e3301e1LL +}; + +static const unsigned long long denomterm = 0xebd9b842c5c53a0eLL; + +static const Xsig fixedpterm = MK_XSIG(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa); + +static const Xsig pi_signif = MK_XSIG(0xc90fdaa2, 0x2168c234, 0xc4c6628b); + + +/*--- poly_atan() -----------------------------------------------------------+ + | | + +---------------------------------------------------------------------------*/ +void poly_atan(FPU_REG *st0_ptr, u_char st0_tag, + FPU_REG *st1_ptr, u_char st1_tag) +{ + u_char transformed, inverted, + sign1, sign2; + int exponent; + long int dummy_exp; + Xsig accumulator, Numer, Denom, accumulatore, argSignif, + argSq, argSqSq; + u_char tag; + + sign1 = getsign(st0_ptr); + sign2 = getsign(st1_ptr); + if ( st0_tag == TAG_Valid ) + { + exponent = exponent(st0_ptr); + } + else + { + /* This gives non-compatible stack contents... */ + FPU_to_exp16(st0_ptr, st0_ptr); + exponent = exponent16(st0_ptr); + } + if ( st1_tag == TAG_Valid ) + { + exponent -= exponent(st1_ptr); + } + else + { + /* This gives non-compatible stack contents... */ + FPU_to_exp16(st1_ptr, st1_ptr); + exponent -= exponent16(st1_ptr); + } + + if ( (exponent < 0) || ((exponent == 0) && + ((st0_ptr->sigh < st1_ptr->sigh) || + ((st0_ptr->sigh == st1_ptr->sigh) && + (st0_ptr->sigl < st1_ptr->sigl))) ) ) + { + inverted = 1; + Numer.lsw = Denom.lsw = 0; + XSIG_LL(Numer) = significand(st0_ptr); + XSIG_LL(Denom) = significand(st1_ptr); + } + else + { + inverted = 0; + exponent = -exponent; + Numer.lsw = Denom.lsw = 0; + XSIG_LL(Numer) = significand(st1_ptr); + XSIG_LL(Denom) = significand(st0_ptr); + } + div_Xsig(&Numer, &Denom, &argSignif); + exponent += norm_Xsig(&argSignif); + + if ( (exponent >= -1) + || ((exponent == -2) && (argSignif.msw > 0xd413ccd0)) ) + { + /* The argument is greater than sqrt(2)-1 (=0.414213562...) */ + /* Convert the argument by an identity for atan */ + transformed = 1; + + if ( exponent >= 0 ) + { +#ifdef PARANOID + if ( !( (exponent == 0) && + (argSignif.lsw == 0) && (argSignif.midw == 0) && + (argSignif.msw == 0x80000000) ) ) + { + EXCEPTION(EX_INTERNAL|0x104); /* There must be a logic error */ + return; + } +#endif /* PARANOID */ + argSignif.msw = 0; /* Make the transformed arg -> 0.0 */ + } + else + { + Numer.lsw = Denom.lsw = argSignif.lsw; + XSIG_LL(Numer) = XSIG_LL(Denom) = XSIG_LL(argSignif); + + if ( exponent < -1 ) + shr_Xsig(&Numer, -1-exponent); + negate_Xsig(&Numer); + + shr_Xsig(&Denom, -exponent); + Denom.msw |= 0x80000000; + + div_Xsig(&Numer, &Denom, &argSignif); + + exponent = -1 + norm_Xsig(&argSignif); + } + } + else + { + transformed = 0; + } + + argSq.lsw = argSignif.lsw; argSq.midw = argSignif.midw; + argSq.msw = argSignif.msw; + mul_Xsig_Xsig(&argSq, &argSq); + + argSqSq.lsw = argSq.lsw; argSqSq.midw = argSq.midw; argSqSq.msw = argSq.msw; + mul_Xsig_Xsig(&argSqSq, &argSqSq); + + accumulatore.lsw = argSq.lsw; + XSIG_LL(accumulatore) = XSIG_LL(argSq); + + shr_Xsig(&argSq, 2*(-1-exponent-1)); + shr_Xsig(&argSqSq, 4*(-1-exponent-1)); + + /* Now have argSq etc with binary point at the left + .1xxxxxxxx */ + + /* Do the basic fixed point polynomial evaluation */ + accumulator.msw = accumulator.midw = accumulator.lsw = 0; + polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), + oddplterms, HIPOWERop-1); + mul64_Xsig(&accumulator, &XSIG_LL(argSq)); + negate_Xsig(&accumulator); + polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), oddnegterms, HIPOWERon-1); + negate_Xsig(&accumulator); + add_two_Xsig(&accumulator, &fixedpterm, &dummy_exp); + + mul64_Xsig(&accumulatore, &denomterm); + shr_Xsig(&accumulatore, 1 + 2*(-1-exponent)); + accumulatore.msw |= 0x80000000; + + div_Xsig(&accumulator, &accumulatore, &accumulator); + + mul_Xsig_Xsig(&accumulator, &argSignif); + mul_Xsig_Xsig(&accumulator, &argSq); + + shr_Xsig(&accumulator, 3); + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &argSignif); + + if ( transformed ) + { + /* compute pi/4 - accumulator */ + shr_Xsig(&accumulator, -1-exponent); + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &pi_signif); + exponent = -1; + } + + if ( inverted ) + { + /* compute pi/2 - accumulator */ + shr_Xsig(&accumulator, -exponent); + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &pi_signif); + exponent = 0; + } + + if ( sign1 ) + { + /* compute pi - accumulator */ + shr_Xsig(&accumulator, 1 - exponent); + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &pi_signif); + exponent = 1; + } + + exponent += round_Xsig(&accumulator); + + significand(st1_ptr) = XSIG_LL(accumulator); + setexponent16(st1_ptr, exponent); + + tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign2); + FPU_settagi(1, tag); + + set_precision_flag_up(); /* We do not really know if up or down, + use this as the default. */ + +} diff --git a/arch/x86/math-emu/poly_l2.c b/arch/x86/math-emu/poly_l2.c new file mode 100644 index 00000000000..dd00e1d5b07 --- /dev/null +++ b/arch/x86/math-emu/poly_l2.c @@ -0,0 +1,272 @@ +/*---------------------------------------------------------------------------+ + | poly_l2.c | + | | + | Compute the base 2 log of a FPU_REG, using a polynomial approximation. | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "fpu_system.h" +#include "control_w.h" +#include "poly.h" + + +static void log2_kernel(FPU_REG const *arg, u_char argsign, + Xsig *accum_result, long int *expon); + + +/*--- poly_l2() -------------------------------------------------------------+ + | Base 2 logarithm by a polynomial approximation. | + +---------------------------------------------------------------------------*/ +void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign) +{ + long int exponent, expon, expon_expon; + Xsig accumulator, expon_accum, yaccum; + u_char sign, argsign; + FPU_REG x; + int tag; + + exponent = exponent16(st0_ptr); + + /* From st0_ptr, make a number > sqrt(2)/2 and < sqrt(2) */ + if ( st0_ptr->sigh > (unsigned)0xb504f334 ) + { + /* Treat as sqrt(2)/2 < st0_ptr < 1 */ + significand(&x) = - significand(st0_ptr); + setexponent16(&x, -1); + exponent++; + argsign = SIGN_NEG; + } + else + { + /* Treat as 1 <= st0_ptr < sqrt(2) */ + x.sigh = st0_ptr->sigh - 0x80000000; + x.sigl = st0_ptr->sigl; + setexponent16(&x, 0); + argsign = SIGN_POS; + } + tag = FPU_normalize_nuo(&x); + + if ( tag == TAG_Zero ) + { + expon = 0; + accumulator.msw = accumulator.midw = accumulator.lsw = 0; + } + else + { + log2_kernel(&x, argsign, &accumulator, &expon); + } + + if ( exponent < 0 ) + { + sign = SIGN_NEG; + exponent = -exponent; + } + else + sign = SIGN_POS; + expon_accum.msw = exponent; expon_accum.midw = expon_accum.lsw = 0; + if ( exponent ) + { + expon_expon = 31 + norm_Xsig(&expon_accum); + shr_Xsig(&accumulator, expon_expon - expon); + + if ( sign ^ argsign ) + negate_Xsig(&accumulator); + add_Xsig_Xsig(&accumulator, &expon_accum); + } + else + { + expon_expon = expon; + sign = argsign; + } + + yaccum.lsw = 0; XSIG_LL(yaccum) = significand(st1_ptr); + mul_Xsig_Xsig(&accumulator, &yaccum); + + expon_expon += round_Xsig(&accumulator); + + if ( accumulator.msw == 0 ) + { + FPU_copy_to_reg1(&CONST_Z, TAG_Zero); + return; + } + + significand(st1_ptr) = XSIG_LL(accumulator); + setexponent16(st1_ptr, expon_expon + exponent16(st1_ptr) + 1); + + tag = FPU_round(st1_ptr, 1, 0, FULL_PRECISION, sign ^ st1_sign); + FPU_settagi(1, tag); + + set_precision_flag_up(); /* 80486 appears to always do this */ + + return; + +} + + +/*--- poly_l2p1() -----------------------------------------------------------+ + | Base 2 logarithm by a polynomial approximation. | + | log2(x+1) | + +---------------------------------------------------------------------------*/ +int poly_l2p1(u_char sign0, u_char sign1, + FPU_REG *st0_ptr, FPU_REG *st1_ptr, FPU_REG *dest) +{ + u_char tag; + long int exponent; + Xsig accumulator, yaccum; + + if ( exponent16(st0_ptr) < 0 ) + { + log2_kernel(st0_ptr, sign0, &accumulator, &exponent); + + yaccum.lsw = 0; + XSIG_LL(yaccum) = significand(st1_ptr); + mul_Xsig_Xsig(&accumulator, &yaccum); + + exponent += round_Xsig(&accumulator); + + exponent += exponent16(st1_ptr) + 1; + if ( exponent < EXP_WAY_UNDER ) exponent = EXP_WAY_UNDER; + + significand(dest) = XSIG_LL(accumulator); + setexponent16(dest, exponent); + + tag = FPU_round(dest, 1, 0, FULL_PRECISION, sign0 ^ sign1); + FPU_settagi(1, tag); + + if ( tag == TAG_Valid ) + set_precision_flag_up(); /* 80486 appears to always do this */ + } + else + { + /* The magnitude of st0_ptr is far too large. */ + + if ( sign0 != SIGN_POS ) + { + /* Trying to get the log of a negative number. */ +#ifdef PECULIAR_486 /* Stupid 80486 doesn't worry about log(negative). */ + changesign(st1_ptr); +#else + if ( arith_invalid(1) < 0 ) + return 1; +#endif /* PECULIAR_486 */ + } + + /* 80486 appears to do this */ + if ( sign0 == SIGN_NEG ) + set_precision_flag_down(); + else + set_precision_flag_up(); + } + + if ( exponent(dest) <= EXP_UNDER ) + EXCEPTION(EX_Underflow); + + return 0; + +} + + + + +#undef HIPOWER +#define HIPOWER 10 +static const unsigned long long logterms[HIPOWER] = +{ + 0x2a8eca5705fc2ef0LL, + 0xf6384ee1d01febceLL, + 0x093bb62877cdf642LL, + 0x006985d8a9ec439bLL, + 0x0005212c4f55a9c8LL, + 0x00004326a16927f0LL, + 0x0000038d1d80a0e7LL, + 0x0000003141cc80c6LL, + 0x00000002b1668c9fLL, + 0x000000002c7a46aaLL +}; + +static const unsigned long leadterm = 0xb8000000; + + +/*--- log2_kernel() ---------------------------------------------------------+ + | Base 2 logarithm by a polynomial approximation. | + | log2(x+1) | + +---------------------------------------------------------------------------*/ +static void log2_kernel(FPU_REG const *arg, u_char argsign, Xsig *accum_result, + long int *expon) +{ + long int exponent, adj; + unsigned long long Xsq; + Xsig accumulator, Numer, Denom, argSignif, arg_signif; + + exponent = exponent16(arg); + Numer.lsw = Denom.lsw = 0; + XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg); + if ( argsign == SIGN_POS ) + { + shr_Xsig(&Denom, 2 - (1 + exponent)); + Denom.msw |= 0x80000000; + div_Xsig(&Numer, &Denom, &argSignif); + } + else + { + shr_Xsig(&Denom, 1 - (1 + exponent)); + negate_Xsig(&Denom); + if ( Denom.msw & 0x80000000 ) + { + div_Xsig(&Numer, &Denom, &argSignif); + exponent ++; + } + else + { + /* Denom must be 1.0 */ + argSignif.lsw = Numer.lsw; argSignif.midw = Numer.midw; + argSignif.msw = Numer.msw; + } + } + +#ifndef PECULIAR_486 + /* Should check here that |local_arg| is within the valid range */ + if ( exponent >= -2 ) + { + if ( (exponent > -2) || + (argSignif.msw > (unsigned)0xafb0ccc0) ) + { + /* The argument is too large */ + } + } +#endif /* PECULIAR_486 */ + + arg_signif.lsw = argSignif.lsw; XSIG_LL(arg_signif) = XSIG_LL(argSignif); + adj = norm_Xsig(&argSignif); + accumulator.lsw = argSignif.lsw; XSIG_LL(accumulator) = XSIG_LL(argSignif); + mul_Xsig_Xsig(&accumulator, &accumulator); + shr_Xsig(&accumulator, 2*(-1 - (1 + exponent + adj))); + Xsq = XSIG_LL(accumulator); + if ( accumulator.lsw & 0x80000000 ) + Xsq++; + + accumulator.msw = accumulator.midw = accumulator.lsw = 0; + /* Do the basic fixed point polynomial evaluation */ + polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER-1); + + mul_Xsig_Xsig(&accumulator, &argSignif); + shr_Xsig(&accumulator, 6 - adj); + + mul32_Xsig(&arg_signif, leadterm); + add_two_Xsig(&accumulator, &arg_signif, &exponent); + + *expon = exponent + 1; + accum_result->lsw = accumulator.lsw; + accum_result->midw = accumulator.midw; + accum_result->msw = accumulator.msw; + +} diff --git a/arch/x86/math-emu/poly_sin.c b/arch/x86/math-emu/poly_sin.c new file mode 100644 index 00000000000..a36313fb06f --- /dev/null +++ b/arch/x86/math-emu/poly_sin.c @@ -0,0 +1,397 @@ +/*---------------------------------------------------------------------------+ + | poly_sin.c | + | | + | Computation of an approximation of the sin function and the cosine | + | function by a polynomial. | + | | + | Copyright (C) 1992,1993,1994,1997,1999 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@melbpc.org.au | + | | + | | + +---------------------------------------------------------------------------*/ + + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "fpu_system.h" +#include "control_w.h" +#include "poly.h" + + +#define N_COEFF_P 4 +#define N_COEFF_N 4 + +static const unsigned long long pos_terms_l[N_COEFF_P] = +{ + 0xaaaaaaaaaaaaaaabLL, + 0x00d00d00d00cf906LL, + 0x000006b99159a8bbLL, + 0x000000000d7392e6LL +}; + +static const unsigned long long neg_terms_l[N_COEFF_N] = +{ + 0x2222222222222167LL, + 0x0002e3bc74aab624LL, + 0x0000000b09229062LL, + 0x00000000000c7973LL +}; + + + +#define N_COEFF_PH 4 +#define N_COEFF_NH 4 +static const unsigned long long pos_terms_h[N_COEFF_PH] = +{ + 0x0000000000000000LL, + 0x05b05b05b05b0406LL, + 0x000049f93edd91a9LL, + 0x00000000c9c9ed62LL +}; + +static const unsigned long long neg_terms_h[N_COEFF_NH] = +{ + 0xaaaaaaaaaaaaaa98LL, + 0x001a01a01a019064LL, + 0x0000008f76c68a77LL, + 0x0000000000d58f5eLL +}; + + +/*--- poly_sine() -----------------------------------------------------------+ + | | + +---------------------------------------------------------------------------*/ +void poly_sine(FPU_REG *st0_ptr) +{ + int exponent, echange; + Xsig accumulator, argSqrd, argTo4; + unsigned long fix_up, adj; + unsigned long long fixed_arg; + FPU_REG result; + + exponent = exponent(st0_ptr); + + accumulator.lsw = accumulator.midw = accumulator.msw = 0; + + /* Split into two ranges, for arguments below and above 1.0 */ + /* The boundary between upper and lower is approx 0.88309101259 */ + if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) ) + { + /* The argument is <= 0.88309101259 */ + + argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0; + mul64_Xsig(&argSqrd, &significand(st0_ptr)); + shr_Xsig(&argSqrd, 2*(-1-exponent)); + argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; + argTo4.lsw = argSqrd.lsw; + mul_Xsig_Xsig(&argTo4, &argTo4); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, + N_COEFF_N-1); + mul_Xsig_Xsig(&accumulator, &argSqrd); + negate_Xsig(&accumulator); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, + N_COEFF_P-1); + + shr_Xsig(&accumulator, 2); /* Divide by four */ + accumulator.msw |= 0x80000000; /* Add 1.0 */ + + mul64_Xsig(&accumulator, &significand(st0_ptr)); + mul64_Xsig(&accumulator, &significand(st0_ptr)); + mul64_Xsig(&accumulator, &significand(st0_ptr)); + + /* Divide by four, FPU_REG compatible, etc */ + exponent = 3*exponent; + + /* The minimum exponent difference is 3 */ + shr_Xsig(&accumulator, exponent(st0_ptr) - exponent); + + negate_Xsig(&accumulator); + XSIG_LL(accumulator) += significand(st0_ptr); + + echange = round_Xsig(&accumulator); + + setexponentpos(&result, exponent(st0_ptr) + echange); + } + else + { + /* The argument is > 0.88309101259 */ + /* We use sin(st(0)) = cos(pi/2-st(0)) */ + + fixed_arg = significand(st0_ptr); + + if ( exponent == 0 ) + { + /* The argument is >= 1.0 */ + + /* Put the binary point at the left. */ + fixed_arg <<= 1; + } + /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ + fixed_arg = 0x921fb54442d18469LL - fixed_arg; + /* There is a special case which arises due to rounding, to fix here. */ + if ( fixed_arg == 0xffffffffffffffffLL ) + fixed_arg = 0; + + XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; + mul64_Xsig(&argSqrd, &fixed_arg); + + XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw; + mul_Xsig_Xsig(&argTo4, &argTo4); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, + N_COEFF_NH-1); + mul_Xsig_Xsig(&accumulator, &argSqrd); + negate_Xsig(&accumulator); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, + N_COEFF_PH-1); + negate_Xsig(&accumulator); + + mul64_Xsig(&accumulator, &fixed_arg); + mul64_Xsig(&accumulator, &fixed_arg); + + shr_Xsig(&accumulator, 3); + negate_Xsig(&accumulator); + + add_Xsig_Xsig(&accumulator, &argSqrd); + + shr_Xsig(&accumulator, 1); + + accumulator.lsw |= 1; /* A zero accumulator here would cause problems */ + negate_Xsig(&accumulator); + + /* The basic computation is complete. Now fix the answer to + compensate for the error due to the approximation used for + pi/2 + */ + + /* This has an exponent of -65 */ + fix_up = 0x898cc517; + /* The fix-up needs to be improved for larger args */ + if ( argSqrd.msw & 0xffc00000 ) + { + /* Get about 32 bit precision in these: */ + fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6; + } + fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg)); + + adj = accumulator.lsw; /* temp save */ + accumulator.lsw -= fix_up; + if ( accumulator.lsw > adj ) + XSIG_LL(accumulator) --; + + echange = round_Xsig(&accumulator); + + setexponentpos(&result, echange - 1); + } + + significand(&result) = XSIG_LL(accumulator); + setsign(&result, getsign(st0_ptr)); + FPU_copy_to_reg0(&result, TAG_Valid); + +#ifdef PARANOID + if ( (exponent(&result) >= 0) + && (significand(&result) > 0x8000000000000000LL) ) + { + EXCEPTION(EX_INTERNAL|0x150); + } +#endif /* PARANOID */ + +} + + + +/*--- poly_cos() ------------------------------------------------------------+ + | | + +---------------------------------------------------------------------------*/ +void poly_cos(FPU_REG *st0_ptr) +{ + FPU_REG result; + long int exponent, exp2, echange; + Xsig accumulator, argSqrd, fix_up, argTo4; + unsigned long long fixed_arg; + +#ifdef PARANOID + if ( (exponent(st0_ptr) > 0) + || ((exponent(st0_ptr) == 0) + && (significand(st0_ptr) > 0xc90fdaa22168c234LL)) ) + { + EXCEPTION(EX_Invalid); + FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); + return; + } +#endif /* PARANOID */ + + exponent = exponent(st0_ptr); + + accumulator.lsw = accumulator.midw = accumulator.msw = 0; + + if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) ) + { + /* arg is < 0.687705 */ + + argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; + argSqrd.lsw = 0; + mul64_Xsig(&argSqrd, &significand(st0_ptr)); + + if ( exponent < -1 ) + { + /* shift the argument right by the required places */ + shr_Xsig(&argSqrd, 2*(-1-exponent)); + } + + argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; + argTo4.lsw = argSqrd.lsw; + mul_Xsig_Xsig(&argTo4, &argTo4); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, + N_COEFF_NH-1); + mul_Xsig_Xsig(&accumulator, &argSqrd); + negate_Xsig(&accumulator); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, + N_COEFF_PH-1); + negate_Xsig(&accumulator); + + mul64_Xsig(&accumulator, &significand(st0_ptr)); + mul64_Xsig(&accumulator, &significand(st0_ptr)); + shr_Xsig(&accumulator, -2*(1+exponent)); + + shr_Xsig(&accumulator, 3); + negate_Xsig(&accumulator); + + add_Xsig_Xsig(&accumulator, &argSqrd); + + shr_Xsig(&accumulator, 1); + + /* It doesn't matter if accumulator is all zero here, the + following code will work ok */ + negate_Xsig(&accumulator); + + if ( accumulator.lsw & 0x80000000 ) + XSIG_LL(accumulator) ++; + if ( accumulator.msw == 0 ) + { + /* The result is 1.0 */ + FPU_copy_to_reg0(&CONST_1, TAG_Valid); + return; + } + else + { + significand(&result) = XSIG_LL(accumulator); + + /* will be a valid positive nr with expon = -1 */ + setexponentpos(&result, -1); + } + } + else + { + fixed_arg = significand(st0_ptr); + + if ( exponent == 0 ) + { + /* The argument is >= 1.0 */ + + /* Put the binary point at the left. */ + fixed_arg <<= 1; + } + /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ + fixed_arg = 0x921fb54442d18469LL - fixed_arg; + /* There is a special case which arises due to rounding, to fix here. */ + if ( fixed_arg == 0xffffffffffffffffLL ) + fixed_arg = 0; + + exponent = -1; + exp2 = -1; + + /* A shift is needed here only for a narrow range of arguments, + i.e. for fixed_arg approx 2^-32, but we pick up more... */ + if ( !(LL_MSW(fixed_arg) & 0xffff0000) ) + { + fixed_arg <<= 16; + exponent -= 16; + exp2 -= 16; + } + + XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; + mul64_Xsig(&argSqrd, &fixed_arg); + + if ( exponent < -1 ) + { + /* shift the argument right by the required places */ + shr_Xsig(&argSqrd, 2*(-1-exponent)); + } + + argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; + argTo4.lsw = argSqrd.lsw; + mul_Xsig_Xsig(&argTo4, &argTo4); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, + N_COEFF_N-1); + mul_Xsig_Xsig(&accumulator, &argSqrd); + negate_Xsig(&accumulator); + + polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, + N_COEFF_P-1); + + shr_Xsig(&accumulator, 2); /* Divide by four */ + accumulator.msw |= 0x80000000; /* Add 1.0 */ + + mul64_Xsig(&accumulator, &fixed_arg); + mul64_Xsig(&accumulator, &fixed_arg); + mul64_Xsig(&accumulator, &fixed_arg); + + /* Divide by four, FPU_REG compatible, etc */ + exponent = 3*exponent; + + /* The minimum exponent difference is 3 */ + shr_Xsig(&accumulator, exp2 - exponent); + + negate_Xsig(&accumulator); + XSIG_LL(accumulator) += fixed_arg; + + /* The basic computation is complete. Now fix the answer to + compensate for the error due to the approximation used for + pi/2 + */ + + /* This has an exponent of -65 */ + XSIG_LL(fix_up) = 0x898cc51701b839a2ll; + fix_up.lsw = 0; + + /* The fix-up needs to be improved for larger args */ + if ( argSqrd.msw & 0xffc00000 ) + { + /* Get about 32 bit precision in these: */ + fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2; + fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24; + } + + exp2 += norm_Xsig(&accumulator); + shr_Xsig(&accumulator, 1); /* Prevent overflow */ + exp2++; + shr_Xsig(&fix_up, 65 + exp2); + + add_Xsig_Xsig(&accumulator, &fix_up); + + echange = round_Xsig(&accumulator); + + setexponentpos(&result, exp2 + echange); + significand(&result) = XSIG_LL(accumulator); + } + + FPU_copy_to_reg0(&result, TAG_Valid); + +#ifdef PARANOID + if ( (exponent(&result) >= 0) + && (significand(&result) > 0x8000000000000000LL) ) + { + EXCEPTION(EX_INTERNAL|0x151); + } +#endif /* PARANOID */ + +} diff --git a/arch/x86/math-emu/poly_tan.c b/arch/x86/math-emu/poly_tan.c new file mode 100644 index 00000000000..8df3e03b6e6 --- /dev/null +++ b/arch/x86/math-emu/poly_tan.c @@ -0,0 +1,222 @@ +/*---------------------------------------------------------------------------+ + | poly_tan.c | + | | + | Compute the tan of a FPU_REG, using a polynomial approximation. | + | | + | Copyright (C) 1992,1993,1994,1997,1999 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@melbpc.org.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "fpu_system.h" +#include "control_w.h" +#include "poly.h" + + +#define HiPOWERop 3 /* odd poly, positive terms */ +static const unsigned long long oddplterm[HiPOWERop] = +{ + 0x0000000000000000LL, + 0x0051a1cf08fca228LL, + 0x0000000071284ff7LL +}; + +#define HiPOWERon 2 /* odd poly, negative terms */ +static const unsigned long long oddnegterm[HiPOWERon] = +{ + 0x1291a9a184244e80LL, + 0x0000583245819c21LL +}; + +#define HiPOWERep 2 /* even poly, positive terms */ +static const unsigned long long evenplterm[HiPOWERep] = +{ + 0x0e848884b539e888LL, + 0x00003c7f18b887daLL +}; + +#define HiPOWERen 2 /* even poly, negative terms */ +static const unsigned long long evennegterm[HiPOWERen] = +{ + 0xf1f0200fd51569ccLL, + 0x003afb46105c4432LL +}; + +static const unsigned long long twothirds = 0xaaaaaaaaaaaaaaabLL; + + +/*--- poly_tan() ------------------------------------------------------------+ + | | + +---------------------------------------------------------------------------*/ +void poly_tan(FPU_REG *st0_ptr) +{ + long int exponent; + int invert; + Xsig argSq, argSqSq, accumulatoro, accumulatore, accum, + argSignif, fix_up; + unsigned long adj; + + exponent = exponent(st0_ptr); + +#ifdef PARANOID + if ( signnegative(st0_ptr) ) /* Can't hack a number < 0.0 */ + { arith_invalid(0); return; } /* Need a positive number */ +#endif /* PARANOID */ + + /* Split the problem into two domains, smaller and larger than pi/4 */ + if ( (exponent == 0) || ((exponent == -1) && (st0_ptr->sigh > 0xc90fdaa2)) ) + { + /* The argument is greater than (approx) pi/4 */ + invert = 1; + accum.lsw = 0; + XSIG_LL(accum) = significand(st0_ptr); + + if ( exponent == 0 ) + { + /* The argument is >= 1.0 */ + /* Put the binary point at the left. */ + XSIG_LL(accum) <<= 1; + } + /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ + XSIG_LL(accum) = 0x921fb54442d18469LL - XSIG_LL(accum); + /* This is a special case which arises due to rounding. */ + if ( XSIG_LL(accum) == 0xffffffffffffffffLL ) + { + FPU_settag0(TAG_Valid); + significand(st0_ptr) = 0x8a51e04daabda360LL; + setexponent16(st0_ptr, (0x41 + EXTENDED_Ebias) | SIGN_Negative); + return; + } + + argSignif.lsw = accum.lsw; + XSIG_LL(argSignif) = XSIG_LL(accum); + exponent = -1 + norm_Xsig(&argSignif); + } + else + { + invert = 0; + argSignif.lsw = 0; + XSIG_LL(accum) = XSIG_LL(argSignif) = significand(st0_ptr); + + if ( exponent < -1 ) + { + /* shift the argument right by the required places */ + if ( FPU_shrx(&XSIG_LL(accum), -1-exponent) >= 0x80000000U ) + XSIG_LL(accum) ++; /* round up */ + } + } + + XSIG_LL(argSq) = XSIG_LL(accum); argSq.lsw = accum.lsw; + mul_Xsig_Xsig(&argSq, &argSq); + XSIG_LL(argSqSq) = XSIG_LL(argSq); argSqSq.lsw = argSq.lsw; + mul_Xsig_Xsig(&argSqSq, &argSqSq); + + /* Compute the negative terms for the numerator polynomial */ + accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0; + polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm, HiPOWERon-1); + mul_Xsig_Xsig(&accumulatoro, &argSq); + negate_Xsig(&accumulatoro); + /* Add the positive terms */ + polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm, HiPOWERop-1); + + + /* Compute the positive terms for the denominator polynomial */ + accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0; + polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm, HiPOWERep-1); + mul_Xsig_Xsig(&accumulatore, &argSq); + negate_Xsig(&accumulatore); + /* Add the negative terms */ + polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm, HiPOWERen-1); + /* Multiply by arg^2 */ + mul64_Xsig(&accumulatore, &XSIG_LL(argSignif)); + mul64_Xsig(&accumulatore, &XSIG_LL(argSignif)); + /* de-normalize and divide by 2 */ + shr_Xsig(&accumulatore, -2*(1+exponent) + 1); + negate_Xsig(&accumulatore); /* This does 1 - accumulator */ + + /* Now find the ratio. */ + if ( accumulatore.msw == 0 ) + { + /* accumulatoro must contain 1.0 here, (actually, 0) but it + really doesn't matter what value we use because it will + have negligible effect in later calculations + */ + XSIG_LL(accum) = 0x8000000000000000LL; + accum.lsw = 0; + } + else + { + div_Xsig(&accumulatoro, &accumulatore, &accum); + } + + /* Multiply by 1/3 * arg^3 */ + mul64_Xsig(&accum, &XSIG_LL(argSignif)); + mul64_Xsig(&accum, &XSIG_LL(argSignif)); + mul64_Xsig(&accum, &XSIG_LL(argSignif)); + mul64_Xsig(&accum, &twothirds); + shr_Xsig(&accum, -2*(exponent+1)); + + /* tan(arg) = arg + accum */ + add_two_Xsig(&accum, &argSignif, &exponent); + + if ( invert ) + { + /* We now have the value of tan(pi_2 - arg) where pi_2 is an + approximation for pi/2 + */ + /* The next step is to fix the answer to compensate for the + error due to the approximation used for pi/2 + */ + + /* This is (approx) delta, the error in our approx for pi/2 + (see above). It has an exponent of -65 + */ + XSIG_LL(fix_up) = 0x898cc51701b839a2LL; + fix_up.lsw = 0; + + if ( exponent == 0 ) + adj = 0xffffffff; /* We want approx 1.0 here, but + this is close enough. */ + else if ( exponent > -30 ) + { + adj = accum.msw >> -(exponent+1); /* tan */ + adj = mul_32_32(adj, adj); /* tan^2 */ + } + else + adj = 0; + adj = mul_32_32(0x898cc517, adj); /* delta * tan^2 */ + + fix_up.msw += adj; + if ( !(fix_up.msw & 0x80000000) ) /* did fix_up overflow ? */ + { + /* Yes, we need to add an msb */ + shr_Xsig(&fix_up, 1); + fix_up.msw |= 0x80000000; + shr_Xsig(&fix_up, 64 + exponent); + } + else + shr_Xsig(&fix_up, 65 + exponent); + + add_two_Xsig(&accum, &fix_up, &exponent); + + /* accum now contains tan(pi/2 - arg). + Use tan(arg) = 1.0 / tan(pi/2 - arg) + */ + accumulatoro.lsw = accumulatoro.midw = 0; + accumulatoro.msw = 0x80000000; + div_Xsig(&accumulatoro, &accum, &accum); + exponent = - exponent - 1; + } + + /* Transfer the result */ + round_Xsig(&accum); + FPU_settag0(TAG_Valid); + significand(st0_ptr) = XSIG_LL(accum); + setexponent16(st0_ptr, exponent + EXTENDED_Ebias); /* Result is positive. */ + +} diff --git a/arch/x86/math-emu/polynom_Xsig.S b/arch/x86/math-emu/polynom_Xsig.S new file mode 100644 index 00000000000..17315c89ff3 --- /dev/null +++ b/arch/x86/math-emu/polynom_Xsig.S @@ -0,0 +1,135 @@ +/*---------------------------------------------------------------------------+ + | polynomial_Xsig.S | + | | + | Fixed point arithmetic polynomial evaluation. | + | | + | Copyright (C) 1992,1993,1994,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | Call from C as: | + | void polynomial_Xsig(Xsig *accum, unsigned long long x, | + | unsigned long long terms[], int n) | + | | + | Computes: | + | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x | + | and adds the result to the 12 byte Xsig. | + | The terms[] are each 8 bytes, but all computation is performed to 12 byte | + | precision. | + | | + | This function must be used carefully: most overflow of intermediate | + | results is controlled, but overflow of the result is not. | + | | + +---------------------------------------------------------------------------*/ + .file "polynomial_Xsig.S" + +#include "fpu_emu.h" + + +#define TERM_SIZE $8 +#define SUM_MS -20(%ebp) /* sum ms long */ +#define SUM_MIDDLE -24(%ebp) /* sum middle long */ +#define SUM_LS -28(%ebp) /* sum ls long */ +#define ACCUM_MS -4(%ebp) /* accum ms long */ +#define ACCUM_MIDDLE -8(%ebp) /* accum middle long */ +#define ACCUM_LS -12(%ebp) /* accum ls long */ +#define OVERFLOWED -16(%ebp) /* addition overflow flag */ + +.text +ENTRY(polynomial_Xsig) + pushl %ebp + movl %esp,%ebp + subl $32,%esp + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM2,%esi /* x */ + movl PARAM3,%edi /* terms */ + + movl TERM_SIZE,%eax + mull PARAM4 /* n */ + addl %eax,%edi + + movl 4(%edi),%edx /* terms[n] */ + movl %edx,SUM_MS + movl (%edi),%edx /* terms[n] */ + movl %edx,SUM_MIDDLE + xor %eax,%eax + movl %eax,SUM_LS + movb %al,OVERFLOWED + + subl TERM_SIZE,%edi + decl PARAM4 + js L_accum_done + +L_accum_loop: + xor %eax,%eax + movl %eax,ACCUM_MS + movl %eax,ACCUM_MIDDLE + + movl SUM_MIDDLE,%eax + mull (%esi) /* x ls long */ + movl %edx,ACCUM_LS + + movl SUM_MIDDLE,%eax + mull 4(%esi) /* x ms long */ + addl %eax,ACCUM_LS + adcl %edx,ACCUM_MIDDLE + adcl $0,ACCUM_MS + + movl SUM_MS,%eax + mull (%esi) /* x ls long */ + addl %eax,ACCUM_LS + adcl %edx,ACCUM_MIDDLE + adcl $0,ACCUM_MS + + movl SUM_MS,%eax + mull 4(%esi) /* x ms long */ + addl %eax,ACCUM_MIDDLE + adcl %edx,ACCUM_MS + + testb $0xff,OVERFLOWED + jz L_no_overflow + + movl (%esi),%eax + addl %eax,ACCUM_MIDDLE + movl 4(%esi),%eax + adcl %eax,ACCUM_MS /* This could overflow too */ + +L_no_overflow: + +/* + * Now put the sum of next term and the accumulator + * into the sum register + */ + movl ACCUM_LS,%eax + addl (%edi),%eax /* term ls long */ + movl %eax,SUM_LS + movl ACCUM_MIDDLE,%eax + adcl (%edi),%eax /* term ls long */ + movl %eax,SUM_MIDDLE + movl ACCUM_MS,%eax + adcl 4(%edi),%eax /* term ms long */ + movl %eax,SUM_MS + sbbb %al,%al + movb %al,OVERFLOWED /* Used in the next iteration */ + + subl TERM_SIZE,%edi + decl PARAM4 + jns L_accum_loop + +L_accum_done: + movl PARAM1,%edi /* accum */ + movl SUM_LS,%eax + addl %eax,(%edi) + movl SUM_MIDDLE,%eax + adcl %eax,4(%edi) + movl SUM_MS,%eax + adcl %eax,8(%edi) + + popl %ebx + popl %edi + popl %esi + leave + ret diff --git a/arch/x86/math-emu/reg_add_sub.c b/arch/x86/math-emu/reg_add_sub.c new file mode 100644 index 00000000000..7cd3b37ac08 --- /dev/null +++ b/arch/x86/math-emu/reg_add_sub.c @@ -0,0 +1,374 @@ +/*---------------------------------------------------------------------------+ + | reg_add_sub.c | + | | + | Functions to add or subtract two registers and put the result in a third. | + | | + | Copyright (C) 1992,1993,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | For each function, the destination may be any FPU_REG, including one of | + | the source FPU_REGs. | + | Each function returns 0 if the answer is o.k., otherwise a non-zero | + | value is returned, indicating either an exception condition or an | + | internal error. | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "control_w.h" +#include "fpu_system.h" + +static +int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa, + FPU_REG const *b, u_char tagb, u_char signb, + FPU_REG *dest, int deststnr, int control_w); + +/* + Operates on st(0) and st(n), or on st(0) and temporary data. + The destination must be one of the source st(x). + */ +int FPU_add(FPU_REG const *b, u_char tagb, int deststnr, int control_w) +{ + FPU_REG *a = &st(0); + FPU_REG *dest = &st(deststnr); + u_char signb = getsign(b); + u_char taga = FPU_gettag0(); + u_char signa = getsign(a); + u_char saved_sign = getsign(dest); + int diff, tag, expa, expb; + + if ( !(taga | tagb) ) + { + expa = exponent(a); + expb = exponent(b); + + valid_add: + /* Both registers are valid */ + if (!(signa ^ signb)) + { + /* signs are the same */ + tag = FPU_u_add(a, b, dest, control_w, signa, expa, expb); + } + else + { + /* The signs are different, so do a subtraction */ + diff = expa - expb; + if (!diff) + { + diff = a->sigh - b->sigh; /* This works only if the ms bits + are identical. */ + if (!diff) + { + diff = a->sigl > b->sigl; + if (!diff) + diff = -(a->sigl < b->sigl); + } + } + + if (diff > 0) + { + tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb); + } + else if ( diff < 0 ) + { + tag = FPU_u_sub(b, a, dest, control_w, signb, expb, expa); + } + else + { + FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); + /* sign depends upon rounding mode */ + setsign(dest, ((control_w & CW_RC) != RC_DOWN) + ? SIGN_POS : SIGN_NEG); + return TAG_Zero; + } + } + + if ( tag < 0 ) + { + setsign(dest, saved_sign); + return tag; + } + FPU_settagi(deststnr, tag); + return tag; + } + + if ( taga == TAG_Special ) + taga = FPU_Special(a); + if ( tagb == TAG_Special ) + tagb = FPU_Special(b); + + if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) + || ((taga == TW_Denormal) && (tagb == TAG_Valid)) + || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) + { + FPU_REG x, y; + + if ( denormal_operand() < 0 ) + return FPU_Exception; + + FPU_to_exp16(a, &x); + FPU_to_exp16(b, &y); + a = &x; + b = &y; + expa = exponent16(a); + expb = exponent16(b); + goto valid_add; + } + + if ( (taga == TW_NaN) || (tagb == TW_NaN) ) + { + if ( deststnr == 0 ) + return real_2op_NaN(b, tagb, deststnr, a); + else + return real_2op_NaN(a, taga, deststnr, a); + } + + return add_sub_specials(a, taga, signa, b, tagb, signb, + dest, deststnr, control_w); +} + + +/* Subtract b from a. (a-b) -> dest */ +int FPU_sub(int flags, int rm, int control_w) +{ + FPU_REG const *a, *b; + FPU_REG *dest; + u_char taga, tagb, signa, signb, saved_sign, sign; + int diff, tag = 0, expa, expb, deststnr; + + a = &st(0); + taga = FPU_gettag0(); + + deststnr = 0; + if ( flags & LOADED ) + { + b = (FPU_REG *)rm; + tagb = flags & 0x0f; + } + else + { + b = &st(rm); + tagb = FPU_gettagi(rm); + + if ( flags & DEST_RM ) + deststnr = rm; + } + + signa = getsign(a); + signb = getsign(b); + + if ( flags & REV ) + { + signa ^= SIGN_NEG; + signb ^= SIGN_NEG; + } + + dest = &st(deststnr); + saved_sign = getsign(dest); + + if ( !(taga | tagb) ) + { + expa = exponent(a); + expb = exponent(b); + + valid_subtract: + /* Both registers are valid */ + + diff = expa - expb; + + if (!diff) + { + diff = a->sigh - b->sigh; /* Works only if ms bits are identical */ + if (!diff) + { + diff = a->sigl > b->sigl; + if (!diff) + diff = -(a->sigl < b->sigl); + } + } + + switch ( (((int)signa)*2 + signb) / SIGN_NEG ) + { + case 0: /* P - P */ + case 3: /* N - N */ + if (diff > 0) + { + /* |a| > |b| */ + tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb); + } + else if ( diff == 0 ) + { + FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); + + /* sign depends upon rounding mode */ + setsign(dest, ((control_w & CW_RC) != RC_DOWN) + ? SIGN_POS : SIGN_NEG); + return TAG_Zero; + } + else + { + sign = signa ^ SIGN_NEG; + tag = FPU_u_sub(b, a, dest, control_w, sign, expb, expa); + } + break; + case 1: /* P - N */ + tag = FPU_u_add(a, b, dest, control_w, SIGN_POS, expa, expb); + break; + case 2: /* N - P */ + tag = FPU_u_add(a, b, dest, control_w, SIGN_NEG, expa, expb); + break; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL|0x111); + return -1; +#endif + } + if ( tag < 0 ) + { + setsign(dest, saved_sign); + return tag; + } + FPU_settagi(deststnr, tag); + return tag; + } + + if ( taga == TAG_Special ) + taga = FPU_Special(a); + if ( tagb == TAG_Special ) + tagb = FPU_Special(b); + + if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) + || ((taga == TW_Denormal) && (tagb == TAG_Valid)) + || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) + { + FPU_REG x, y; + + if ( denormal_operand() < 0 ) + return FPU_Exception; + + FPU_to_exp16(a, &x); + FPU_to_exp16(b, &y); + a = &x; + b = &y; + expa = exponent16(a); + expb = exponent16(b); + + goto valid_subtract; + } + + if ( (taga == TW_NaN) || (tagb == TW_NaN) ) + { + FPU_REG const *d1, *d2; + if ( flags & REV ) + { + d1 = b; + d2 = a; + } + else + { + d1 = a; + d2 = b; + } + if ( flags & LOADED ) + return real_2op_NaN(b, tagb, deststnr, d1); + if ( flags & DEST_RM ) + return real_2op_NaN(a, taga, deststnr, d2); + else + return real_2op_NaN(b, tagb, deststnr, d2); + } + + return add_sub_specials(a, taga, signa, b, tagb, signb ^ SIGN_NEG, + dest, deststnr, control_w); +} + + +static +int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa, + FPU_REG const *b, u_char tagb, u_char signb, + FPU_REG *dest, int deststnr, int control_w) +{ + if ( ((taga == TW_Denormal) || (tagb == TW_Denormal)) + && (denormal_operand() < 0) ) + return FPU_Exception; + + if (taga == TAG_Zero) + { + if (tagb == TAG_Zero) + { + /* Both are zero, result will be zero. */ + u_char different_signs = signa ^ signb; + + FPU_copy_to_regi(a, TAG_Zero, deststnr); + if ( different_signs ) + { + /* Signs are different. */ + /* Sign of answer depends upon rounding mode. */ + setsign(dest, ((control_w & CW_RC) != RC_DOWN) + ? SIGN_POS : SIGN_NEG); + } + else + setsign(dest, signa); /* signa may differ from the sign of a. */ + return TAG_Zero; + } + else + { + reg_copy(b, dest); + if ( (tagb == TW_Denormal) && (b->sigh & 0x80000000) ) + { + /* A pseudoDenormal, convert it. */ + addexponent(dest, 1); + tagb = TAG_Valid; + } + else if ( tagb > TAG_Empty ) + tagb = TAG_Special; + setsign(dest, signb); /* signb may differ from the sign of b. */ + FPU_settagi(deststnr, tagb); + return tagb; + } + } + else if (tagb == TAG_Zero) + { + reg_copy(a, dest); + if ( (taga == TW_Denormal) && (a->sigh & 0x80000000) ) + { + /* A pseudoDenormal */ + addexponent(dest, 1); + taga = TAG_Valid; + } + else if ( taga > TAG_Empty ) + taga = TAG_Special; + setsign(dest, signa); /* signa may differ from the sign of a. */ + FPU_settagi(deststnr, taga); + return taga; + } + else if (taga == TW_Infinity) + { + if ( (tagb != TW_Infinity) || (signa == signb) ) + { + FPU_copy_to_regi(a, TAG_Special, deststnr); + setsign(dest, signa); /* signa may differ from the sign of a. */ + return taga; + } + /* Infinity-Infinity is undefined. */ + return arith_invalid(deststnr); + } + else if (tagb == TW_Infinity) + { + FPU_copy_to_regi(b, TAG_Special, deststnr); + setsign(dest, signb); /* signb may differ from the sign of b. */ + return tagb; + } + +#ifdef PARANOID + EXCEPTION(EX_INTERNAL|0x101); +#endif + + return FPU_Exception; +} + diff --git a/arch/x86/math-emu/reg_compare.c b/arch/x86/math-emu/reg_compare.c new file mode 100644 index 00000000000..f37c5b5a35a --- /dev/null +++ b/arch/x86/math-emu/reg_compare.c @@ -0,0 +1,381 @@ +/*---------------------------------------------------------------------------+ + | reg_compare.c | + | | + | Compare two floating point registers | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | compare() is the core FPU_REG comparison function | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "exception.h" +#include "fpu_emu.h" +#include "control_w.h" +#include "status_w.h" + + +static int compare(FPU_REG const *b, int tagb) +{ + int diff, exp0, expb; + u_char st0_tag; + FPU_REG *st0_ptr; + FPU_REG x, y; + u_char st0_sign, signb = getsign(b); + + st0_ptr = &st(0); + st0_tag = FPU_gettag0(); + st0_sign = getsign(st0_ptr); + + if ( tagb == TAG_Special ) + tagb = FPU_Special(b); + if ( st0_tag == TAG_Special ) + st0_tag = FPU_Special(st0_ptr); + + if ( ((st0_tag != TAG_Valid) && (st0_tag != TW_Denormal)) + || ((tagb != TAG_Valid) && (tagb != TW_Denormal)) ) + { + if ( st0_tag == TAG_Zero ) + { + if ( tagb == TAG_Zero ) return COMP_A_eq_B; + if ( tagb == TAG_Valid ) + return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B); + if ( tagb == TW_Denormal ) + return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) + | COMP_Denormal; + } + else if ( tagb == TAG_Zero ) + { + if ( st0_tag == TAG_Valid ) + return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B); + if ( st0_tag == TW_Denormal ) + return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) + | COMP_Denormal; + } + + if ( st0_tag == TW_Infinity ) + { + if ( (tagb == TAG_Valid) || (tagb == TAG_Zero) ) + return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B); + else if ( tagb == TW_Denormal ) + return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) + | COMP_Denormal; + else if ( tagb == TW_Infinity ) + { + /* The 80486 book says that infinities can be equal! */ + return (st0_sign == signb) ? COMP_A_eq_B : + ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B); + } + /* Fall through to the NaN code */ + } + else if ( tagb == TW_Infinity ) + { + if ( (st0_tag == TAG_Valid) || (st0_tag == TAG_Zero) ) + return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B); + if ( st0_tag == TW_Denormal ) + return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) + | COMP_Denormal; + /* Fall through to the NaN code */ + } + + /* The only possibility now should be that one of the arguments + is a NaN */ + if ( (st0_tag == TW_NaN) || (tagb == TW_NaN) ) + { + int signalling = 0, unsupported = 0; + if ( st0_tag == TW_NaN ) + { + signalling = (st0_ptr->sigh & 0xc0000000) == 0x80000000; + unsupported = !((exponent(st0_ptr) == EXP_OVER) + && (st0_ptr->sigh & 0x80000000)); + } + if ( tagb == TW_NaN ) + { + signalling |= (b->sigh & 0xc0000000) == 0x80000000; + unsupported |= !((exponent(b) == EXP_OVER) + && (b->sigh & 0x80000000)); + } + if ( signalling || unsupported ) + return COMP_No_Comp | COMP_SNaN | COMP_NaN; + else + /* Neither is a signaling NaN */ + return COMP_No_Comp | COMP_NaN; + } + + EXCEPTION(EX_Invalid); + } + + if (st0_sign != signb) + { + return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) + | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? + COMP_Denormal : 0); + } + + if ( (st0_tag == TW_Denormal) || (tagb == TW_Denormal) ) + { + FPU_to_exp16(st0_ptr, &x); + FPU_to_exp16(b, &y); + st0_ptr = &x; + b = &y; + exp0 = exponent16(st0_ptr); + expb = exponent16(b); + } + else + { + exp0 = exponent(st0_ptr); + expb = exponent(b); + } + +#ifdef PARANOID + if (!(st0_ptr->sigh & 0x80000000)) EXCEPTION(EX_Invalid); + if (!(b->sigh & 0x80000000)) EXCEPTION(EX_Invalid); +#endif /* PARANOID */ + + diff = exp0 - expb; + if ( diff == 0 ) + { + diff = st0_ptr->sigh - b->sigh; /* Works only if ms bits are + identical */ + if ( diff == 0 ) + { + diff = st0_ptr->sigl > b->sigl; + if ( diff == 0 ) + diff = -(st0_ptr->sigl < b->sigl); + } + } + + if ( diff > 0 ) + { + return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B) + | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? + COMP_Denormal : 0); + } + if ( diff < 0 ) + { + return ((st0_sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B) + | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? + COMP_Denormal : 0); + } + + return COMP_A_eq_B + | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ? + COMP_Denormal : 0); + +} + + +/* This function requires that st(0) is not empty */ +int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag) +{ + int f = 0, c; + + c = compare(loaded_data, loaded_tag); + + if (c & COMP_NaN) + { + EXCEPTION(EX_Invalid); + f = SW_C3 | SW_C2 | SW_C0; + } + else + switch (c & 7) + { + case COMP_A_lt_B: + f = SW_C0; + break; + case COMP_A_eq_B: + f = SW_C3; + break; + case COMP_A_gt_B: + f = 0; + break; + case COMP_No_Comp: + f = SW_C3 | SW_C2 | SW_C0; + break; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL|0x121); + f = SW_C3 | SW_C2 | SW_C0; + break; +#endif /* PARANOID */ + } + setcc(f); + if (c & COMP_Denormal) + { + return denormal_operand() < 0; + } + return 0; +} + + +static int compare_st_st(int nr) +{ + int f = 0, c; + FPU_REG *st_ptr; + + if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) ) + { + setcc(SW_C3 | SW_C2 | SW_C0); + /* Stack fault */ + EXCEPTION(EX_StackUnder); + return !(control_word & CW_Invalid); + } + + st_ptr = &st(nr); + c = compare(st_ptr, FPU_gettagi(nr)); + if (c & COMP_NaN) + { + setcc(SW_C3 | SW_C2 | SW_C0); + EXCEPTION(EX_Invalid); + return !(control_word & CW_Invalid); + } + else + switch (c & 7) + { + case COMP_A_lt_B: + f = SW_C0; + break; + case COMP_A_eq_B: + f = SW_C3; + break; + case COMP_A_gt_B: + f = 0; + break; + case COMP_No_Comp: + f = SW_C3 | SW_C2 | SW_C0; + break; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL|0x122); + f = SW_C3 | SW_C2 | SW_C0; + break; +#endif /* PARANOID */ + } + setcc(f); + if (c & COMP_Denormal) + { + return denormal_operand() < 0; + } + return 0; +} + + +static int compare_u_st_st(int nr) +{ + int f = 0, c; + FPU_REG *st_ptr; + + if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) ) + { + setcc(SW_C3 | SW_C2 | SW_C0); + /* Stack fault */ + EXCEPTION(EX_StackUnder); + return !(control_word & CW_Invalid); + } + + st_ptr = &st(nr); + c = compare(st_ptr, FPU_gettagi(nr)); + if (c & COMP_NaN) + { + setcc(SW_C3 | SW_C2 | SW_C0); + if (c & COMP_SNaN) /* This is the only difference between + un-ordered and ordinary comparisons */ + { + EXCEPTION(EX_Invalid); + return !(control_word & CW_Invalid); + } + return 0; + } + else + switch (c & 7) + { + case COMP_A_lt_B: + f = SW_C0; + break; + case COMP_A_eq_B: + f = SW_C3; + break; + case COMP_A_gt_B: + f = 0; + break; + case COMP_No_Comp: + f = SW_C3 | SW_C2 | SW_C0; + break; +#ifdef PARANOID + default: + EXCEPTION(EX_INTERNAL|0x123); + f = SW_C3 | SW_C2 | SW_C0; + break; +#endif /* PARANOID */ + } + setcc(f); + if (c & COMP_Denormal) + { + return denormal_operand() < 0; + } + return 0; +} + +/*---------------------------------------------------------------------------*/ + +void fcom_st(void) +{ + /* fcom st(i) */ + compare_st_st(FPU_rm); +} + + +void fcompst(void) +{ + /* fcomp st(i) */ + if ( !compare_st_st(FPU_rm) ) + FPU_pop(); +} + + +void fcompp(void) +{ + /* fcompp */ + if (FPU_rm != 1) + { + FPU_illegal(); + return; + } + if ( !compare_st_st(1) ) + poppop(); +} + + +void fucom_(void) +{ + /* fucom st(i) */ + compare_u_st_st(FPU_rm); + +} + + +void fucomp(void) +{ + /* fucomp st(i) */ + if ( !compare_u_st_st(FPU_rm) ) + FPU_pop(); +} + + +void fucompp(void) +{ + /* fucompp */ + if (FPU_rm == 1) + { + if ( !compare_u_st_st(1) ) + poppop(); + } + else + FPU_illegal(); +} diff --git a/arch/x86/math-emu/reg_constant.c b/arch/x86/math-emu/reg_constant.c new file mode 100644 index 00000000000..a8501580196 --- /dev/null +++ b/arch/x86/math-emu/reg_constant.c @@ -0,0 +1,120 @@ +/*---------------------------------------------------------------------------+ + | reg_constant.c | + | | + | All of the constant FPU_REGs | + | | + | Copyright (C) 1992,1993,1994,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_system.h" +#include "fpu_emu.h" +#include "status_w.h" +#include "reg_constant.h" +#include "control_w.h" + + +#define MAKE_REG(s,e,l,h) { l, h, \ + ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) } + +FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000); +#if 0 +FPU_REG const CONST_2 = MAKE_REG(POS, 1, 0x00000000, 0x80000000); +FPU_REG const CONST_HALF = MAKE_REG(POS, -1, 0x00000000, 0x80000000); +#endif /* 0 */ +static FPU_REG const CONST_L2T = MAKE_REG(POS, 1, 0xcd1b8afe, 0xd49a784b); +static FPU_REG const CONST_L2E = MAKE_REG(POS, 0, 0x5c17f0bc, 0xb8aa3b29); +FPU_REG const CONST_PI = MAKE_REG(POS, 1, 0x2168c235, 0xc90fdaa2); +FPU_REG const CONST_PI2 = MAKE_REG(POS, 0, 0x2168c235, 0xc90fdaa2); +FPU_REG const CONST_PI4 = MAKE_REG(POS, -1, 0x2168c235, 0xc90fdaa2); +static FPU_REG const CONST_LG2 = MAKE_REG(POS, -2, 0xfbcff799, 0x9a209a84); +static FPU_REG const CONST_LN2 = MAKE_REG(POS, -1, 0xd1cf79ac, 0xb17217f7); + +/* Extra bits to take pi/2 to more than 128 bits precision. */ +FPU_REG const CONST_PI2extra = MAKE_REG(NEG, -66, + 0xfc8f8cbb, 0xece675d1); + +/* Only the sign (and tag) is used in internal zeroes */ +FPU_REG const CONST_Z = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0); + +/* Only the sign and significand (and tag) are used in internal NaNs */ +/* The 80486 never generates one of these +FPU_REG const CONST_SNAN = MAKE_REG(POS, EXP_OVER, 0x00000001, 0x80000000); + */ +/* This is the real indefinite QNaN */ +FPU_REG const CONST_QNaN = MAKE_REG(NEG, EXP_OVER, 0x00000000, 0xC0000000); + +/* Only the sign (and tag) is used in internal infinities */ +FPU_REG const CONST_INF = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000); + + +static void fld_const(FPU_REG const *c, int adj, u_char tag) +{ + FPU_REG *st_new_ptr; + + if ( STACK_OVERFLOW ) + { + FPU_stack_overflow(); + return; + } + push(); + reg_copy(c, st_new_ptr); + st_new_ptr->sigl += adj; /* For all our fldxxx constants, we don't need to + borrow or carry. */ + FPU_settag0(tag); + clear_C1(); +} + +/* A fast way to find out whether x is one of RC_DOWN or RC_CHOP + (and not one of RC_RND or RC_UP). + */ +#define DOWN_OR_CHOP(x) (x & RC_DOWN) + +static void fld1(int rc) +{ + fld_const(&CONST_1, 0, TAG_Valid); +} + +static void fldl2t(int rc) +{ + fld_const(&CONST_L2T, (rc == RC_UP) ? 1 : 0, TAG_Valid); +} + +static void fldl2e(int rc) +{ + fld_const(&CONST_L2E, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); +} + +static void fldpi(int rc) +{ + fld_const(&CONST_PI, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); +} + +static void fldlg2(int rc) +{ + fld_const(&CONST_LG2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); +} + +static void fldln2(int rc) +{ + fld_const(&CONST_LN2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid); +} + +static void fldz(int rc) +{ + fld_const(&CONST_Z, 0, TAG_Zero); +} + +typedef void (*FUNC_RC)(int); + +static FUNC_RC constants_table[] = { + fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2, fldz, (FUNC_RC)FPU_illegal +}; + +void fconst(void) +{ + (constants_table[FPU_rm])(control_word & CW_RC); +} diff --git a/arch/x86/math-emu/reg_constant.h b/arch/x86/math-emu/reg_constant.h new file mode 100644 index 00000000000..1bffaec3a13 --- /dev/null +++ b/arch/x86/math-emu/reg_constant.h @@ -0,0 +1,25 @@ +/*---------------------------------------------------------------------------+ + | reg_constant.h | + | | + | Copyright (C) 1992 W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _REG_CONSTANT_H_ +#define _REG_CONSTANT_H_ + +#include "fpu_emu.h" + +extern FPU_REG const CONST_1; +extern FPU_REG const CONST_PI; +extern FPU_REG const CONST_PI2; +extern FPU_REG const CONST_PI2extra; +extern FPU_REG const CONST_PI4; +extern FPU_REG const CONST_Z; +extern FPU_REG const CONST_PINF; +extern FPU_REG const CONST_INF; +extern FPU_REG const CONST_MINF; +extern FPU_REG const CONST_QNaN; + +#endif /* _REG_CONSTANT_H_ */ diff --git a/arch/x86/math-emu/reg_convert.c b/arch/x86/math-emu/reg_convert.c new file mode 100644 index 00000000000..45a25875270 --- /dev/null +++ b/arch/x86/math-emu/reg_convert.c @@ -0,0 +1,53 @@ +/*---------------------------------------------------------------------------+ + | reg_convert.c | + | | + | Convert register representation. | + | | + | Copyright (C) 1992,1993,1994,1996,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_emu.h" + + +int FPU_to_exp16(FPU_REG const *a, FPU_REG *x) +{ + int sign = getsign(a); + + *(long long *)&(x->sigl) = *(const long long *)&(a->sigl); + + /* Set up the exponent as a 16 bit quantity. */ + setexponent16(x, exponent(a)); + + if ( exponent16(x) == EXP_UNDER ) + { + /* The number is a de-normal or pseudodenormal. */ + /* We only deal with the significand and exponent. */ + + if (x->sigh & 0x80000000) + { + /* Is a pseudodenormal. */ + /* This is non-80486 behaviour because the number + loses its 'denormal' identity. */ + addexponent(x, 1); + } + else + { + /* Is a denormal. */ + addexponent(x, 1); + FPU_normalize_nuo(x); + } + } + + if ( !(x->sigh & 0x80000000) ) + { + EXCEPTION(EX_INTERNAL | 0x180); + } + + return sign; +} + diff --git a/arch/x86/math-emu/reg_divide.c b/arch/x86/math-emu/reg_divide.c new file mode 100644 index 00000000000..5cee7ff920d --- /dev/null +++ b/arch/x86/math-emu/reg_divide.c @@ -0,0 +1,207 @@ +/*---------------------------------------------------------------------------+ + | reg_divide.c | + | | + | Divide one FPU_REG by another and put the result in a destination FPU_REG.| + | | + | Copyright (C) 1996 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@jacobi.maths.monash.edu.au | + | | + | Return value is the tag of the answer, or-ed with FPU_Exception if | + | one was raised, or -1 on internal error. | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | The destination may be any FPU_REG, including one of the source FPU_REGs. | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "reg_constant.h" +#include "fpu_emu.h" +#include "fpu_system.h" + +/* + Divide one register by another and put the result into a third register. + */ +int FPU_div(int flags, int rm, int control_w) +{ + FPU_REG x, y; + FPU_REG const *a, *b, *st0_ptr, *st_ptr; + FPU_REG *dest; + u_char taga, tagb, signa, signb, sign, saved_sign; + int tag, deststnr; + + if ( flags & DEST_RM ) + deststnr = rm; + else + deststnr = 0; + + if ( flags & REV ) + { + b = &st(0); + st0_ptr = b; + tagb = FPU_gettag0(); + if ( flags & LOADED ) + { + a = (FPU_REG *)rm; + taga = flags & 0x0f; + } + else + { + a = &st(rm); + st_ptr = a; + taga = FPU_gettagi(rm); + } + } + else + { + a = &st(0); + st0_ptr = a; + taga = FPU_gettag0(); + if ( flags & LOADED ) + { + b = (FPU_REG *)rm; + tagb = flags & 0x0f; + } + else + { + b = &st(rm); + st_ptr = b; + tagb = FPU_gettagi(rm); + } + } + + signa = getsign(a); + signb = getsign(b); + + sign = signa ^ signb; + + dest = &st(deststnr); + saved_sign = getsign(dest); + + if ( !(taga | tagb) ) + { + /* Both regs Valid, this should be the most common case. */ + reg_copy(a, &x); + reg_copy(b, &y); + setpositive(&x); + setpositive(&y); + tag = FPU_u_div(&x, &y, dest, control_w, sign); + + if ( tag < 0 ) + return tag; + + FPU_settagi(deststnr, tag); + return tag; + } + + if ( taga == TAG_Special ) + taga = FPU_Special(a); + if ( tagb == TAG_Special ) + tagb = FPU_Special(b); + + if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) + || ((taga == TW_Denormal) && (tagb == TAG_Valid)) + || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) + { + if ( denormal_operand() < 0 ) + return FPU_Exception; + + FPU_to_exp16(a, &x); + FPU_to_exp16(b, &y); + tag = FPU_u_div(&x, &y, dest, control_w, sign); + if ( tag < 0 ) + return tag; + + FPU_settagi(deststnr, tag); + return tag; + } + else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) ) + { + if ( tagb != TAG_Zero ) + { + /* Want to find Zero/Valid */ + if ( tagb == TW_Denormal ) + { + if ( denormal_operand() < 0 ) + return FPU_Exception; + } + + /* The result is zero. */ + FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); + setsign(dest, sign); + return TAG_Zero; + } + /* We have an exception condition, either 0/0 or Valid/Zero. */ + if ( taga == TAG_Zero ) + { + /* 0/0 */ + return arith_invalid(deststnr); + } + /* Valid/Zero */ + return FPU_divide_by_zero(deststnr, sign); + } + /* Must have infinities, NaNs, etc */ + else if ( (taga == TW_NaN) || (tagb == TW_NaN) ) + { + if ( flags & LOADED ) + return real_2op_NaN((FPU_REG *)rm, flags & 0x0f, 0, st0_ptr); + + if ( flags & DEST_RM ) + { + int tag; + tag = FPU_gettag0(); + if ( tag == TAG_Special ) + tag = FPU_Special(st0_ptr); + return real_2op_NaN(st0_ptr, tag, rm, (flags & REV) ? st0_ptr : &st(rm)); + } + else + { + int tag; + tag = FPU_gettagi(rm); + if ( tag == TAG_Special ) + tag = FPU_Special(&st(rm)); + return real_2op_NaN(&st(rm), tag, 0, (flags & REV) ? st0_ptr : &st(rm)); + } + } + else if (taga == TW_Infinity) + { + if (tagb == TW_Infinity) + { + /* infinity/infinity */ + return arith_invalid(deststnr); + } + else + { + /* tagb must be Valid or Zero */ + if ( (tagb == TW_Denormal) && (denormal_operand() < 0) ) + return FPU_Exception; + + /* Infinity divided by Zero or Valid does + not raise and exception, but returns Infinity */ + FPU_copy_to_regi(a, TAG_Special, deststnr); + setsign(dest, sign); + return taga; + } + } + else if (tagb == TW_Infinity) + { + if ( (taga == TW_Denormal) && (denormal_operand() < 0) ) + return FPU_Exception; + + /* The result is zero. */ + FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); + setsign(dest, sign); + return TAG_Zero; + } +#ifdef PARANOID + else + { + EXCEPTION(EX_INTERNAL|0x102); + return FPU_Exception; + } +#endif /* PARANOID */ + + return 0; +} diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c new file mode 100644 index 00000000000..e976caef649 --- /dev/null +++ b/arch/x86/math-emu/reg_ld_str.c @@ -0,0 +1,1375 @@ +/*---------------------------------------------------------------------------+ + | reg_ld_str.c | + | | + | All of the functions which transfer data between user memory and FPU_REGs.| + | | + | Copyright (C) 1992,1993,1994,1996,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Note: | + | The file contains code which accesses user memory. | + | Emulator static data may change when user memory is accessed, due to | + | other processes using the emulator while swapping is in progress. | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" + +#include + +#include "fpu_system.h" +#include "exception.h" +#include "reg_constant.h" +#include "control_w.h" +#include "status_w.h" + + +#define DOUBLE_Emax 1023 /* largest valid exponent */ +#define DOUBLE_Ebias 1023 +#define DOUBLE_Emin (-1022) /* smallest valid exponent */ + +#define SINGLE_Emax 127 /* largest valid exponent */ +#define SINGLE_Ebias 127 +#define SINGLE_Emin (-126) /* smallest valid exponent */ + + +static u_char normalize_no_excep(FPU_REG *r, int exp, int sign) +{ + u_char tag; + + setexponent16(r, exp); + + tag = FPU_normalize_nuo(r); + stdexp(r); + if ( sign ) + setnegative(r); + + return tag; +} + + +int FPU_tagof(FPU_REG *ptr) +{ + int exp; + + exp = exponent16(ptr) & 0x7fff; + if ( exp == 0 ) + { + if ( !(ptr->sigh | ptr->sigl) ) + { + return TAG_Zero; + } + /* The number is a de-normal or pseudodenormal. */ + return TAG_Special; + } + + if ( exp == 0x7fff ) + { + /* Is an Infinity, a NaN, or an unsupported data type. */ + return TAG_Special; + } + + if ( !(ptr->sigh & 0x80000000) ) + { + /* Unsupported data type. */ + /* Valid numbers have the ms bit set to 1. */ + /* Unnormal. */ + return TAG_Special; + } + + return TAG_Valid; +} + + +/* Get a long double from user memory */ +int FPU_load_extended(long double __user *s, int stnr) +{ + FPU_REG *sti_ptr = &st(stnr); + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, s, 10); + __copy_from_user(sti_ptr, s, 10); + RE_ENTRANT_CHECK_ON; + + return FPU_tagof(sti_ptr); +} + + +/* Get a double from user memory */ +int FPU_load_double(double __user *dfloat, FPU_REG *loaded_data) +{ + int exp, tag, negative; + unsigned m64, l64; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, dfloat, 8); + FPU_get_user(m64, 1 + (unsigned long __user *) dfloat); + FPU_get_user(l64, (unsigned long __user *) dfloat); + RE_ENTRANT_CHECK_ON; + + negative = (m64 & 0x80000000) ? SIGN_Negative : SIGN_Positive; + exp = ((m64 & 0x7ff00000) >> 20) - DOUBLE_Ebias + EXTENDED_Ebias; + m64 &= 0xfffff; + if ( exp > DOUBLE_Emax + EXTENDED_Ebias ) + { + /* Infinity or NaN */ + if ((m64 == 0) && (l64 == 0)) + { + /* +- infinity */ + loaded_data->sigh = 0x80000000; + loaded_data->sigl = 0x00000000; + exp = EXP_Infinity + EXTENDED_Ebias; + tag = TAG_Special; + } + else + { + /* Must be a signaling or quiet NaN */ + exp = EXP_NaN + EXTENDED_Ebias; + loaded_data->sigh = (m64 << 11) | 0x80000000; + loaded_data->sigh |= l64 >> 21; + loaded_data->sigl = l64 << 11; + tag = TAG_Special; /* The calling function must look for NaNs */ + } + } + else if ( exp < DOUBLE_Emin + EXTENDED_Ebias ) + { + /* Zero or de-normal */ + if ((m64 == 0) && (l64 == 0)) + { + /* Zero */ + reg_copy(&CONST_Z, loaded_data); + exp = 0; + tag = TAG_Zero; + } + else + { + /* De-normal */ + loaded_data->sigh = m64 << 11; + loaded_data->sigh |= l64 >> 21; + loaded_data->sigl = l64 << 11; + + return normalize_no_excep(loaded_data, DOUBLE_Emin, negative) + | (denormal_operand() < 0 ? FPU_Exception : 0); + } + } + else + { + loaded_data->sigh = (m64 << 11) | 0x80000000; + loaded_data->sigh |= l64 >> 21; + loaded_data->sigl = l64 << 11; + + tag = TAG_Valid; + } + + setexponent16(loaded_data, exp | negative); + + return tag; +} + + +/* Get a float from user memory */ +int FPU_load_single(float __user *single, FPU_REG *loaded_data) +{ + unsigned m32; + int exp, tag, negative; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, single, 4); + FPU_get_user(m32, (unsigned long __user *) single); + RE_ENTRANT_CHECK_ON; + + negative = (m32 & 0x80000000) ? SIGN_Negative : SIGN_Positive; + + if (!(m32 & 0x7fffffff)) + { + /* Zero */ + reg_copy(&CONST_Z, loaded_data); + addexponent(loaded_data, negative); + return TAG_Zero; + } + exp = ((m32 & 0x7f800000) >> 23) - SINGLE_Ebias + EXTENDED_Ebias; + m32 = (m32 & 0x7fffff) << 8; + if ( exp < SINGLE_Emin + EXTENDED_Ebias ) + { + /* De-normals */ + loaded_data->sigh = m32; + loaded_data->sigl = 0; + + return normalize_no_excep(loaded_data, SINGLE_Emin, negative) + | (denormal_operand() < 0 ? FPU_Exception : 0); + } + else if ( exp > SINGLE_Emax + EXTENDED_Ebias ) + { + /* Infinity or NaN */ + if ( m32 == 0 ) + { + /* +- infinity */ + loaded_data->sigh = 0x80000000; + loaded_data->sigl = 0x00000000; + exp = EXP_Infinity + EXTENDED_Ebias; + tag = TAG_Special; + } + else + { + /* Must be a signaling or quiet NaN */ + exp = EXP_NaN + EXTENDED_Ebias; + loaded_data->sigh = m32 | 0x80000000; + loaded_data->sigl = 0; + tag = TAG_Special; /* The calling function must look for NaNs */ + } + } + else + { + loaded_data->sigh = m32 | 0x80000000; + loaded_data->sigl = 0; + tag = TAG_Valid; + } + + setexponent16(loaded_data, exp | negative); /* Set the sign. */ + + return tag; +} + + +/* Get a long long from user memory */ +int FPU_load_int64(long long __user *_s) +{ + long long s; + int sign; + FPU_REG *st0_ptr = &st(0); + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, _s, 8); + if (copy_from_user(&s,_s,8)) + FPU_abort; + RE_ENTRANT_CHECK_ON; + + if (s == 0) + { + reg_copy(&CONST_Z, st0_ptr); + return TAG_Zero; + } + + if (s > 0) + sign = SIGN_Positive; + else + { + s = -s; + sign = SIGN_Negative; + } + + significand(st0_ptr) = s; + + return normalize_no_excep(st0_ptr, 63, sign); +} + + +/* Get a long from user memory */ +int FPU_load_int32(long __user *_s, FPU_REG *loaded_data) +{ + long s; + int negative; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, _s, 4); + FPU_get_user(s, _s); + RE_ENTRANT_CHECK_ON; + + if (s == 0) + { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; } + + if (s > 0) + negative = SIGN_Positive; + else + { + s = -s; + negative = SIGN_Negative; + } + + loaded_data->sigh = s; + loaded_data->sigl = 0; + + return normalize_no_excep(loaded_data, 31, negative); +} + + +/* Get a short from user memory */ +int FPU_load_int16(short __user *_s, FPU_REG *loaded_data) +{ + int s, negative; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, _s, 2); + /* Cast as short to get the sign extended. */ + FPU_get_user(s, _s); + RE_ENTRANT_CHECK_ON; + + if (s == 0) + { reg_copy(&CONST_Z, loaded_data); return TAG_Zero; } + + if (s > 0) + negative = SIGN_Positive; + else + { + s = -s; + negative = SIGN_Negative; + } + + loaded_data->sigh = s << 16; + loaded_data->sigl = 0; + + return normalize_no_excep(loaded_data, 15, negative); +} + + +/* Get a packed bcd array from user memory */ +int FPU_load_bcd(u_char __user *s) +{ + FPU_REG *st0_ptr = &st(0); + int pos; + u_char bcd; + long long l=0; + int sign; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, s, 10); + RE_ENTRANT_CHECK_ON; + for ( pos = 8; pos >= 0; pos--) + { + l *= 10; + RE_ENTRANT_CHECK_OFF; + FPU_get_user(bcd, s+pos); + RE_ENTRANT_CHECK_ON; + l += bcd >> 4; + l *= 10; + l += bcd & 0x0f; + } + + RE_ENTRANT_CHECK_OFF; + FPU_get_user(sign, s+9); + sign = sign & 0x80 ? SIGN_Negative : SIGN_Positive; + RE_ENTRANT_CHECK_ON; + + if ( l == 0 ) + { + reg_copy(&CONST_Z, st0_ptr); + addexponent(st0_ptr, sign); /* Set the sign. */ + return TAG_Zero; + } + else + { + significand(st0_ptr) = l; + return normalize_no_excep(st0_ptr, 63, sign); + } +} + +/*===========================================================================*/ + +/* Put a long double into user memory */ +int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag, long double __user *d) +{ + /* + The only exception raised by an attempt to store to an + extended format is the Invalid Stack exception, i.e. + attempting to store from an empty register. + */ + + if ( st0_tag != TAG_Empty ) + { + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, d, 10); + + FPU_put_user(st0_ptr->sigl, (unsigned long __user *) d); + FPU_put_user(st0_ptr->sigh, (unsigned long __user *) ((u_char __user *)d + 4)); + FPU_put_user(exponent16(st0_ptr), (unsigned short __user *) ((u_char __user *)d + 8)); + RE_ENTRANT_CHECK_ON; + + return 1; + } + + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + if ( control_word & CW_Invalid ) + { + /* The masked response */ + /* Put out the QNaN indefinite */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,d,10); + FPU_put_user(0, (unsigned long __user *) d); + FPU_put_user(0xc0000000, 1 + (unsigned long __user *) d); + FPU_put_user(0xffff, 4 + (short __user *) d); + RE_ENTRANT_CHECK_ON; + return 1; + } + else + return 0; + +} + + +/* Put a double into user memory */ +int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat) +{ + unsigned long l[2]; + unsigned long increment = 0; /* avoid gcc warnings */ + int precision_loss; + int exp; + FPU_REG tmp; + + if ( st0_tag == TAG_Valid ) + { + reg_copy(st0_ptr, &tmp); + exp = exponent(&tmp); + + if ( exp < DOUBLE_Emin ) /* It may be a denormal */ + { + addexponent(&tmp, -DOUBLE_Emin + 52); /* largest exp to be 51 */ + + denormal_arg: + + if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) ) + { +#ifdef PECULIAR_486 + /* Did it round to a non-denormal ? */ + /* This behaviour might be regarded as peculiar, it appears + that the 80486 rounds to the dest precision, then + converts to decide underflow. */ + if ( !((tmp.sigh == 0x00100000) && (tmp.sigl == 0) && + (st0_ptr->sigl & 0x000007ff)) ) +#endif /* PECULIAR_486 */ + { + EXCEPTION(EX_Underflow); + /* This is a special case: see sec 16.2.5.1 of + the 80486 book */ + if ( !(control_word & CW_Underflow) ) + return 0; + } + EXCEPTION(precision_loss); + if ( !(control_word & CW_Precision) ) + return 0; + } + l[0] = tmp.sigl; + l[1] = tmp.sigh; + } + else + { + if ( tmp.sigl & 0x000007ff ) + { + precision_loss = 1; + switch (control_word & CW_RC) + { + case RC_RND: + /* Rounding can get a little messy.. */ + increment = ((tmp.sigl & 0x7ff) > 0x400) | /* nearest */ + ((tmp.sigl & 0xc00) == 0xc00); /* odd -> even */ + break; + case RC_DOWN: /* towards -infinity */ + increment = signpositive(&tmp) ? 0 : tmp.sigl & 0x7ff; + break; + case RC_UP: /* towards +infinity */ + increment = signpositive(&tmp) ? tmp.sigl & 0x7ff : 0; + break; + case RC_CHOP: + increment = 0; + break; + } + + /* Truncate the mantissa */ + tmp.sigl &= 0xfffff800; + + if ( increment ) + { + if ( tmp.sigl >= 0xfffff800 ) + { + /* the sigl part overflows */ + if ( tmp.sigh == 0xffffffff ) + { + /* The sigh part overflows */ + tmp.sigh = 0x80000000; + exp++; + if (exp >= EXP_OVER) + goto overflow; + } + else + { + tmp.sigh ++; + } + tmp.sigl = 0x00000000; + } + else + { + /* We only need to increment sigl */ + tmp.sigl += 0x00000800; + } + } + } + else + precision_loss = 0; + + l[0] = (tmp.sigl >> 11) | (tmp.sigh << 21); + l[1] = ((tmp.sigh >> 11) & 0xfffff); + + if ( exp > DOUBLE_Emax ) + { + overflow: + EXCEPTION(EX_Overflow); + if ( !(control_word & CW_Overflow) ) + return 0; + set_precision_flag_up(); + if ( !(control_word & CW_Precision) ) + return 0; + + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + /* Overflow to infinity */ + l[0] = 0x00000000; /* Set to */ + l[1] = 0x7ff00000; /* + INF */ + } + else + { + if ( precision_loss ) + { + if ( increment ) + set_precision_flag_up(); + else + set_precision_flag_down(); + } + /* Add the exponent */ + l[1] |= (((exp+DOUBLE_Ebias) & 0x7ff) << 20); + } + } + } + else if (st0_tag == TAG_Zero) + { + /* Number is zero */ + l[0] = 0; + l[1] = 0; + } + else if ( st0_tag == TAG_Special ) + { + st0_tag = FPU_Special(st0_ptr); + if ( st0_tag == TW_Denormal ) + { + /* A denormal will always underflow. */ +#ifndef PECULIAR_486 + /* An 80486 is supposed to be able to generate + a denormal exception here, but... */ + /* Underflow has priority. */ + if ( control_word & CW_Underflow ) + denormal_operand(); +#endif /* PECULIAR_486 */ + reg_copy(st0_ptr, &tmp); + goto denormal_arg; + } + else if (st0_tag == TW_Infinity) + { + l[0] = 0; + l[1] = 0x7ff00000; + } + else if (st0_tag == TW_NaN) + { + /* Is it really a NaN ? */ + if ( (exponent(st0_ptr) == EXP_OVER) + && (st0_ptr->sigh & 0x80000000) ) + { + /* See if we can get a valid NaN from the FPU_REG */ + l[0] = (st0_ptr->sigl >> 11) | (st0_ptr->sigh << 21); + l[1] = ((st0_ptr->sigh >> 11) & 0xfffff); + if ( !(st0_ptr->sigh & 0x40000000) ) + { + /* It is a signalling NaN */ + EXCEPTION(EX_Invalid); + if ( !(control_word & CW_Invalid) ) + return 0; + l[1] |= (0x40000000 >> 11); + } + l[1] |= 0x7ff00000; + } + else + { + /* It is an unsupported data type */ + EXCEPTION(EX_Invalid); + if ( !(control_word & CW_Invalid) ) + return 0; + l[0] = 0; + l[1] = 0xfff80000; + } + } + } + else if ( st0_tag == TAG_Empty ) + { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + if ( control_word & CW_Invalid ) + { + /* The masked response */ + /* Put out the QNaN indefinite */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,dfloat,8); + FPU_put_user(0, (unsigned long __user *) dfloat); + FPU_put_user(0xfff80000, 1 + (unsigned long __user *) dfloat); + RE_ENTRANT_CHECK_ON; + return 1; + } + else + return 0; + } + if ( getsign(st0_ptr) ) + l[1] |= 0x80000000; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,dfloat,8); + FPU_put_user(l[0], (unsigned long __user *)dfloat); + FPU_put_user(l[1], 1 + (unsigned long __user *)dfloat); + RE_ENTRANT_CHECK_ON; + + return 1; +} + + +/* Put a float into user memory */ +int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, float __user *single) +{ + long templ = 0; + unsigned long increment = 0; /* avoid gcc warnings */ + int precision_loss; + int exp; + FPU_REG tmp; + + if ( st0_tag == TAG_Valid ) + { + + reg_copy(st0_ptr, &tmp); + exp = exponent(&tmp); + + if ( exp < SINGLE_Emin ) + { + addexponent(&tmp, -SINGLE_Emin + 23); /* largest exp to be 22 */ + + denormal_arg: + + if ( (precision_loss = FPU_round_to_int(&tmp, st0_tag)) ) + { +#ifdef PECULIAR_486 + /* Did it round to a non-denormal ? */ + /* This behaviour might be regarded as peculiar, it appears + that the 80486 rounds to the dest precision, then + converts to decide underflow. */ + if ( !((tmp.sigl == 0x00800000) && + ((st0_ptr->sigh & 0x000000ff) || st0_ptr->sigl)) ) +#endif /* PECULIAR_486 */ + { + EXCEPTION(EX_Underflow); + /* This is a special case: see sec 16.2.5.1 of + the 80486 book */ + if ( !(control_word & CW_Underflow) ) + return 0; + } + EXCEPTION(precision_loss); + if ( !(control_word & CW_Precision) ) + return 0; + } + templ = tmp.sigl; + } + else + { + if ( tmp.sigl | (tmp.sigh & 0x000000ff) ) + { + unsigned long sigh = tmp.sigh; + unsigned long sigl = tmp.sigl; + + precision_loss = 1; + switch (control_word & CW_RC) + { + case RC_RND: + increment = ((sigh & 0xff) > 0x80) /* more than half */ + || (((sigh & 0xff) == 0x80) && sigl) /* more than half */ + || ((sigh & 0x180) == 0x180); /* round to even */ + break; + case RC_DOWN: /* towards -infinity */ + increment = signpositive(&tmp) + ? 0 : (sigl | (sigh & 0xff)); + break; + case RC_UP: /* towards +infinity */ + increment = signpositive(&tmp) + ? (sigl | (sigh & 0xff)) : 0; + break; + case RC_CHOP: + increment = 0; + break; + } + + /* Truncate part of the mantissa */ + tmp.sigl = 0; + + if (increment) + { + if ( sigh >= 0xffffff00 ) + { + /* The sigh part overflows */ + tmp.sigh = 0x80000000; + exp++; + if ( exp >= EXP_OVER ) + goto overflow; + } + else + { + tmp.sigh &= 0xffffff00; + tmp.sigh += 0x100; + } + } + else + { + tmp.sigh &= 0xffffff00; /* Finish the truncation */ + } + } + else + precision_loss = 0; + + templ = (tmp.sigh >> 8) & 0x007fffff; + + if ( exp > SINGLE_Emax ) + { + overflow: + EXCEPTION(EX_Overflow); + if ( !(control_word & CW_Overflow) ) + return 0; + set_precision_flag_up(); + if ( !(control_word & CW_Precision) ) + return 0; + + /* This is a special case: see sec 16.2.5.1 of the 80486 book. */ + /* Masked response is overflow to infinity. */ + templ = 0x7f800000; + } + else + { + if ( precision_loss ) + { + if ( increment ) + set_precision_flag_up(); + else + set_precision_flag_down(); + } + /* Add the exponent */ + templ |= ((exp+SINGLE_Ebias) & 0xff) << 23; + } + } + } + else if (st0_tag == TAG_Zero) + { + templ = 0; + } + else if ( st0_tag == TAG_Special ) + { + st0_tag = FPU_Special(st0_ptr); + if (st0_tag == TW_Denormal) + { + reg_copy(st0_ptr, &tmp); + + /* A denormal will always underflow. */ +#ifndef PECULIAR_486 + /* An 80486 is supposed to be able to generate + a denormal exception here, but... */ + /* Underflow has priority. */ + if ( control_word & CW_Underflow ) + denormal_operand(); +#endif /* PECULIAR_486 */ + goto denormal_arg; + } + else if (st0_tag == TW_Infinity) + { + templ = 0x7f800000; + } + else if (st0_tag == TW_NaN) + { + /* Is it really a NaN ? */ + if ( (exponent(st0_ptr) == EXP_OVER) && (st0_ptr->sigh & 0x80000000) ) + { + /* See if we can get a valid NaN from the FPU_REG */ + templ = st0_ptr->sigh >> 8; + if ( !(st0_ptr->sigh & 0x40000000) ) + { + /* It is a signalling NaN */ + EXCEPTION(EX_Invalid); + if ( !(control_word & CW_Invalid) ) + return 0; + templ |= (0x40000000 >> 8); + } + templ |= 0x7f800000; + } + else + { + /* It is an unsupported data type */ + EXCEPTION(EX_Invalid); + if ( !(control_word & CW_Invalid) ) + return 0; + templ = 0xffc00000; + } + } +#ifdef PARANOID + else + { + EXCEPTION(EX_INTERNAL|0x164); + return 0; + } +#endif + } + else if ( st0_tag == TAG_Empty ) + { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + if ( control_word & EX_Invalid ) + { + /* The masked response */ + /* Put out the QNaN indefinite */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,single,4); + FPU_put_user(0xffc00000, (unsigned long __user *) single); + RE_ENTRANT_CHECK_ON; + return 1; + } + else + return 0; + } +#ifdef PARANOID + else + { + EXCEPTION(EX_INTERNAL|0x163); + return 0; + } +#endif + if ( getsign(st0_ptr) ) + templ |= 0x80000000; + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,single,4); + FPU_put_user(templ,(unsigned long __user *) single); + RE_ENTRANT_CHECK_ON; + + return 1; +} + + +/* Put a long long into user memory */ +int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, long long __user *d) +{ + FPU_REG t; + long long tll; + int precision_loss; + + if ( st0_tag == TAG_Empty ) + { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + goto invalid_operand; + } + else if ( st0_tag == TAG_Special ) + { + st0_tag = FPU_Special(st0_ptr); + if ( (st0_tag == TW_Infinity) || + (st0_tag == TW_NaN) ) + { + EXCEPTION(EX_Invalid); + goto invalid_operand; + } + } + + reg_copy(st0_ptr, &t); + precision_loss = FPU_round_to_int(&t, st0_tag); + ((long *)&tll)[0] = t.sigl; + ((long *)&tll)[1] = t.sigh; + if ( (precision_loss == 1) || + ((t.sigh & 0x80000000) && + !((t.sigh == 0x80000000) && (t.sigl == 0) && + signnegative(&t))) ) + { + EXCEPTION(EX_Invalid); + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + invalid_operand: + if ( control_word & EX_Invalid ) + { + /* Produce something like QNaN "indefinite" */ + tll = 0x8000000000000000LL; + } + else + return 0; + } + else + { + if ( precision_loss ) + set_precision_flag(precision_loss); + if ( signnegative(&t) ) + tll = - tll; + } + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,d,8); + if (copy_to_user(d, &tll, 8)) + FPU_abort; + RE_ENTRANT_CHECK_ON; + + return 1; +} + + +/* Put a long into user memory */ +int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, long __user *d) +{ + FPU_REG t; + int precision_loss; + + if ( st0_tag == TAG_Empty ) + { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + goto invalid_operand; + } + else if ( st0_tag == TAG_Special ) + { + st0_tag = FPU_Special(st0_ptr); + if ( (st0_tag == TW_Infinity) || + (st0_tag == TW_NaN) ) + { + EXCEPTION(EX_Invalid); + goto invalid_operand; + } + } + + reg_copy(st0_ptr, &t); + precision_loss = FPU_round_to_int(&t, st0_tag); + if (t.sigh || + ((t.sigl & 0x80000000) && + !((t.sigl == 0x80000000) && signnegative(&t))) ) + { + EXCEPTION(EX_Invalid); + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + invalid_operand: + if ( control_word & EX_Invalid ) + { + /* Produce something like QNaN "indefinite" */ + t.sigl = 0x80000000; + } + else + return 0; + } + else + { + if ( precision_loss ) + set_precision_flag(precision_loss); + if ( signnegative(&t) ) + t.sigl = -(long)t.sigl; + } + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,d,4); + FPU_put_user(t.sigl, (unsigned long __user *) d); + RE_ENTRANT_CHECK_ON; + + return 1; +} + + +/* Put a short into user memory */ +int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d) +{ + FPU_REG t; + int precision_loss; + + if ( st0_tag == TAG_Empty ) + { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + goto invalid_operand; + } + else if ( st0_tag == TAG_Special ) + { + st0_tag = FPU_Special(st0_ptr); + if ( (st0_tag == TW_Infinity) || + (st0_tag == TW_NaN) ) + { + EXCEPTION(EX_Invalid); + goto invalid_operand; + } + } + + reg_copy(st0_ptr, &t); + precision_loss = FPU_round_to_int(&t, st0_tag); + if (t.sigh || + ((t.sigl & 0xffff8000) && + !((t.sigl == 0x8000) && signnegative(&t))) ) + { + EXCEPTION(EX_Invalid); + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + invalid_operand: + if ( control_word & EX_Invalid ) + { + /* Produce something like QNaN "indefinite" */ + t.sigl = 0x8000; + } + else + return 0; + } + else + { + if ( precision_loss ) + set_precision_flag(precision_loss); + if ( signnegative(&t) ) + t.sigl = -t.sigl; + } + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,d,2); + FPU_put_user((short)t.sigl, d); + RE_ENTRANT_CHECK_ON; + + return 1; +} + + +/* Put a packed bcd array into user memory */ +int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d) +{ + FPU_REG t; + unsigned long long ll; + u_char b; + int i, precision_loss; + u_char sign = (getsign(st0_ptr) == SIGN_NEG) ? 0x80 : 0; + + if ( st0_tag == TAG_Empty ) + { + /* Empty register (stack underflow) */ + EXCEPTION(EX_StackUnder); + goto invalid_operand; + } + else if ( st0_tag == TAG_Special ) + { + st0_tag = FPU_Special(st0_ptr); + if ( (st0_tag == TW_Infinity) || + (st0_tag == TW_NaN) ) + { + EXCEPTION(EX_Invalid); + goto invalid_operand; + } + } + + reg_copy(st0_ptr, &t); + precision_loss = FPU_round_to_int(&t, st0_tag); + ll = significand(&t); + + /* Check for overflow, by comparing with 999999999999999999 decimal. */ + if ( (t.sigh > 0x0de0b6b3) || + ((t.sigh == 0x0de0b6b3) && (t.sigl > 0xa763ffff)) ) + { + EXCEPTION(EX_Invalid); + /* This is a special case: see sec 16.2.5.1 of the 80486 book */ + invalid_operand: + if ( control_word & CW_Invalid ) + { + /* Produce the QNaN "indefinite" */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,d,10); + for ( i = 0; i < 7; i++) + FPU_put_user(0, d+i); /* These bytes "undefined" */ + FPU_put_user(0xc0, d+7); /* This byte "undefined" */ + FPU_put_user(0xff, d+8); + FPU_put_user(0xff, d+9); + RE_ENTRANT_CHECK_ON; + return 1; + } + else + return 0; + } + else if ( precision_loss ) + { + /* Precision loss doesn't stop the data transfer */ + set_precision_flag(precision_loss); + } + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,d,10); + RE_ENTRANT_CHECK_ON; + for ( i = 0; i < 9; i++) + { + b = FPU_div_small(&ll, 10); + b |= (FPU_div_small(&ll, 10)) << 4; + RE_ENTRANT_CHECK_OFF; + FPU_put_user(b, d+i); + RE_ENTRANT_CHECK_ON; + } + RE_ENTRANT_CHECK_OFF; + FPU_put_user(sign, d+9); + RE_ENTRANT_CHECK_ON; + + return 1; +} + +/*===========================================================================*/ + +/* r gets mangled such that sig is int, sign: + it is NOT normalized */ +/* The return value (in eax) is zero if the result is exact, + if bits are changed due to rounding, truncation, etc, then + a non-zero value is returned */ +/* Overflow is signalled by a non-zero return value (in eax). + In the case of overflow, the returned significand always has the + largest possible value */ +int FPU_round_to_int(FPU_REG *r, u_char tag) +{ + u_char very_big; + unsigned eax; + + if (tag == TAG_Zero) + { + /* Make sure that zero is returned */ + significand(r) = 0; + return 0; /* o.k. */ + } + + if (exponent(r) > 63) + { + r->sigl = r->sigh = ~0; /* The largest representable number */ + return 1; /* overflow */ + } + + eax = FPU_shrxs(&r->sigl, 63 - exponent(r)); + very_big = !(~(r->sigh) | ~(r->sigl)); /* test for 0xfff...fff */ +#define half_or_more (eax & 0x80000000) +#define frac_part (eax) +#define more_than_half ((eax & 0x80000001) == 0x80000001) + switch (control_word & CW_RC) + { + case RC_RND: + if ( more_than_half /* nearest */ + || (half_or_more && (r->sigl & 1)) ) /* odd -> even */ + { + if ( very_big ) return 1; /* overflow */ + significand(r) ++; + return PRECISION_LOST_UP; + } + break; + case RC_DOWN: + if (frac_part && getsign(r)) + { + if ( very_big ) return 1; /* overflow */ + significand(r) ++; + return PRECISION_LOST_UP; + } + break; + case RC_UP: + if (frac_part && !getsign(r)) + { + if ( very_big ) return 1; /* overflow */ + significand(r) ++; + return PRECISION_LOST_UP; + } + break; + case RC_CHOP: + break; + } + + return eax ? PRECISION_LOST_DOWN : 0; + +} + +/*===========================================================================*/ + +u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s) +{ + unsigned short tag_word = 0; + u_char tag; + int i; + + if ( (addr_modes.default_mode == VM86) || + ((addr_modes.default_mode == PM16) + ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) ) + { + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, s, 0x0e); + FPU_get_user(control_word, (unsigned short __user *) s); + FPU_get_user(partial_status, (unsigned short __user *) (s+2)); + FPU_get_user(tag_word, (unsigned short __user *) (s+4)); + FPU_get_user(instruction_address.offset, (unsigned short __user *) (s+6)); + FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+8)); + FPU_get_user(operand_address.offset, (unsigned short __user *) (s+0x0a)); + FPU_get_user(operand_address.selector, (unsigned short __user *) (s+0x0c)); + RE_ENTRANT_CHECK_ON; + s += 0x0e; + if ( addr_modes.default_mode == VM86 ) + { + instruction_address.offset + += (instruction_address.selector & 0xf000) << 4; + operand_address.offset += (operand_address.selector & 0xf000) << 4; + } + } + else + { + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ, s, 0x1c); + FPU_get_user(control_word, (unsigned short __user *) s); + FPU_get_user(partial_status, (unsigned short __user *) (s+4)); + FPU_get_user(tag_word, (unsigned short __user *) (s+8)); + FPU_get_user(instruction_address.offset, (unsigned long __user *) (s+0x0c)); + FPU_get_user(instruction_address.selector, (unsigned short __user *) (s+0x10)); + FPU_get_user(instruction_address.opcode, (unsigned short __user *) (s+0x12)); + FPU_get_user(operand_address.offset, (unsigned long __user *) (s+0x14)); + FPU_get_user(operand_address.selector, (unsigned long __user *) (s+0x18)); + RE_ENTRANT_CHECK_ON; + s += 0x1c; + } + +#ifdef PECULIAR_486 + control_word &= ~0xe080; +#endif /* PECULIAR_486 */ + + top = (partial_status >> SW_Top_Shift) & 7; + + if ( partial_status & ~control_word & CW_Exceptions ) + partial_status |= (SW_Summary | SW_Backward); + else + partial_status &= ~(SW_Summary | SW_Backward); + + for ( i = 0; i < 8; i++ ) + { + tag = tag_word & 3; + tag_word >>= 2; + + if ( tag == TAG_Empty ) + /* New tag is empty. Accept it */ + FPU_settag(i, TAG_Empty); + else if ( FPU_gettag(i) == TAG_Empty ) + { + /* Old tag is empty and new tag is not empty. New tag is determined + by old reg contents */ + if ( exponent(&fpu_register(i)) == - EXTENDED_Ebias ) + { + if ( !(fpu_register(i).sigl | fpu_register(i).sigh) ) + FPU_settag(i, TAG_Zero); + else + FPU_settag(i, TAG_Special); + } + else if ( exponent(&fpu_register(i)) == 0x7fff - EXTENDED_Ebias ) + { + FPU_settag(i, TAG_Special); + } + else if ( fpu_register(i).sigh & 0x80000000 ) + FPU_settag(i, TAG_Valid); + else + FPU_settag(i, TAG_Special); /* An Un-normal */ + } + /* Else old tag is not empty and new tag is not empty. Old tag + remains correct */ + } + + return s; +} + + +void frstor(fpu_addr_modes addr_modes, u_char __user *data_address) +{ + int i, regnr; + u_char __user *s = fldenv(addr_modes, data_address); + int offset = (top & 7) * 10, other = 80 - offset; + + /* Copy all registers in stack order. */ + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_READ,s,80); + __copy_from_user(register_base+offset, s, other); + if ( offset ) + __copy_from_user(register_base, s+other, offset); + RE_ENTRANT_CHECK_ON; + + for ( i = 0; i < 8; i++ ) + { + regnr = (i+top) & 7; + if ( FPU_gettag(regnr) != TAG_Empty ) + /* The loaded data over-rides all other cases. */ + FPU_settag(regnr, FPU_tagof(&st(i))); + } + +} + + +u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d) +{ + if ( (addr_modes.default_mode == VM86) || + ((addr_modes.default_mode == PM16) + ^ (addr_modes.override.operand_size == OP_SIZE_PREFIX)) ) + { + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,d,14); +#ifdef PECULIAR_486 + FPU_put_user(control_word & ~0xe080, (unsigned long __user *) d); +#else + FPU_put_user(control_word, (unsigned short __user *) d); +#endif /* PECULIAR_486 */ + FPU_put_user(status_word(), (unsigned short __user *) (d+2)); + FPU_put_user(fpu_tag_word, (unsigned short __user *) (d+4)); + FPU_put_user(instruction_address.offset, (unsigned short __user *) (d+6)); + FPU_put_user(operand_address.offset, (unsigned short __user *) (d+0x0a)); + if ( addr_modes.default_mode == VM86 ) + { + FPU_put_user((instruction_address.offset & 0xf0000) >> 4, + (unsigned short __user *) (d+8)); + FPU_put_user((operand_address.offset & 0xf0000) >> 4, + (unsigned short __user *) (d+0x0c)); + } + else + { + FPU_put_user(instruction_address.selector, (unsigned short __user *) (d+8)); + FPU_put_user(operand_address.selector, (unsigned short __user *) (d+0x0c)); + } + RE_ENTRANT_CHECK_ON; + d += 0x0e; + } + else + { + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE, d, 7*4); +#ifdef PECULIAR_486 + control_word &= ~0xe080; + /* An 80486 sets nearly all of the reserved bits to 1. */ + control_word |= 0xffff0040; + partial_status = status_word() | 0xffff0000; + fpu_tag_word |= 0xffff0000; + I387.soft.fcs &= ~0xf8000000; + I387.soft.fos |= 0xffff0000; +#endif /* PECULIAR_486 */ + if (__copy_to_user(d, &control_word, 7*4)) + FPU_abort; + RE_ENTRANT_CHECK_ON; + d += 0x1c; + } + + control_word |= CW_Exceptions; + partial_status &= ~(SW_Summary | SW_Backward); + + return d; +} + + +void fsave(fpu_addr_modes addr_modes, u_char __user *data_address) +{ + u_char __user *d; + int offset = (top & 7) * 10, other = 80 - offset; + + d = fstenv(addr_modes, data_address); + + RE_ENTRANT_CHECK_OFF; + FPU_access_ok(VERIFY_WRITE,d,80); + + /* Copy all registers in stack order. */ + if (__copy_to_user(d, register_base+offset, other)) + FPU_abort; + if ( offset ) + if (__copy_to_user(d+other, register_base, offset)) + FPU_abort; + RE_ENTRANT_CHECK_ON; + + finit(); +} + +/*===========================================================================*/ diff --git a/arch/x86/math-emu/reg_mul.c b/arch/x86/math-emu/reg_mul.c new file mode 100644 index 00000000000..40f50b61bc6 --- /dev/null +++ b/arch/x86/math-emu/reg_mul.c @@ -0,0 +1,132 @@ +/*---------------------------------------------------------------------------+ + | reg_mul.c | + | | + | Multiply one FPU_REG by another, put the result in a destination FPU_REG. | + | | + | Copyright (C) 1992,1993,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | Returns the tag of the result if no exceptions or errors occurred. | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | The destination may be any FPU_REG, including one of the source FPU_REGs. | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" +#include "exception.h" +#include "reg_constant.h" +#include "fpu_system.h" + + +/* + Multiply two registers to give a register result. + The sources are st(deststnr) and (b,tagb,signb). + The destination is st(deststnr). + */ +/* This routine must be called with non-empty source registers */ +int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w) +{ + FPU_REG *a = &st(deststnr); + FPU_REG *dest = a; + u_char taga = FPU_gettagi(deststnr); + u_char saved_sign = getsign(dest); + u_char sign = (getsign(a) ^ getsign(b)); + int tag; + + + if ( !(taga | tagb) ) + { + /* Both regs Valid, this should be the most common case. */ + + tag = FPU_u_mul(a, b, dest, control_w, sign, exponent(a) + exponent(b)); + if ( tag < 0 ) + { + setsign(dest, saved_sign); + return tag; + } + FPU_settagi(deststnr, tag); + return tag; + } + + if ( taga == TAG_Special ) + taga = FPU_Special(a); + if ( tagb == TAG_Special ) + tagb = FPU_Special(b); + + if ( ((taga == TAG_Valid) && (tagb == TW_Denormal)) + || ((taga == TW_Denormal) && (tagb == TAG_Valid)) + || ((taga == TW_Denormal) && (tagb == TW_Denormal)) ) + { + FPU_REG x, y; + if ( denormal_operand() < 0 ) + return FPU_Exception; + + FPU_to_exp16(a, &x); + FPU_to_exp16(b, &y); + tag = FPU_u_mul(&x, &y, dest, control_w, sign, + exponent16(&x) + exponent16(&y)); + if ( tag < 0 ) + { + setsign(dest, saved_sign); + return tag; + } + FPU_settagi(deststnr, tag); + return tag; + } + else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) ) + { + if ( ((tagb == TW_Denormal) || (taga == TW_Denormal)) + && (denormal_operand() < 0) ) + return FPU_Exception; + + /* Must have either both arguments == zero, or + one valid and the other zero. + The result is therefore zero. */ + FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr); + /* The 80486 book says that the answer is +0, but a real + 80486 behaves this way. + IEEE-754 apparently says it should be this way. */ + setsign(dest, sign); + return TAG_Zero; + } + /* Must have infinities, NaNs, etc */ + else if ( (taga == TW_NaN) || (tagb == TW_NaN) ) + { + return real_2op_NaN(b, tagb, deststnr, &st(0)); + } + else if ( ((taga == TW_Infinity) && (tagb == TAG_Zero)) + || ((tagb == TW_Infinity) && (taga == TAG_Zero)) ) + { + return arith_invalid(deststnr); /* Zero*Infinity is invalid */ + } + else if ( ((taga == TW_Denormal) || (tagb == TW_Denormal)) + && (denormal_operand() < 0) ) + { + return FPU_Exception; + } + else if (taga == TW_Infinity) + { + FPU_copy_to_regi(a, TAG_Special, deststnr); + setsign(dest, sign); + return TAG_Special; + } + else if (tagb == TW_Infinity) + { + FPU_copy_to_regi(b, TAG_Special, deststnr); + setsign(dest, sign); + return TAG_Special; + } + +#ifdef PARANOID + else + { + EXCEPTION(EX_INTERNAL|0x102); + return FPU_Exception; + } +#endif /* PARANOID */ + + return 0; +} diff --git a/arch/x86/math-emu/reg_norm.S b/arch/x86/math-emu/reg_norm.S new file mode 100644 index 00000000000..8b6352efcee --- /dev/null +++ b/arch/x86/math-emu/reg_norm.S @@ -0,0 +1,147 @@ +/*---------------------------------------------------------------------------+ + | reg_norm.S | + | | + | Copyright (C) 1992,1993,1994,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | Normalize the value in a FPU_REG. | + | | + | Call from C as: | + | int FPU_normalize(FPU_REG *n) | + | | + | int FPU_normalize_nuo(FPU_REG *n) | + | | + | Return value is the tag of the answer, or-ed with FPU_Exception if | + | one was raised, or -1 on internal error. | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" + + +.text +ENTRY(FPU_normalize) + pushl %ebp + movl %esp,%ebp + pushl %ebx + + movl PARAM1,%ebx + + movl SIGH(%ebx),%edx + movl SIGL(%ebx),%eax + + orl %edx,%edx /* ms bits */ + js L_done /* Already normalized */ + jnz L_shift_1 /* Shift left 1 - 31 bits */ + + orl %eax,%eax + jz L_zero /* The contents are zero */ + + movl %eax,%edx + xorl %eax,%eax + subw $32,EXP(%ebx) /* This can cause an underflow */ + +/* We need to shift left by 1 - 31 bits */ +L_shift_1: + bsrl %edx,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + shld %cl,%eax,%edx + shl %cl,%eax + subw %cx,EXP(%ebx) /* This can cause an underflow */ + + movl %edx,SIGH(%ebx) + movl %eax,SIGL(%ebx) + +L_done: + cmpw EXP_OVER,EXP(%ebx) + jge L_overflow + + cmpw EXP_UNDER,EXP(%ebx) + jle L_underflow + +L_exit_valid: + movl TAG_Valid,%eax + + /* Convert the exponent to 80x87 form. */ + addw EXTENDED_Ebias,EXP(%ebx) + andw $0x7fff,EXP(%ebx) + +L_exit: + popl %ebx + leave + ret + + +L_zero: + movw $0,EXP(%ebx) + movl TAG_Zero,%eax + jmp L_exit + +L_underflow: + /* Convert the exponent to 80x87 form. */ + addw EXTENDED_Ebias,EXP(%ebx) + push %ebx + call arith_underflow + pop %ebx + jmp L_exit + +L_overflow: + /* Convert the exponent to 80x87 form. */ + addw EXTENDED_Ebias,EXP(%ebx) + push %ebx + call arith_overflow + pop %ebx + jmp L_exit + + + +/* Normalise without reporting underflow or overflow */ +ENTRY(FPU_normalize_nuo) + pushl %ebp + movl %esp,%ebp + pushl %ebx + + movl PARAM1,%ebx + + movl SIGH(%ebx),%edx + movl SIGL(%ebx),%eax + + orl %edx,%edx /* ms bits */ + js L_exit_nuo_valid /* Already normalized */ + jnz L_nuo_shift_1 /* Shift left 1 - 31 bits */ + + orl %eax,%eax + jz L_exit_nuo_zero /* The contents are zero */ + + movl %eax,%edx + xorl %eax,%eax + subw $32,EXP(%ebx) /* This can cause an underflow */ + +/* We need to shift left by 1 - 31 bits */ +L_nuo_shift_1: + bsrl %edx,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + shld %cl,%eax,%edx + shl %cl,%eax + subw %cx,EXP(%ebx) /* This can cause an underflow */ + + movl %edx,SIGH(%ebx) + movl %eax,SIGL(%ebx) + +L_exit_nuo_valid: + movl TAG_Valid,%eax + + popl %ebx + leave + ret + +L_exit_nuo_zero: + movl TAG_Zero,%eax + movw EXP_UNDER,EXP(%ebx) + + popl %ebx + leave + ret diff --git a/arch/x86/math-emu/reg_round.S b/arch/x86/math-emu/reg_round.S new file mode 100644 index 00000000000..d1d4e48b4f6 --- /dev/null +++ b/arch/x86/math-emu/reg_round.S @@ -0,0 +1,708 @@ + .file "reg_round.S" +/*---------------------------------------------------------------------------+ + | reg_round.S | + | | + | Rounding/truncation/etc for FPU basic arithmetic functions. | + | | + | Copyright (C) 1993,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | This code has four possible entry points. | + | The following must be entered by a jmp instruction: | + | fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. | + | | + | The FPU_round entry point is intended to be used by C code. | + | From C, call as: | + | int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w) | + | | + | Return value is the tag of the answer, or-ed with FPU_Exception if | + | one was raised, or -1 on internal error. | + | | + | For correct "up" and "down" rounding, the argument must have the correct | + | sign. | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Four entry points. | + | | + | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: | + | %eax:%ebx 64 bit significand | + | %edx 32 bit extension of the significand | + | %edi pointer to an FPU_REG for the result to be stored | + | stack calling function must have set up a C stack frame and | + | pushed %esi, %edi, and %ebx | + | | + | Needed just for the fpu_reg_round_sqrt entry point: | + | %cx A control word in the same format as the FPU control word. | + | Otherwise, PARAM4 must give such a value. | + | | + | | + | The significand and its extension are assumed to be exact in the | + | following sense: | + | If the significand by itself is the exact result then the significand | + | extension (%edx) must contain 0, otherwise the significand extension | + | must be non-zero. | + | If the significand extension is non-zero then the significand is | + | smaller than the magnitude of the correct exact result by an amount | + | greater than zero and less than one ls bit of the significand. | + | The significand extension is only required to have three possible | + | non-zero values: | + | less than 0x80000000 <=> the significand is less than 1/2 an ls | + | bit smaller than the magnitude of the | + | true exact result. | + | exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit | + | smaller than the magnitude of the true | + | exact result. | + | greater than 0x80000000 <=> the significand is more than 1/2 an ls | + | bit smaller than the magnitude of the | + | true exact result. | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | The code in this module has become quite complex, but it should handle | + | all of the FPU flags which are set at this stage of the basic arithmetic | + | computations. | + | There are a few rare cases where the results are not set identically to | + | a real FPU. These require a bit more thought because at this stage the | + | results of the code here appear to be more consistent... | + | This may be changed in a future version. | + +---------------------------------------------------------------------------*/ + + +#include "fpu_emu.h" +#include "exception.h" +#include "control_w.h" + +/* Flags for FPU_bits_lost */ +#define LOST_DOWN $1 +#define LOST_UP $2 + +/* Flags for FPU_denormal */ +#define DENORMAL $1 +#define UNMASKED_UNDERFLOW $2 + + +#ifndef NON_REENTRANT_FPU +/* Make the code re-entrant by putting + local storage on the stack: */ +#define FPU_bits_lost (%esp) +#define FPU_denormal 1(%esp) + +#else +/* Not re-entrant, so we can gain speed by putting + local storage in a static area: */ +.data + .align 4,0 +FPU_bits_lost: + .byte 0 +FPU_denormal: + .byte 0 +#endif /* NON_REENTRANT_FPU */ + + +.text +.globl fpu_reg_round +.globl fpu_Arith_exit + +/* Entry point when called from C */ +ENTRY(FPU_round) + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%edi + movl SIGH(%edi),%eax + movl SIGL(%edi),%ebx + movl PARAM2,%edx + +fpu_reg_round: /* Normal entry point */ + movl PARAM4,%ecx + +#ifndef NON_REENTRANT_FPU + pushl %ebx /* adjust the stack pointer */ +#endif /* NON_REENTRANT_FPU */ + +#ifdef PARANOID +/* Cannot use this here yet */ +/* orl %eax,%eax */ +/* jns L_entry_bugged */ +#endif /* PARANOID */ + + cmpw EXP_UNDER,EXP(%edi) + jle L_Make_denorm /* The number is a de-normal */ + + movb $0,FPU_denormal /* 0 -> not a de-normal */ + +Denorm_done: + movb $0,FPU_bits_lost /* No bits yet lost in rounding */ + + movl %ecx,%esi + andl CW_PC,%ecx + cmpl PR_64_BITS,%ecx + je LRound_To_64 + + cmpl PR_53_BITS,%ecx + je LRound_To_53 + + cmpl PR_24_BITS,%ecx + je LRound_To_24 + +#ifdef PECULIAR_486 +/* With the precision control bits set to 01 "(reserved)", a real 80486 + behaves as if the precision control bits were set to 11 "64 bits" */ + cmpl PR_RESERVED_BITS,%ecx + je LRound_To_64 +#ifdef PARANOID + jmp L_bugged_denorm_486 +#endif /* PARANOID */ +#else +#ifdef PARANOID + jmp L_bugged_denorm /* There is no bug, just a bad control word */ +#endif /* PARANOID */ +#endif /* PECULIAR_486 */ + + +/* Round etc to 24 bit precision */ +LRound_To_24: + movl %esi,%ecx + andl CW_RC,%ecx + cmpl RC_RND,%ecx + je LRound_nearest_24 + + cmpl RC_CHOP,%ecx + je LCheck_truncate_24 + + cmpl RC_UP,%ecx /* Towards +infinity */ + je LUp_24 + + cmpl RC_DOWN,%ecx /* Towards -infinity */ + je LDown_24 + +#ifdef PARANOID + jmp L_bugged_round24 +#endif /* PARANOID */ + +LUp_24: + cmpb SIGN_POS,PARAM5 + jne LCheck_truncate_24 /* If negative then up==truncate */ + + jmp LCheck_24_round_up + +LDown_24: + cmpb SIGN_POS,PARAM5 + je LCheck_truncate_24 /* If positive then down==truncate */ + +LCheck_24_round_up: + movl %eax,%ecx + andl $0x000000ff,%ecx + orl %ebx,%ecx + orl %edx,%ecx + jnz LDo_24_round_up + jmp L_Re_normalise + +LRound_nearest_24: + /* Do rounding of the 24th bit if needed (nearest or even) */ + movl %eax,%ecx + andl $0x000000ff,%ecx + cmpl $0x00000080,%ecx + jc LCheck_truncate_24 /* less than half, no increment needed */ + + jne LGreater_Half_24 /* greater than half, increment needed */ + + /* Possibly half, we need to check the ls bits */ + orl %ebx,%ebx + jnz LGreater_Half_24 /* greater than half, increment needed */ + + orl %edx,%edx + jnz LGreater_Half_24 /* greater than half, increment needed */ + + /* Exactly half, increment only if 24th bit is 1 (round to even) */ + testl $0x00000100,%eax + jz LDo_truncate_24 + +LGreater_Half_24: /* Rounding: increment at the 24th bit */ +LDo_24_round_up: + andl $0xffffff00,%eax /* Truncate to 24 bits */ + xorl %ebx,%ebx + movb LOST_UP,FPU_bits_lost + addl $0x00000100,%eax + jmp LCheck_Round_Overflow + +LCheck_truncate_24: + movl %eax,%ecx + andl $0x000000ff,%ecx + orl %ebx,%ecx + orl %edx,%ecx + jz L_Re_normalise /* No truncation needed */ + +LDo_truncate_24: + andl $0xffffff00,%eax /* Truncate to 24 bits */ + xorl %ebx,%ebx + movb LOST_DOWN,FPU_bits_lost + jmp L_Re_normalise + + +/* Round etc to 53 bit precision */ +LRound_To_53: + movl %esi,%ecx + andl CW_RC,%ecx + cmpl RC_RND,%ecx + je LRound_nearest_53 + + cmpl RC_CHOP,%ecx + je LCheck_truncate_53 + + cmpl RC_UP,%ecx /* Towards +infinity */ + je LUp_53 + + cmpl RC_DOWN,%ecx /* Towards -infinity */ + je LDown_53 + +#ifdef PARANOID + jmp L_bugged_round53 +#endif /* PARANOID */ + +LUp_53: + cmpb SIGN_POS,PARAM5 + jne LCheck_truncate_53 /* If negative then up==truncate */ + + jmp LCheck_53_round_up + +LDown_53: + cmpb SIGN_POS,PARAM5 + je LCheck_truncate_53 /* If positive then down==truncate */ + +LCheck_53_round_up: + movl %ebx,%ecx + andl $0x000007ff,%ecx + orl %edx,%ecx + jnz LDo_53_round_up + jmp L_Re_normalise + +LRound_nearest_53: + /* Do rounding of the 53rd bit if needed (nearest or even) */ + movl %ebx,%ecx + andl $0x000007ff,%ecx + cmpl $0x00000400,%ecx + jc LCheck_truncate_53 /* less than half, no increment needed */ + + jnz LGreater_Half_53 /* greater than half, increment needed */ + + /* Possibly half, we need to check the ls bits */ + orl %edx,%edx + jnz LGreater_Half_53 /* greater than half, increment needed */ + + /* Exactly half, increment only if 53rd bit is 1 (round to even) */ + testl $0x00000800,%ebx + jz LTruncate_53 + +LGreater_Half_53: /* Rounding: increment at the 53rd bit */ +LDo_53_round_up: + movb LOST_UP,FPU_bits_lost + andl $0xfffff800,%ebx /* Truncate to 53 bits */ + addl $0x00000800,%ebx + adcl $0,%eax + jmp LCheck_Round_Overflow + +LCheck_truncate_53: + movl %ebx,%ecx + andl $0x000007ff,%ecx + orl %edx,%ecx + jz L_Re_normalise + +LTruncate_53: + movb LOST_DOWN,FPU_bits_lost + andl $0xfffff800,%ebx /* Truncate to 53 bits */ + jmp L_Re_normalise + + +/* Round etc to 64 bit precision */ +LRound_To_64: + movl %esi,%ecx + andl CW_RC,%ecx + cmpl RC_RND,%ecx + je LRound_nearest_64 + + cmpl RC_CHOP,%ecx + je LCheck_truncate_64 + + cmpl RC_UP,%ecx /* Towards +infinity */ + je LUp_64 + + cmpl RC_DOWN,%ecx /* Towards -infinity */ + je LDown_64 + +#ifdef PARANOID + jmp L_bugged_round64 +#endif /* PARANOID */ + +LUp_64: + cmpb SIGN_POS,PARAM5 + jne LCheck_truncate_64 /* If negative then up==truncate */ + + orl %edx,%edx + jnz LDo_64_round_up + jmp L_Re_normalise + +LDown_64: + cmpb SIGN_POS,PARAM5 + je LCheck_truncate_64 /* If positive then down==truncate */ + + orl %edx,%edx + jnz LDo_64_round_up + jmp L_Re_normalise + +LRound_nearest_64: + cmpl $0x80000000,%edx + jc LCheck_truncate_64 + + jne LDo_64_round_up + + /* Now test for round-to-even */ + testb $1,%bl + jz LCheck_truncate_64 + +LDo_64_round_up: + movb LOST_UP,FPU_bits_lost + addl $1,%ebx + adcl $0,%eax + +LCheck_Round_Overflow: + jnc L_Re_normalise + + /* Overflow, adjust the result (significand to 1.0) */ + rcrl $1,%eax + rcrl $1,%ebx + incw EXP(%edi) + jmp L_Re_normalise + +LCheck_truncate_64: + orl %edx,%edx + jz L_Re_normalise + +LTruncate_64: + movb LOST_DOWN,FPU_bits_lost + +L_Re_normalise: + testb $0xff,FPU_denormal + jnz Normalise_result + +L_Normalised: + movl TAG_Valid,%edx + +L_deNormalised: + cmpb LOST_UP,FPU_bits_lost + je L_precision_lost_up + + cmpb LOST_DOWN,FPU_bits_lost + je L_precision_lost_down + +L_no_precision_loss: + /* store the result */ + +L_Store_significand: + movl %eax,SIGH(%edi) + movl %ebx,SIGL(%edi) + + cmpw EXP_OVER,EXP(%edi) + jge L_overflow + + movl %edx,%eax + + /* Convert the exponent to 80x87 form. */ + addw EXTENDED_Ebias,EXP(%edi) + andw $0x7fff,EXP(%edi) + +fpu_reg_round_signed_special_exit: + + cmpb SIGN_POS,PARAM5 + je fpu_reg_round_special_exit + + orw $0x8000,EXP(%edi) /* Negative sign for the result. */ + +fpu_reg_round_special_exit: + +#ifndef NON_REENTRANT_FPU + popl %ebx /* adjust the stack pointer */ +#endif /* NON_REENTRANT_FPU */ + +fpu_Arith_exit: + popl %ebx + popl %edi + popl %esi + leave + ret + + +/* + * Set the FPU status flags to represent precision loss due to + * round-up. + */ +L_precision_lost_up: + push %edx + push %eax + call set_precision_flag_up + popl %eax + popl %edx + jmp L_no_precision_loss + +/* + * Set the FPU status flags to represent precision loss due to + * truncation. + */ +L_precision_lost_down: + push %edx + push %eax + call set_precision_flag_down + popl %eax + popl %edx + jmp L_no_precision_loss + + +/* + * The number is a denormal (which might get rounded up to a normal) + * Shift the number right the required number of bits, which will + * have to be undone later... + */ +L_Make_denorm: + /* The action to be taken depends upon whether the underflow + exception is masked */ + testb CW_Underflow,%cl /* Underflow mask. */ + jz Unmasked_underflow /* Do not make a denormal. */ + + movb DENORMAL,FPU_denormal + + pushl %ecx /* Save */ + movw EXP_UNDER+1,%cx + subw EXP(%edi),%cx + + cmpw $64,%cx /* shrd only works for 0..31 bits */ + jnc Denorm_shift_more_than_63 + + cmpw $32,%cx /* shrd only works for 0..31 bits */ + jnc Denorm_shift_more_than_32 + +/* + * We got here without jumps by assuming that the most common requirement + * is for a small de-normalising shift. + * Shift by [1..31] bits + */ + addw %cx,EXP(%edi) + orl %edx,%edx /* extension */ + setne %ch /* Save whether %edx is non-zero */ + xorl %edx,%edx + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + orb %ch,%dl + popl %ecx + jmp Denorm_done + +/* Shift by [32..63] bits */ +Denorm_shift_more_than_32: + addw %cx,EXP(%edi) + subb $32,%cl + orl %edx,%edx + setne %ch + orb %ch,%bl + xorl %edx,%edx + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + orl %edx,%edx /* test these 32 bits */ + setne %cl + orb %ch,%bl + orb %cl,%bl + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + popl %ecx + jmp Denorm_done + +/* Shift by [64..) bits */ +Denorm_shift_more_than_63: + cmpw $64,%cx + jne Denorm_shift_more_than_64 + +/* Exactly 64 bit shift */ + addw %cx,EXP(%edi) + xorl %ecx,%ecx + orl %edx,%edx + setne %cl + orl %ebx,%ebx + setne %ch + orb %ch,%cl + orb %cl,%al + movl %eax,%edx + xorl %eax,%eax + xorl %ebx,%ebx + popl %ecx + jmp Denorm_done + +Denorm_shift_more_than_64: + movw EXP_UNDER+1,EXP(%edi) +/* This is easy, %eax must be non-zero, so.. */ + movl $1,%edx + xorl %eax,%eax + xorl %ebx,%ebx + popl %ecx + jmp Denorm_done + + +Unmasked_underflow: + movb UNMASKED_UNDERFLOW,FPU_denormal + jmp Denorm_done + + +/* Undo the de-normalisation. */ +Normalise_result: + cmpb UNMASKED_UNDERFLOW,FPU_denormal + je Signal_underflow + +/* The number must be a denormal if we got here. */ +#ifdef PARANOID + /* But check it... just in case. */ + cmpw EXP_UNDER+1,EXP(%edi) + jne L_norm_bugged +#endif /* PARANOID */ + +#ifdef PECULIAR_486 + /* + * This implements a special feature of 80486 behaviour. + * Underflow will be signalled even if the number is + * not a denormal after rounding. + * This difference occurs only for masked underflow, and not + * in the unmasked case. + * Actual 80486 behaviour differs from this in some circumstances. + */ + orl %eax,%eax /* ms bits */ + js LPseudoDenormal /* Will be masked underflow */ +#else + orl %eax,%eax /* ms bits */ + js L_Normalised /* No longer a denormal */ +#endif /* PECULIAR_486 */ + + jnz LDenormal_adj_exponent + + orl %ebx,%ebx + jz L_underflow_to_zero /* The contents are zero */ + +LDenormal_adj_exponent: + decw EXP(%edi) + +LPseudoDenormal: + testb $0xff,FPU_bits_lost /* bits lost == underflow */ + movl TAG_Special,%edx + jz L_deNormalised + + /* There must be a masked underflow */ + push %eax + pushl EX_Underflow + call EXCEPTION + popl %eax + popl %eax + movl TAG_Special,%edx + jmp L_deNormalised + + +/* + * The operations resulted in a number too small to represent. + * Masked response. + */ +L_underflow_to_zero: + push %eax + call set_precision_flag_down + popl %eax + + push %eax + pushl EX_Underflow + call EXCEPTION + popl %eax + popl %eax + +/* Reduce the exponent to EXP_UNDER */ + movw EXP_UNDER,EXP(%edi) + movl TAG_Zero,%edx + jmp L_Store_significand + + +/* The operations resulted in a number too large to represent. */ +L_overflow: + addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */ + push %edi + call arith_overflow + pop %edi + jmp fpu_reg_round_signed_special_exit + + +Signal_underflow: + /* The number may have been changed to a non-denormal */ + /* by the rounding operations. */ + cmpw EXP_UNDER,EXP(%edi) + jle Do_unmasked_underflow + + jmp L_Normalised + +Do_unmasked_underflow: + /* Increase the exponent by the magic number */ + addw $(3*(1<<13)),EXP(%edi) + push %eax + pushl EX_Underflow + call EXCEPTION + popl %eax + popl %eax + jmp L_Normalised + + +#ifdef PARANOID +#ifdef PECULIAR_486 +L_bugged_denorm_486: + pushl EX_INTERNAL|0x236 + call EXCEPTION + popl %ebx + jmp L_exception_exit +#else +L_bugged_denorm: + pushl EX_INTERNAL|0x230 + call EXCEPTION + popl %ebx + jmp L_exception_exit +#endif /* PECULIAR_486 */ + +L_bugged_round24: + pushl EX_INTERNAL|0x231 + call EXCEPTION + popl %ebx + jmp L_exception_exit + +L_bugged_round53: + pushl EX_INTERNAL|0x232 + call EXCEPTION + popl %ebx + jmp L_exception_exit + +L_bugged_round64: + pushl EX_INTERNAL|0x233 + call EXCEPTION + popl %ebx + jmp L_exception_exit + +L_norm_bugged: + pushl EX_INTERNAL|0x234 + call EXCEPTION + popl %ebx + jmp L_exception_exit + +L_entry_bugged: + pushl EX_INTERNAL|0x235 + call EXCEPTION + popl %ebx +L_exception_exit: + mov $-1,%eax + jmp fpu_reg_round_special_exit +#endif /* PARANOID */ diff --git a/arch/x86/math-emu/reg_u_add.S b/arch/x86/math-emu/reg_u_add.S new file mode 100644 index 00000000000..47c4c2434d8 --- /dev/null +++ b/arch/x86/math-emu/reg_u_add.S @@ -0,0 +1,167 @@ + .file "reg_u_add.S" +/*---------------------------------------------------------------------------+ + | reg_u_add.S | + | | + | Add two valid (TAG_Valid) FPU_REG numbers, of the same sign, and put the | + | result in a destination FPU_REG. | + | | + | Copyright (C) 1992,1993,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | Call from C as: | + | int FPU_u_add(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, | + | int control_w) | + | Return value is the tag of the answer, or-ed with FPU_Exception if | + | one was raised, or -1 on internal error. | + | | + +---------------------------------------------------------------------------*/ + +/* + | Kernel addition routine FPU_u_add(reg *arg1, reg *arg2, reg *answ). + | Takes two valid reg f.p. numbers (TAG_Valid), which are + | treated as unsigned numbers, + | and returns their sum as a TAG_Valid or TAG_Special f.p. number. + | The returned number is normalized. + | Basic checks are performed if PARANOID is defined. + */ + +#include "exception.h" +#include "fpu_emu.h" +#include "control_w.h" + +.text +ENTRY(FPU_u_add) + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi /* source 1 */ + movl PARAM2,%edi /* source 2 */ + + movl PARAM6,%ecx + movl %ecx,%edx + subl PARAM7,%ecx /* exp1 - exp2 */ + jge L_arg1_larger + + /* num1 is smaller */ + movl SIGL(%esi),%ebx + movl SIGH(%esi),%eax + + movl %edi,%esi + movl PARAM7,%edx + negw %cx + jmp L_accum_loaded + +L_arg1_larger: + /* num1 has larger or equal exponent */ + movl SIGL(%edi),%ebx + movl SIGH(%edi),%eax + +L_accum_loaded: + movl PARAM3,%edi /* destination */ + movw %dx,EXP(%edi) /* Copy exponent to destination */ + + xorl %edx,%edx /* clear the extension */ + +#ifdef PARANOID + testl $0x80000000,%eax + je L_bugged + + testl $0x80000000,SIGH(%esi) + je L_bugged +#endif /* PARANOID */ + +/* The number to be shifted is in %eax:%ebx:%edx */ + cmpw $32,%cx /* shrd only works for 0..31 bits */ + jnc L_more_than_31 + +/* less than 32 bits */ + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + jmp L_shift_done + +L_more_than_31: + cmpw $64,%cx + jnc L_more_than_63 + + subb $32,%cl + jz L_exactly_32 + + shrd %cl,%eax,%edx + shr %cl,%eax + orl %ebx,%ebx + jz L_more_31_no_low /* none of the lowest bits is set */ + + orl $1,%edx /* record the fact in the extension */ + +L_more_31_no_low: + movl %eax,%ebx + xorl %eax,%eax + jmp L_shift_done + +L_exactly_32: + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + jmp L_shift_done + +L_more_than_63: + cmpw $65,%cx + jnc L_more_than_64 + + movl %eax,%edx + orl %ebx,%ebx + jz L_more_63_no_low + + orl $1,%edx + jmp L_more_63_no_low + +L_more_than_64: + movl $1,%edx /* The shifted nr always at least one '1' */ + +L_more_63_no_low: + xorl %ebx,%ebx + xorl %eax,%eax + +L_shift_done: + /* Now do the addition */ + addl SIGL(%esi),%ebx + adcl SIGH(%esi),%eax + jnc L_round_the_result + + /* Overflow, adjust the result */ + rcrl $1,%eax + rcrl $1,%ebx + rcrl $1,%edx + jnc L_no_bit_lost + + orl $1,%edx + +L_no_bit_lost: + incw EXP(%edi) + +L_round_the_result: + jmp fpu_reg_round /* Round the result */ + + + +#ifdef PARANOID +/* If we ever get here then we have problems! */ +L_bugged: + pushl EX_INTERNAL|0x201 + call EXCEPTION + pop %ebx + movl $-1,%eax + jmp L_exit + +L_exit: + popl %ebx + popl %edi + popl %esi + leave + ret +#endif /* PARANOID */ diff --git a/arch/x86/math-emu/reg_u_div.S b/arch/x86/math-emu/reg_u_div.S new file mode 100644 index 00000000000..cc00654b6f9 --- /dev/null +++ b/arch/x86/math-emu/reg_u_div.S @@ -0,0 +1,471 @@ + .file "reg_u_div.S" +/*---------------------------------------------------------------------------+ + | reg_u_div.S | + | | + | Divide one FPU_REG by another and put the result in a destination FPU_REG.| + | | + | Copyright (C) 1992,1993,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Call from C as: | + | int FPU_u_div(FPU_REG *a, FPU_REG *b, FPU_REG *dest, | + | unsigned int control_word, char *sign) | + | | + | Does not compute the destination exponent, but does adjust it. | + | | + | Return value is the tag of the answer, or-ed with FPU_Exception if | + | one was raised, or -1 on internal error. | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_emu.h" +#include "control_w.h" + + +/* #define dSIGL(x) (x) */ +/* #define dSIGH(x) 4(x) */ + + +#ifndef NON_REENTRANT_FPU +/* + Local storage on the stack: + Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 + Overflow flag: ovfl_flag + */ +#define FPU_accum_3 -4(%ebp) +#define FPU_accum_2 -8(%ebp) +#define FPU_accum_1 -12(%ebp) +#define FPU_accum_0 -16(%ebp) +#define FPU_result_1 -20(%ebp) +#define FPU_result_2 -24(%ebp) +#define FPU_ovfl_flag -28(%ebp) + +#else +.data +/* + Local storage in a static area: + Result: FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0 + Overflow flag: ovfl_flag + */ + .align 4,0 +FPU_accum_3: + .long 0 +FPU_accum_2: + .long 0 +FPU_accum_1: + .long 0 +FPU_accum_0: + .long 0 +FPU_result_1: + .long 0 +FPU_result_2: + .long 0 +FPU_ovfl_flag: + .byte 0 +#endif /* NON_REENTRANT_FPU */ + +#define REGA PARAM1 +#define REGB PARAM2 +#define DEST PARAM3 + +.text +ENTRY(FPU_u_div) + pushl %ebp + movl %esp,%ebp +#ifndef NON_REENTRANT_FPU + subl $28,%esp +#endif /* NON_REENTRANT_FPU */ + + pushl %esi + pushl %edi + pushl %ebx + + movl REGA,%esi + movl REGB,%ebx + movl DEST,%edi + + movswl EXP(%esi),%edx + movswl EXP(%ebx),%eax + subl %eax,%edx + addl EXP_BIAS,%edx + + /* A denormal and a large number can cause an exponent underflow */ + cmpl EXP_WAY_UNDER,%edx + jg xExp_not_underflow + + /* Set to a really low value allow correct handling */ + movl EXP_WAY_UNDER,%edx + +xExp_not_underflow: + + movw %dx,EXP(%edi) + +#ifdef PARANOID +/* testl $0x80000000, SIGH(%esi) // Dividend */ +/* je L_bugged */ + testl $0x80000000, SIGH(%ebx) /* Divisor */ + je L_bugged +#endif /* PARANOID */ + +/* Check if the divisor can be treated as having just 32 bits */ + cmpl $0,SIGL(%ebx) + jnz L_Full_Division /* Can't do a quick divide */ + +/* We should be able to zip through the division here */ + movl SIGH(%ebx),%ecx /* The divisor */ + movl SIGH(%esi),%edx /* Dividend */ + movl SIGL(%esi),%eax /* Dividend */ + + cmpl %ecx,%edx + setaeb FPU_ovfl_flag /* Keep a record */ + jb L_no_adjust + + subl %ecx,%edx /* Prevent the overflow */ + +L_no_adjust: + /* Divide the 64 bit number by the 32 bit denominator */ + divl %ecx + movl %eax,FPU_result_2 + + /* Work on the remainder of the first division */ + xorl %eax,%eax + divl %ecx + movl %eax,FPU_result_1 + + /* Work on the remainder of the 64 bit division */ + xorl %eax,%eax + divl %ecx + + testb $255,FPU_ovfl_flag /* was the num > denom ? */ + je L_no_overflow + + /* Do the shifting here */ + /* increase the exponent */ + incw EXP(%edi) + + /* shift the mantissa right one bit */ + stc /* To set the ms bit */ + rcrl FPU_result_2 + rcrl FPU_result_1 + rcrl %eax + +L_no_overflow: + jmp LRound_precision /* Do the rounding as required */ + + +/*---------------------------------------------------------------------------+ + | Divide: Return arg1/arg2 to arg3. | + | | + | This routine does not use the exponents of arg1 and arg2, but does | + | adjust the exponent of arg3. | + | | + | The maximum returned value is (ignoring exponents) | + | .ffffffff ffffffff | + | ------------------ = 1.ffffffff fffffffe | + | .80000000 00000000 | + | and the minimum is | + | .80000000 00000000 | + | ------------------ = .80000000 00000001 (rounded) | + | .ffffffff ffffffff | + | | + +---------------------------------------------------------------------------*/ + + +L_Full_Division: + /* Save extended dividend in local register */ + movl SIGL(%esi),%eax + movl %eax,FPU_accum_2 + movl SIGH(%esi),%eax + movl %eax,FPU_accum_3 + xorl %eax,%eax + movl %eax,FPU_accum_1 /* zero the extension */ + movl %eax,FPU_accum_0 /* zero the extension */ + + movl SIGL(%esi),%eax /* Get the current num */ + movl SIGH(%esi),%edx + +/*----------------------------------------------------------------------*/ +/* Initialization done. + Do the first 32 bits. */ + + movb $0,FPU_ovfl_flag + cmpl SIGH(%ebx),%edx /* Test for imminent overflow */ + jb LLess_than_1 + ja LGreater_than_1 + + cmpl SIGL(%ebx),%eax + jb LLess_than_1 + +LGreater_than_1: +/* The dividend is greater or equal, would cause overflow */ + setaeb FPU_ovfl_flag /* Keep a record */ + + subl SIGL(%ebx),%eax + sbbl SIGH(%ebx),%edx /* Prevent the overflow */ + movl %eax,FPU_accum_2 + movl %edx,FPU_accum_3 + +LLess_than_1: +/* At this point, we have a dividend < divisor, with a record of + adjustment in FPU_ovfl_flag */ + + /* We will divide by a number which is too large */ + movl SIGH(%ebx),%ecx + addl $1,%ecx + jnc LFirst_div_not_1 + + /* here we need to divide by 100000000h, + i.e., no division at all.. */ + mov %edx,%eax + jmp LFirst_div_done + +LFirst_div_not_1: + divl %ecx /* Divide the numerator by the augmented + denom ms dw */ + +LFirst_div_done: + movl %eax,FPU_result_2 /* Put the result in the answer */ + + mull SIGH(%ebx) /* mul by the ms dw of the denom */ + + subl %eax,FPU_accum_2 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_3 + + movl FPU_result_2,%eax /* Get the result back */ + mull SIGL(%ebx) /* now mul the ls dw of the denom */ + + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + sbbl $0,FPU_accum_3 + je LDo_2nd_32_bits /* Must check for non-zero result here */ + +#ifdef PARANOID + jb L_bugged_1 +#endif /* PARANOID */ + + /* need to subtract another once of the denom */ + incl FPU_result_2 /* Correct the answer */ + + movl SIGL(%ebx),%eax + movl SIGH(%ebx),%edx + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + +#ifdef PARANOID + sbbl $0,FPU_accum_3 + jne L_bugged_1 /* Must check for non-zero result here */ +#endif /* PARANOID */ + +/*----------------------------------------------------------------------*/ +/* Half of the main problem is done, there is just a reduced numerator + to handle now. + Work with the second 32 bits, FPU_accum_0 not used from now on */ +LDo_2nd_32_bits: + movl FPU_accum_2,%edx /* get the reduced num */ + movl FPU_accum_1,%eax + + /* need to check for possible subsequent overflow */ + cmpl SIGH(%ebx),%edx + jb LDo_2nd_div + ja LPrevent_2nd_overflow + + cmpl SIGL(%ebx),%eax + jb LDo_2nd_div + +LPrevent_2nd_overflow: +/* The numerator is greater or equal, would cause overflow */ + /* prevent overflow */ + subl SIGL(%ebx),%eax + sbbl SIGH(%ebx),%edx + movl %edx,FPU_accum_2 + movl %eax,FPU_accum_1 + + incl FPU_result_2 /* Reflect the subtraction in the answer */ + +#ifdef PARANOID + je L_bugged_2 /* Can't bump the result to 1.0 */ +#endif /* PARANOID */ + +LDo_2nd_div: + cmpl $0,%ecx /* augmented denom msw */ + jnz LSecond_div_not_1 + + /* %ecx == 0, we are dividing by 1.0 */ + mov %edx,%eax + jmp LSecond_div_done + +LSecond_div_not_1: + divl %ecx /* Divide the numerator by the denom ms dw */ + +LSecond_div_done: + movl %eax,FPU_result_1 /* Put the result in the answer */ + + mull SIGH(%ebx) /* mul by the ms dw of the denom */ + + subl %eax,FPU_accum_1 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 +#endif /* PARANOID */ + + movl FPU_result_1,%eax /* Get the result back */ + mull SIGL(%ebx) /* now mul the ls dw of the denom */ + + subl %eax,FPU_accum_0 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_1 /* Subtract from the num local reg */ + sbbl $0,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 +#endif /* PARANOID */ + + jz LDo_3rd_32_bits + +#ifdef PARANOID + cmpl $1,FPU_accum_2 + jne L_bugged_2 +#endif /* PARANOID */ + + /* need to subtract another once of the denom */ + movl SIGL(%ebx),%eax + movl SIGH(%ebx),%edx + subl %eax,FPU_accum_0 /* Subtract from the num local reg */ + sbbl %edx,FPU_accum_1 + sbbl $0,FPU_accum_2 + +#ifdef PARANOID + jc L_bugged_2 + jne L_bugged_2 +#endif /* PARANOID */ + + addl $1,FPU_result_1 /* Correct the answer */ + adcl $0,FPU_result_2 + +#ifdef PARANOID + jc L_bugged_2 /* Must check for non-zero result here */ +#endif /* PARANOID */ + +/*----------------------------------------------------------------------*/ +/* The division is essentially finished here, we just need to perform + tidying operations. + Deal with the 3rd 32 bits */ +LDo_3rd_32_bits: + movl FPU_accum_1,%edx /* get the reduced num */ + movl FPU_accum_0,%eax + + /* need to check for possible subsequent overflow */ + cmpl SIGH(%ebx),%edx /* denom */ + jb LRound_prep + ja LPrevent_3rd_overflow + + cmpl SIGL(%ebx),%eax /* denom */ + jb LRound_prep + +LPrevent_3rd_overflow: + /* prevent overflow */ + subl SIGL(%ebx),%eax + sbbl SIGH(%ebx),%edx + movl %edx,FPU_accum_1 + movl %eax,FPU_accum_0 + + addl $1,FPU_result_1 /* Reflect the subtraction in the answer */ + adcl $0,FPU_result_2 + jne LRound_prep + jnc LRound_prep + + /* This is a tricky spot, there is an overflow of the answer */ + movb $255,FPU_ovfl_flag /* Overflow -> 1.000 */ + +LRound_prep: +/* + * Prepare for rounding. + * To test for rounding, we just need to compare 2*accum with the + * denom. + */ + movl FPU_accum_0,%ecx + movl FPU_accum_1,%edx + movl %ecx,%eax + orl %edx,%eax + jz LRound_ovfl /* The accumulator contains zero. */ + + /* Multiply by 2 */ + clc + rcll $1,%ecx + rcll $1,%edx + jc LRound_large /* No need to compare, denom smaller */ + + subl SIGL(%ebx),%ecx + sbbl SIGH(%ebx),%edx + jnc LRound_not_small + + movl $0x70000000,%eax /* Denom was larger */ + jmp LRound_ovfl + +LRound_not_small: + jnz LRound_large + + movl $0x80000000,%eax /* Remainder was exactly 1/2 denom */ + jmp LRound_ovfl + +LRound_large: + movl $0xff000000,%eax /* Denom was smaller */ + +LRound_ovfl: +/* We are now ready to deal with rounding, but first we must get + the bits properly aligned */ + testb $255,FPU_ovfl_flag /* was the num > denom ? */ + je LRound_precision + + incw EXP(%edi) + + /* shift the mantissa right one bit */ + stc /* Will set the ms bit */ + rcrl FPU_result_2 + rcrl FPU_result_1 + rcrl %eax + +/* Round the result as required */ +LRound_precision: + decw EXP(%edi) /* binary point between 1st & 2nd bits */ + + movl %eax,%edx + movl FPU_result_1,%ebx + movl FPU_result_2,%eax + jmp fpu_reg_round + + +#ifdef PARANOID +/* The logic is wrong if we got here */ +L_bugged: + pushl EX_INTERNAL|0x202 + call EXCEPTION + pop %ebx + jmp L_exit + +L_bugged_1: + pushl EX_INTERNAL|0x203 + call EXCEPTION + pop %ebx + jmp L_exit + +L_bugged_2: + pushl EX_INTERNAL|0x204 + call EXCEPTION + pop %ebx + jmp L_exit + +L_exit: + movl $-1,%eax + popl %ebx + popl %edi + popl %esi + + leave + ret +#endif /* PARANOID */ diff --git a/arch/x86/math-emu/reg_u_mul.S b/arch/x86/math-emu/reg_u_mul.S new file mode 100644 index 00000000000..973f12af97d --- /dev/null +++ b/arch/x86/math-emu/reg_u_mul.S @@ -0,0 +1,148 @@ + .file "reg_u_mul.S" +/*---------------------------------------------------------------------------+ + | reg_u_mul.S | + | | + | Core multiplication routine | + | | + | Copyright (C) 1992,1993,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | Basic multiplication routine. | + | Does not check the resulting exponent for overflow/underflow | + | | + | FPU_u_mul(FPU_REG *a, FPU_REG *b, FPU_REG *c, unsigned int cw); | + | | + | Internal working is at approx 128 bits. | + | Result is rounded to nearest 53 or 64 bits, using "nearest or even". | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_emu.h" +#include "control_w.h" + + + +#ifndef NON_REENTRANT_FPU +/* Local storage on the stack: */ +#define FPU_accum_0 -4(%ebp) /* ms word */ +#define FPU_accum_1 -8(%ebp) + +#else +/* Local storage in a static area: */ +.data + .align 4,0 +FPU_accum_0: + .long 0 +FPU_accum_1: + .long 0 +#endif /* NON_REENTRANT_FPU */ + + +.text +ENTRY(FPU_u_mul) + pushl %ebp + movl %esp,%ebp +#ifndef NON_REENTRANT_FPU + subl $8,%esp +#endif /* NON_REENTRANT_FPU */ + + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi + movl PARAM2,%edi + +#ifdef PARANOID + testl $0x80000000,SIGH(%esi) + jz L_bugged + testl $0x80000000,SIGH(%edi) + jz L_bugged +#endif /* PARANOID */ + + xorl %ecx,%ecx + xorl %ebx,%ebx + + movl SIGL(%esi),%eax + mull SIGL(%edi) + movl %eax,FPU_accum_0 + movl %edx,FPU_accum_1 + + movl SIGL(%esi),%eax + mull SIGH(%edi) + addl %eax,FPU_accum_1 + adcl %edx,%ebx +/* adcl $0,%ecx // overflow here is not possible */ + + movl SIGH(%esi),%eax + mull SIGL(%edi) + addl %eax,FPU_accum_1 + adcl %edx,%ebx + adcl $0,%ecx + + movl SIGH(%esi),%eax + mull SIGH(%edi) + addl %eax,%ebx + adcl %edx,%ecx + + /* Get the sum of the exponents. */ + movl PARAM6,%eax + subl EXP_BIAS-1,%eax + + /* Two denormals can cause an exponent underflow */ + cmpl EXP_WAY_UNDER,%eax + jg Exp_not_underflow + + /* Set to a really low value allow correct handling */ + movl EXP_WAY_UNDER,%eax + +Exp_not_underflow: + +/* Have now finished with the sources */ + movl PARAM3,%edi /* Point to the destination */ + movw %ax,EXP(%edi) + +/* Now make sure that the result is normalized */ + testl $0x80000000,%ecx + jnz LResult_Normalised + + /* Normalize by shifting left one bit */ + shll $1,FPU_accum_0 + rcll $1,FPU_accum_1 + rcll $1,%ebx + rcll $1,%ecx + decw EXP(%edi) + +LResult_Normalised: + movl FPU_accum_0,%eax + movl FPU_accum_1,%edx + orl %eax,%eax + jz L_extent_zero + + orl $1,%edx + +L_extent_zero: + movl %ecx,%eax + jmp fpu_reg_round + + +#ifdef PARANOID +L_bugged: + pushl EX_INTERNAL|0x205 + call EXCEPTION + pop %ebx + jmp L_exit + +L_exit: + popl %ebx + popl %edi + popl %esi + leave + ret +#endif /* PARANOID */ + diff --git a/arch/x86/math-emu/reg_u_sub.S b/arch/x86/math-emu/reg_u_sub.S new file mode 100644 index 00000000000..1b6c24801d2 --- /dev/null +++ b/arch/x86/math-emu/reg_u_sub.S @@ -0,0 +1,272 @@ + .file "reg_u_sub.S" +/*---------------------------------------------------------------------------+ + | reg_u_sub.S | + | | + | Core floating point subtraction routine. | + | | + | Copyright (C) 1992,1993,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@suburbia.net | + | | + | Call from C as: | + | int FPU_u_sub(FPU_REG *arg1, FPU_REG *arg2, FPU_REG *answ, | + | int control_w) | + | Return value is the tag of the answer, or-ed with FPU_Exception if | + | one was raised, or -1 on internal error. | + | | + +---------------------------------------------------------------------------*/ + +/* + | Kernel subtraction routine FPU_u_sub(reg *arg1, reg *arg2, reg *answ). + | Takes two valid reg f.p. numbers (TAG_Valid), which are + | treated as unsigned numbers, + | and returns their difference as a TAG_Valid or TAG_Zero f.p. + | number. + | The first number (arg1) must be the larger. + | The returned number is normalized. + | Basic checks are performed if PARANOID is defined. + */ + +#include "exception.h" +#include "fpu_emu.h" +#include "control_w.h" + +.text +ENTRY(FPU_u_sub) + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi /* source 1 */ + movl PARAM2,%edi /* source 2 */ + + movl PARAM6,%ecx + subl PARAM7,%ecx /* exp1 - exp2 */ + +#ifdef PARANOID + /* source 2 is always smaller than source 1 */ + js L_bugged_1 + + testl $0x80000000,SIGH(%edi) /* The args are assumed to be be normalized */ + je L_bugged_2 + + testl $0x80000000,SIGH(%esi) + je L_bugged_2 +#endif /* PARANOID */ + +/*--------------------------------------+ + | Form a register holding the | + | smaller number | + +--------------------------------------*/ + movl SIGH(%edi),%eax /* register ms word */ + movl SIGL(%edi),%ebx /* register ls word */ + + movl PARAM3,%edi /* destination */ + movl PARAM6,%edx + movw %dx,EXP(%edi) /* Copy exponent to destination */ + + xorl %edx,%edx /* register extension */ + +/*--------------------------------------+ + | Shift the temporary register | + | right the required number of | + | places. | + +--------------------------------------*/ + + cmpw $32,%cx /* shrd only works for 0..31 bits */ + jnc L_more_than_31 + +/* less than 32 bits */ + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + jmp L_shift_done + +L_more_than_31: + cmpw $64,%cx + jnc L_more_than_63 + + subb $32,%cl + jz L_exactly_32 + + shrd %cl,%eax,%edx + shr %cl,%eax + orl %ebx,%ebx + jz L_more_31_no_low /* none of the lowest bits is set */ + + orl $1,%edx /* record the fact in the extension */ + +L_more_31_no_low: + movl %eax,%ebx + xorl %eax,%eax + jmp L_shift_done + +L_exactly_32: + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + jmp L_shift_done + +L_more_than_63: + cmpw $65,%cx + jnc L_more_than_64 + + /* Shift right by 64 bits */ + movl %eax,%edx + orl %ebx,%ebx + jz L_more_63_no_low + + orl $1,%edx + jmp L_more_63_no_low + +L_more_than_64: + jne L_more_than_65 + + /* Shift right by 65 bits */ + /* Carry is clear if we get here */ + movl %eax,%edx + rcrl %edx + jnc L_shift_65_nc + + orl $1,%edx + jmp L_more_63_no_low + +L_shift_65_nc: + orl %ebx,%ebx + jz L_more_63_no_low + + orl $1,%edx + jmp L_more_63_no_low + +L_more_than_65: + movl $1,%edx /* The shifted nr always at least one '1' */ + +L_more_63_no_low: + xorl %ebx,%ebx + xorl %eax,%eax + +L_shift_done: +L_subtr: +/*------------------------------+ + | Do the subtraction | + +------------------------------*/ + xorl %ecx,%ecx + subl %edx,%ecx + movl %ecx,%edx + movl SIGL(%esi),%ecx + sbbl %ebx,%ecx + movl %ecx,%ebx + movl SIGH(%esi),%ecx + sbbl %eax,%ecx + movl %ecx,%eax + +#ifdef PARANOID + /* We can never get a borrow */ + jc L_bugged +#endif /* PARANOID */ + +/*--------------------------------------+ + | Normalize the result | + +--------------------------------------*/ + testl $0x80000000,%eax + jnz L_round /* no shifting needed */ + + orl %eax,%eax + jnz L_shift_1 /* shift left 1 - 31 bits */ + + orl %ebx,%ebx + jnz L_shift_32 /* shift left 32 - 63 bits */ + +/* + * A rare case, the only one which is non-zero if we got here + * is: 1000000 .... 0000 + * -0111111 .... 1111 1 + * -------------------- + * 0000000 .... 0000 1 + */ + + cmpl $0x80000000,%edx + jnz L_must_be_zero + + /* Shift left 64 bits */ + subw $64,EXP(%edi) + xchg %edx,%eax + jmp fpu_reg_round + +L_must_be_zero: +#ifdef PARANOID + orl %edx,%edx + jnz L_bugged_3 +#endif /* PARANOID */ + + /* The result is zero */ + movw $0,EXP(%edi) /* exponent */ + movl $0,SIGL(%edi) + movl $0,SIGH(%edi) + movl TAG_Zero,%eax + jmp L_exit + +L_shift_32: + movl %ebx,%eax + movl %edx,%ebx + movl $0,%edx + subw $32,EXP(%edi) /* Can get underflow here */ + +/* We need to shift left by 1 - 31 bits */ +L_shift_1: + bsrl %eax,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + shld %cl,%ebx,%eax + shld %cl,%edx,%ebx + shl %cl,%edx + subw %cx,EXP(%edi) /* Can get underflow here */ + +L_round: + jmp fpu_reg_round /* Round the result */ + + +#ifdef PARANOID +L_bugged_1: + pushl EX_INTERNAL|0x206 + call EXCEPTION + pop %ebx + jmp L_error_exit + +L_bugged_2: + pushl EX_INTERNAL|0x209 + call EXCEPTION + pop %ebx + jmp L_error_exit + +L_bugged_3: + pushl EX_INTERNAL|0x210 + call EXCEPTION + pop %ebx + jmp L_error_exit + +L_bugged_4: + pushl EX_INTERNAL|0x211 + call EXCEPTION + pop %ebx + jmp L_error_exit + +L_bugged: + pushl EX_INTERNAL|0x212 + call EXCEPTION + pop %ebx + jmp L_error_exit + +L_error_exit: + movl $-1,%eax + +#endif /* PARANOID */ + +L_exit: + popl %ebx + popl %edi + popl %esi + leave + ret diff --git a/arch/x86/math-emu/round_Xsig.S b/arch/x86/math-emu/round_Xsig.S new file mode 100644 index 00000000000..bbe0e87718e --- /dev/null +++ b/arch/x86/math-emu/round_Xsig.S @@ -0,0 +1,141 @@ +/*---------------------------------------------------------------------------+ + | round_Xsig.S | + | | + | Copyright (C) 1992,1993,1994,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | Normalize and round a 12 byte quantity. | + | Call from C as: | + | int round_Xsig(Xsig *n) | + | | + | Normalize a 12 byte quantity. | + | Call from C as: | + | int norm_Xsig(Xsig *n) | + | | + | Each function returns the size of the shift (nr of bits). | + | | + +---------------------------------------------------------------------------*/ + .file "round_Xsig.S" + +#include "fpu_emu.h" + + +.text +ENTRY(round_Xsig) + pushl %ebp + movl %esp,%ebp + pushl %ebx /* Reserve some space */ + pushl %ebx + pushl %esi + + movl PARAM1,%esi + + movl 8(%esi),%edx + movl 4(%esi),%ebx + movl (%esi),%eax + + movl $0,-4(%ebp) + + orl %edx,%edx /* ms bits */ + js L_round /* Already normalized */ + jnz L_shift_1 /* Shift left 1 - 31 bits */ + + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + movl $-32,-4(%ebp) + +/* We need to shift left by 1 - 31 bits */ +L_shift_1: + bsrl %edx,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + subl %ecx,-4(%ebp) + shld %cl,%ebx,%edx + shld %cl,%eax,%ebx + shl %cl,%eax + +L_round: + testl $0x80000000,%eax + jz L_exit + + addl $1,%ebx + adcl $0,%edx + jnz L_exit + + movl $0x80000000,%edx + incl -4(%ebp) + +L_exit: + movl %edx,8(%esi) + movl %ebx,4(%esi) + movl %eax,(%esi) + + movl -4(%ebp),%eax + + popl %esi + popl %ebx + leave + ret + + + + +ENTRY(norm_Xsig) + pushl %ebp + movl %esp,%ebp + pushl %ebx /* Reserve some space */ + pushl %ebx + pushl %esi + + movl PARAM1,%esi + + movl 8(%esi),%edx + movl 4(%esi),%ebx + movl (%esi),%eax + + movl $0,-4(%ebp) + + orl %edx,%edx /* ms bits */ + js L_n_exit /* Already normalized */ + jnz L_n_shift_1 /* Shift left 1 - 31 bits */ + + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + movl $-32,-4(%ebp) + + orl %edx,%edx /* ms bits */ + js L_n_exit /* Normalized now */ + jnz L_n_shift_1 /* Shift left 1 - 31 bits */ + + movl %ebx,%edx + movl %eax,%ebx + xorl %eax,%eax + addl $-32,-4(%ebp) + jmp L_n_exit /* Might not be normalized, + but shift no more. */ + +/* We need to shift left by 1 - 31 bits */ +L_n_shift_1: + bsrl %edx,%ecx /* get the required shift in %ecx */ + subl $31,%ecx + negl %ecx + subl %ecx,-4(%ebp) + shld %cl,%ebx,%edx + shld %cl,%eax,%ebx + shl %cl,%eax + +L_n_exit: + movl %edx,8(%esi) + movl %ebx,4(%esi) + movl %eax,(%esi) + + movl -4(%ebp),%eax + + popl %esi + popl %ebx + leave + ret + diff --git a/arch/x86/math-emu/shr_Xsig.S b/arch/x86/math-emu/shr_Xsig.S new file mode 100644 index 00000000000..31cdd118e91 --- /dev/null +++ b/arch/x86/math-emu/shr_Xsig.S @@ -0,0 +1,87 @@ + .file "shr_Xsig.S" +/*---------------------------------------------------------------------------+ + | shr_Xsig.S | + | | + | 12 byte right shift function | + | | + | Copyright (C) 1992,1994,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | Call from C as: | + | void shr_Xsig(Xsig *arg, unsigned nr) | + | | + | Extended shift right function. | + | Fastest for small shifts. | + | Shifts the 12 byte quantity pointed to by the first arg (arg) | + | right by the number of bits specified by the second arg (nr). | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" + +.text +ENTRY(shr_Xsig) + push %ebp + movl %esp,%ebp + pushl %esi + movl PARAM2,%ecx + movl PARAM1,%esi + cmpl $32,%ecx /* shrd only works for 0..31 bits */ + jnc L_more_than_31 + +/* less than 32 bits */ + pushl %ebx + movl (%esi),%eax /* lsl */ + movl 4(%esi),%ebx /* midl */ + movl 8(%esi),%edx /* msl */ + shrd %cl,%ebx,%eax + shrd %cl,%edx,%ebx + shr %cl,%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edx,8(%esi) + popl %ebx + popl %esi + leave + ret + +L_more_than_31: + cmpl $64,%ecx + jnc L_more_than_63 + + subb $32,%cl + movl 4(%esi),%eax /* midl */ + movl 8(%esi),%edx /* msl */ + shrd %cl,%edx,%eax + shr %cl,%edx + movl %eax,(%esi) + movl %edx,4(%esi) + movl $0,8(%esi) + popl %esi + leave + ret + +L_more_than_63: + cmpl $96,%ecx + jnc L_more_than_95 + + subb $64,%cl + movl 8(%esi),%eax /* msl */ + shr %cl,%eax + xorl %edx,%edx + movl %eax,(%esi) + movl %edx,4(%esi) + movl %edx,8(%esi) + popl %esi + leave + ret + +L_more_than_95: + xorl %eax,%eax + movl %eax,(%esi) + movl %eax,4(%esi) + movl %eax,8(%esi) + popl %esi + leave + ret diff --git a/arch/x86/math-emu/status_w.h b/arch/x86/math-emu/status_w.h new file mode 100644 index 00000000000..59e73302aa6 --- /dev/null +++ b/arch/x86/math-emu/status_w.h @@ -0,0 +1,67 @@ +/*---------------------------------------------------------------------------+ + | status_w.h | + | | + | Copyright (C) 1992,1993 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@vaxc.cc.monash.edu.au | + | | + +---------------------------------------------------------------------------*/ + +#ifndef _STATUS_H_ +#define _STATUS_H_ + +#include "fpu_emu.h" /* for definition of PECULIAR_486 */ + +#ifdef __ASSEMBLY__ +#define Const__(x) $##x +#else +#define Const__(x) x +#endif + +#define SW_Backward Const__(0x8000) /* backward compatibility */ +#define SW_C3 Const__(0x4000) /* condition bit 3 */ +#define SW_Top Const__(0x3800) /* top of stack */ +#define SW_Top_Shift Const__(11) /* shift for top of stack bits */ +#define SW_C2 Const__(0x0400) /* condition bit 2 */ +#define SW_C1 Const__(0x0200) /* condition bit 1 */ +#define SW_C0 Const__(0x0100) /* condition bit 0 */ +#define SW_Summary Const__(0x0080) /* exception summary */ +#define SW_Stack_Fault Const__(0x0040) /* stack fault */ +#define SW_Precision Const__(0x0020) /* loss of precision */ +#define SW_Underflow Const__(0x0010) /* underflow */ +#define SW_Overflow Const__(0x0008) /* overflow */ +#define SW_Zero_Div Const__(0x0004) /* divide by zero */ +#define SW_Denorm_Op Const__(0x0002) /* denormalized operand */ +#define SW_Invalid Const__(0x0001) /* invalid operation */ + +#define SW_Exc_Mask Const__(0x27f) /* Status word exception bit mask */ + +#ifndef __ASSEMBLY__ + +#define COMP_A_gt_B 1 +#define COMP_A_eq_B 2 +#define COMP_A_lt_B 3 +#define COMP_No_Comp 4 +#define COMP_Denormal 0x20 +#define COMP_NaN 0x40 +#define COMP_SNaN 0x80 + +#define status_word() \ + ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top)) +static inline void setcc(int cc) +{ + partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3); + partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3); +} + +#ifdef PECULIAR_486 + /* Default, this conveys no information, but an 80486 does it. */ + /* Clear the SW_C1 bit, "other bits undefined". */ +# define clear_C1() { partial_status &= ~SW_C1; } +# else +# define clear_C1() +#endif /* PECULIAR_486 */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _STATUS_H_ */ diff --git a/arch/x86/math-emu/version.h b/arch/x86/math-emu/version.h new file mode 100644 index 00000000000..a0d73a1d2b6 --- /dev/null +++ b/arch/x86/math-emu/version.h @@ -0,0 +1,12 @@ +/*---------------------------------------------------------------------------+ + | version.h | + | | + | | + | Copyright (C) 1992,1993,1994,1996,1997,1999 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | + | E-mail billm@melbpc.org.au | + | | + | | + +---------------------------------------------------------------------------*/ + +#define FPU_VERSION "wm-FPU-emu version 2.01" diff --git a/arch/x86/math-emu/wm_shrx.S b/arch/x86/math-emu/wm_shrx.S new file mode 100644 index 00000000000..51842831798 --- /dev/null +++ b/arch/x86/math-emu/wm_shrx.S @@ -0,0 +1,204 @@ + .file "wm_shrx.S" +/*---------------------------------------------------------------------------+ + | wm_shrx.S | + | | + | 64 bit right shift functions | + | | + | Copyright (C) 1992,1995 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@jacobi.maths.monash.edu.au | + | | + | Call from C as: | + | unsigned FPU_shrx(void *arg1, unsigned arg2) | + | and | + | unsigned FPU_shrxs(void *arg1, unsigned arg2) | + | | + +---------------------------------------------------------------------------*/ + +#include "fpu_emu.h" + +.text +/*---------------------------------------------------------------------------+ + | unsigned FPU_shrx(void *arg1, unsigned arg2) | + | | + | Extended shift right function. | + | Fastest for small shifts. | + | Shifts the 64 bit quantity pointed to by the first arg (arg1) | + | right by the number of bits specified by the second arg (arg2). | + | Forms a 96 bit quantity from the 64 bit arg and eax: | + | [ 64 bit arg ][ eax ] | + | shift right ---------> | + | The eax register is initialized to 0 before the shifting. | + | Results returned in the 64 bit arg and eax. | + +---------------------------------------------------------------------------*/ + +ENTRY(FPU_shrx) + push %ebp + movl %esp,%ebp + pushl %esi + movl PARAM2,%ecx + movl PARAM1,%esi + cmpl $32,%ecx /* shrd only works for 0..31 bits */ + jnc L_more_than_31 + +/* less than 32 bits */ + pushl %ebx + movl (%esi),%ebx /* lsl */ + movl 4(%esi),%edx /* msl */ + xorl %eax,%eax /* extension */ + shrd %cl,%ebx,%eax + shrd %cl,%edx,%ebx + shr %cl,%edx + movl %ebx,(%esi) + movl %edx,4(%esi) + popl %ebx + popl %esi + leave + ret + +L_more_than_31: + cmpl $64,%ecx + jnc L_more_than_63 + + subb $32,%cl + movl (%esi),%eax /* lsl */ + movl 4(%esi),%edx /* msl */ + shrd %cl,%edx,%eax + shr %cl,%edx + movl %edx,(%esi) + movl $0,4(%esi) + popl %esi + leave + ret + +L_more_than_63: + cmpl $96,%ecx + jnc L_more_than_95 + + subb $64,%cl + movl 4(%esi),%eax /* msl */ + shr %cl,%eax + xorl %edx,%edx + movl %edx,(%esi) + movl %edx,4(%esi) + popl %esi + leave + ret + +L_more_than_95: + xorl %eax,%eax + movl %eax,(%esi) + movl %eax,4(%esi) + popl %esi + leave + ret + + +/*---------------------------------------------------------------------------+ + | unsigned FPU_shrxs(void *arg1, unsigned arg2) | + | | + | Extended shift right function (optimized for small floating point | + | integers). | + | Shifts the 64 bit quantity pointed to by the first arg (arg1) | + | right by the number of bits specified by the second arg (arg2). | + | Forms a 96 bit quantity from the 64 bit arg and eax: | + | [ 64 bit arg ][ eax ] | + | shift right ---------> | + | The eax register is initialized to 0 before the shifting. | + | The lower 8 bits of eax are lost and replaced by a flag which is | + | set (to 0x01) if any bit, apart from the first one, is set in the | + | part which has been shifted out of the arg. | + | Results returned in the 64 bit arg and eax. | + +---------------------------------------------------------------------------*/ +ENTRY(FPU_shrxs) + push %ebp + movl %esp,%ebp + pushl %esi + pushl %ebx + movl PARAM2,%ecx + movl PARAM1,%esi + cmpl $64,%ecx /* shrd only works for 0..31 bits */ + jnc Ls_more_than_63 + + cmpl $32,%ecx /* shrd only works for 0..31 bits */ + jc Ls_less_than_32 + +/* We got here without jumps by assuming that the most common requirement + is for small integers */ +/* Shift by [32..63] bits */ + subb $32,%cl + movl (%esi),%eax /* lsl */ + movl 4(%esi),%edx /* msl */ + xorl %ebx,%ebx + shrd %cl,%eax,%ebx + shrd %cl,%edx,%eax + shr %cl,%edx + orl %ebx,%ebx /* test these 32 bits */ + setne %bl + test $0x7fffffff,%eax /* and 31 bits here */ + setne %bh + orw %bx,%bx /* Any of the 63 bit set ? */ + setne %al + movl %edx,(%esi) + movl $0,4(%esi) + popl %ebx + popl %esi + leave + ret + +/* Shift by [0..31] bits */ +Ls_less_than_32: + movl (%esi),%ebx /* lsl */ + movl 4(%esi),%edx /* msl */ + xorl %eax,%eax /* extension */ + shrd %cl,%ebx,%eax + shrd %cl,%edx,%ebx + shr %cl,%edx + test $0x7fffffff,%eax /* only need to look at eax here */ + setne %al + movl %ebx,(%esi) + movl %edx,4(%esi) + popl %ebx + popl %esi + leave + ret + +/* Shift by [64..95] bits */ +Ls_more_than_63: + cmpl $96,%ecx + jnc Ls_more_than_95 + + subb $64,%cl + movl (%esi),%ebx /* lsl */ + movl 4(%esi),%eax /* msl */ + xorl %edx,%edx /* extension */ + shrd %cl,%ebx,%edx + shrd %cl,%eax,%ebx + shr %cl,%eax + orl %ebx,%edx + setne %bl + test $0x7fffffff,%eax /* only need to look at eax here */ + setne %bh + orw %bx,%bx + setne %al + xorl %edx,%edx + movl %edx,(%esi) /* set to zero */ + movl %edx,4(%esi) /* set to zero */ + popl %ebx + popl %esi + leave + ret + +Ls_more_than_95: +/* Shift by [96..inf) bits */ + xorl %eax,%eax + movl (%esi),%ebx + orl 4(%esi),%ebx + setne %al + xorl %ebx,%ebx + movl %ebx,(%esi) + movl %ebx,4(%esi) + popl %ebx + popl %esi + leave + ret diff --git a/arch/x86/math-emu/wm_sqrt.S b/arch/x86/math-emu/wm_sqrt.S new file mode 100644 index 00000000000..d258f59564e --- /dev/null +++ b/arch/x86/math-emu/wm_sqrt.S @@ -0,0 +1,470 @@ + .file "wm_sqrt.S" +/*---------------------------------------------------------------------------+ + | wm_sqrt.S | + | | + | Fixed point arithmetic square root evaluation. | + | | + | Copyright (C) 1992,1993,1995,1997 | + | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | + | Australia. E-mail billm@suburbia.net | + | | + | Call from C as: | + | int wm_sqrt(FPU_REG *n, unsigned int control_word) | + | | + +---------------------------------------------------------------------------*/ + +/*---------------------------------------------------------------------------+ + | wm_sqrt(FPU_REG *n, unsigned int control_word) | + | returns the square root of n in n. | + | | + | Use Newton's method to compute the square root of a number, which must | + | be in the range [1.0 .. 4.0), to 64 bits accuracy. | + | Does not check the sign or tag of the argument. | + | Sets the exponent, but not the sign or tag of the result. | + | | + | The guess is kept in %esi:%edi | + +---------------------------------------------------------------------------*/ + +#include "exception.h" +#include "fpu_emu.h" + + +#ifndef NON_REENTRANT_FPU +/* Local storage on the stack: */ +#define FPU_accum_3 -4(%ebp) /* ms word */ +#define FPU_accum_2 -8(%ebp) +#define FPU_accum_1 -12(%ebp) +#define FPU_accum_0 -16(%ebp) + +/* + * The de-normalised argument: + * sq_2 sq_1 sq_0 + * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 + * ^ binary point here + */ +#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */ +#define FPU_fsqrt_arg_1 -24(%ebp) +#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */ + +#else +/* Local storage in a static area: */ +.data + .align 4,0 +FPU_accum_3: + .long 0 /* ms word */ +FPU_accum_2: + .long 0 +FPU_accum_1: + .long 0 +FPU_accum_0: + .long 0 + +/* The de-normalised argument: + sq_2 sq_1 sq_0 + b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 + ^ binary point here + */ +FPU_fsqrt_arg_2: + .long 0 /* ms word */ +FPU_fsqrt_arg_1: + .long 0 +FPU_fsqrt_arg_0: + .long 0 /* ls word, at most the ms bit is set */ +#endif /* NON_REENTRANT_FPU */ + + +.text +ENTRY(wm_sqrt) + pushl %ebp + movl %esp,%ebp +#ifndef NON_REENTRANT_FPU + subl $28,%esp +#endif /* NON_REENTRANT_FPU */ + pushl %esi + pushl %edi + pushl %ebx + + movl PARAM1,%esi + + movl SIGH(%esi),%eax + movl SIGL(%esi),%ecx + xorl %edx,%edx + +/* We use a rough linear estimate for the first guess.. */ + + cmpw EXP_BIAS,EXP(%esi) + jnz sqrt_arg_ge_2 + + shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */ + rcrl $1,%ecx + rcrl $1,%edx + +sqrt_arg_ge_2: +/* From here on, n is never accessed directly again until it is + replaced by the answer. */ + + movl %eax,FPU_fsqrt_arg_2 /* ms word of n */ + movl %ecx,FPU_fsqrt_arg_1 + movl %edx,FPU_fsqrt_arg_0 + +/* Make a linear first estimate */ + shrl $1,%eax + addl $0x40000000,%eax + movl $0xaaaaaaaa,%ecx + mull %ecx + shll %edx /* max result was 7fff... */ + testl $0x80000000,%edx /* but min was 3fff... */ + jnz sqrt_prelim_no_adjust + + movl $0x80000000,%edx /* round up */ + +sqrt_prelim_no_adjust: + movl %edx,%esi /* Our first guess */ + +/* We have now computed (approx) (2 + x) / 3, which forms the basis + for a few iterations of Newton's method */ + + movl FPU_fsqrt_arg_2,%ecx /* ms word */ + +/* + * From our initial estimate, three iterations are enough to get us + * to 30 bits or so. This will then allow two iterations at better + * precision to complete the process. + */ + +/* Compute (g + n/g)/2 at each iteration (g is the guess). */ + shrl %ecx /* Doing this first will prevent a divide */ + /* overflow later. */ + + movl %ecx,%edx /* msw of the arg / 2 */ + divl %esi /* current estimate */ + shrl %esi /* divide by 2 */ + addl %eax,%esi /* the new estimate */ + + movl %ecx,%edx + divl %esi + shrl %esi + addl %eax,%esi + + movl %ecx,%edx + divl %esi + shrl %esi + addl %eax,%esi + +/* + * Now that an estimate accurate to about 30 bits has been obtained (in %esi), + * we improve it to 60 bits or so. + * + * The strategy from now on is to compute new estimates from + * guess := guess + (n - guess^2) / (2 * guess) + */ + +/* First, find the square of the guess */ + movl %esi,%eax + mull %esi +/* guess^2 now in %edx:%eax */ + + movl FPU_fsqrt_arg_1,%ecx + subl %ecx,%eax + movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */ + sbbl %ecx,%edx + jnc sqrt_stage_2_positive + +/* Subtraction gives a negative result, + negate the result before division. */ + notl %edx + notl %eax + addl $1,%eax + adcl $0,%edx + + divl %esi + movl %eax,%ecx + + movl %edx,%eax + divl %esi + jmp sqrt_stage_2_finish + +sqrt_stage_2_positive: + divl %esi + movl %eax,%ecx + + movl %edx,%eax + divl %esi + + notl %ecx + notl %eax + addl $1,%eax + adcl $0,%ecx + +sqrt_stage_2_finish: + sarl $1,%ecx /* divide by 2 */ + rcrl $1,%eax + + /* Form the new estimate in %esi:%edi */ + movl %eax,%edi + addl %ecx,%esi + + jnz sqrt_stage_2_done /* result should be [1..2) */ + +#ifdef PARANOID +/* It should be possible to get here only if the arg is ffff....ffff */ + cmp $0xffffffff,FPU_fsqrt_arg_1 + jnz sqrt_stage_2_error +#endif /* PARANOID */ + +/* The best rounded result. */ + xorl %eax,%eax + decl %eax + movl %eax,%edi + movl %eax,%esi + movl $0x7fffffff,%eax + jmp sqrt_round_result + +#ifdef PARANOID +sqrt_stage_2_error: + pushl EX_INTERNAL|0x213 + call EXCEPTION +#endif /* PARANOID */ + +sqrt_stage_2_done: + +/* Now the square root has been computed to better than 60 bits. */ + +/* Find the square of the guess. */ + movl %edi,%eax /* ls word of guess */ + mull %edi + movl %edx,FPU_accum_1 + + movl %esi,%eax + mull %esi + movl %edx,FPU_accum_3 + movl %eax,FPU_accum_2 + + movl %edi,%eax + mull %esi + addl %eax,FPU_accum_1 + adcl %edx,FPU_accum_2 + adcl $0,FPU_accum_3 + +/* movl %esi,%eax */ +/* mull %edi */ + addl %eax,FPU_accum_1 + adcl %edx,FPU_accum_2 + adcl $0,FPU_accum_3 + +/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */ + + movl FPU_fsqrt_arg_0,%eax /* get normalized n */ + subl %eax,FPU_accum_1 + movl FPU_fsqrt_arg_1,%eax + sbbl %eax,FPU_accum_2 + movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */ + sbbl %eax,FPU_accum_3 + jnc sqrt_stage_3_positive + +/* Subtraction gives a negative result, + negate the result before division */ + notl FPU_accum_1 + notl FPU_accum_2 + notl FPU_accum_3 + addl $1,FPU_accum_1 + adcl $0,FPU_accum_2 + +#ifdef PARANOID + adcl $0,FPU_accum_3 /* This must be zero */ + jz sqrt_stage_3_no_error + +sqrt_stage_3_error: + pushl EX_INTERNAL|0x207 + call EXCEPTION + +sqrt_stage_3_no_error: +#endif /* PARANOID */ + + movl FPU_accum_2,%edx + movl FPU_accum_1,%eax + divl %esi + movl %eax,%ecx + + movl %edx,%eax + divl %esi + + sarl $1,%ecx /* divide by 2 */ + rcrl $1,%eax + + /* prepare to round the result */ + + addl %ecx,%edi + adcl $0,%esi + + jmp sqrt_stage_3_finished + +sqrt_stage_3_positive: + movl FPU_accum_2,%edx + movl FPU_accum_1,%eax + divl %esi + movl %eax,%ecx + + movl %edx,%eax + divl %esi + + sarl $1,%ecx /* divide by 2 */ + rcrl $1,%eax + + /* prepare to round the result */ + + notl %eax /* Negate the correction term */ + notl %ecx + addl $1,%eax + adcl $0,%ecx /* carry here ==> correction == 0 */ + adcl $0xffffffff,%esi + + addl %ecx,%edi + adcl $0,%esi + +sqrt_stage_3_finished: + +/* + * The result in %esi:%edi:%esi should be good to about 90 bits here, + * and the rounding information here does not have sufficient accuracy + * in a few rare cases. + */ + cmpl $0xffffffe0,%eax + ja sqrt_near_exact_x + + cmpl $0x00000020,%eax + jb sqrt_near_exact + + cmpl $0x7fffffe0,%eax + jb sqrt_round_result + + cmpl $0x80000020,%eax + jb sqrt_get_more_precision + +sqrt_round_result: +/* Set up for rounding operations */ + movl %eax,%edx + movl %esi,%eax + movl %edi,%ebx + movl PARAM1,%edi + movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */ + jmp fpu_reg_round + + +sqrt_near_exact_x: +/* First, the estimate must be rounded up. */ + addl $1,%edi + adcl $0,%esi + +sqrt_near_exact: +/* + * This is an easy case because x^1/2 is monotonic. + * We need just find the square of our estimate, compare it + * with the argument, and deduce whether our estimate is + * above, below, or exact. We use the fact that the estimate + * is known to be accurate to about 90 bits. + */ + movl %edi,%eax /* ls word of guess */ + mull %edi + movl %edx,%ebx /* 2nd ls word of square */ + movl %eax,%ecx /* ls word of square */ + + movl %edi,%eax + mull %esi + addl %eax,%ebx + addl %eax,%ebx + +#ifdef PARANOID + cmp $0xffffffb0,%ebx + jb sqrt_near_exact_ok + + cmp $0x00000050,%ebx + ja sqrt_near_exact_ok + + pushl EX_INTERNAL|0x214 + call EXCEPTION + +sqrt_near_exact_ok: +#endif /* PARANOID */ + + or %ebx,%ebx + js sqrt_near_exact_small + + jnz sqrt_near_exact_large + + or %ebx,%edx + jnz sqrt_near_exact_large + +/* Our estimate is exactly the right answer */ + xorl %eax,%eax + jmp sqrt_round_result + +sqrt_near_exact_small: +/* Our estimate is too small */ + movl $0x000000ff,%eax + jmp sqrt_round_result + +sqrt_near_exact_large: +/* Our estimate is too large, we need to decrement it */ + subl $1,%edi + sbbl $0,%esi + movl $0xffffff00,%eax + jmp sqrt_round_result + + +sqrt_get_more_precision: +/* This case is almost the same as the above, except we start + with an extra bit of precision in the estimate. */ + stc /* The extra bit. */ + rcll $1,%edi /* Shift the estimate left one bit */ + rcll $1,%esi + + movl %edi,%eax /* ls word of guess */ + mull %edi + movl %edx,%ebx /* 2nd ls word of square */ + movl %eax,%ecx /* ls word of square */ + + movl %edi,%eax + mull %esi + addl %eax,%ebx + addl %eax,%ebx + +/* Put our estimate back to its original value */ + stc /* The ms bit. */ + rcrl $1,%esi /* Shift the estimate left one bit */ + rcrl $1,%edi + +#ifdef PARANOID + cmp $0xffffff60,%ebx + jb sqrt_more_prec_ok + + cmp $0x000000a0,%ebx + ja sqrt_more_prec_ok + + pushl EX_INTERNAL|0x215 + call EXCEPTION + +sqrt_more_prec_ok: +#endif /* PARANOID */ + + or %ebx,%ebx + js sqrt_more_prec_small + + jnz sqrt_more_prec_large + + or %ebx,%ecx + jnz sqrt_more_prec_large + +/* Our estimate is exactly the right answer */ + movl $0x80000000,%eax + jmp sqrt_round_result + +sqrt_more_prec_small: +/* Our estimate is too small */ + movl $0x800000ff,%eax + jmp sqrt_round_result + +sqrt_more_prec_large: +/* Our estimate is too large */ + movl $0x7fffff00,%eax + jmp sqrt_round_result -- cgit v1.2.3-70-g09d2 From 44f0257fc316ff4b33aa3438dd8d891b7d6d72b9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:33 +0200 Subject: i386: move lib Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Makefile | 2 +- arch/i386/lib/Makefile | 5 - arch/i386/lib/Makefile_32 | 11 - arch/i386/lib/bitops_32.c | 70 ---- arch/i386/lib/checksum_32.S | 546 --------------------------- arch/i386/lib/delay_32.c | 103 ----- arch/i386/lib/getuser_32.S | 78 ---- arch/i386/lib/memcpy_32.c | 43 --- arch/i386/lib/mmx_32.c | 403 -------------------- arch/i386/lib/msr-on-cpu.c | 119 ------ arch/i386/lib/putuser_32.S | 98 ----- arch/i386/lib/semaphore_32.S | 219 ----------- arch/i386/lib/string_32.c | 257 ------------- arch/i386/lib/strstr_32.c | 31 -- arch/i386/lib/usercopy_32.c | 882 ------------------------------------------- arch/x86/lib/Makefile | 5 + arch/x86/lib/Makefile_32 | 11 + arch/x86/lib/bitops_32.c | 70 ++++ arch/x86/lib/checksum_32.S | 546 +++++++++++++++++++++++++++ arch/x86/lib/delay_32.c | 103 +++++ arch/x86/lib/getuser_32.S | 78 ++++ arch/x86/lib/memcpy_32.c | 43 +++ arch/x86/lib/mmx_32.c | 403 ++++++++++++++++++++ arch/x86/lib/msr-on-cpu.c | 119 ++++++ arch/x86/lib/putuser_32.S | 98 +++++ arch/x86/lib/semaphore_32.S | 219 +++++++++++ arch/x86/lib/string_32.c | 257 +++++++++++++ arch/x86/lib/strstr_32.c | 31 ++ arch/x86/lib/usercopy_32.c | 882 +++++++++++++++++++++++++++++++++++++++++++ arch/x86_64/lib/Makefile | 2 +- arch/x86_64/lib/msr-on-cpu.c | 2 +- 31 files changed, 2868 insertions(+), 2868 deletions(-) delete mode 100644 arch/i386/lib/Makefile delete mode 100644 arch/i386/lib/Makefile_32 delete mode 100644 arch/i386/lib/bitops_32.c delete mode 100644 arch/i386/lib/checksum_32.S delete mode 100644 arch/i386/lib/delay_32.c delete mode 100644 arch/i386/lib/getuser_32.S delete mode 100644 arch/i386/lib/memcpy_32.c delete mode 100644 arch/i386/lib/mmx_32.c delete mode 100644 arch/i386/lib/msr-on-cpu.c delete mode 100644 arch/i386/lib/putuser_32.S delete mode 100644 arch/i386/lib/semaphore_32.S delete mode 100644 arch/i386/lib/string_32.c delete mode 100644 arch/i386/lib/strstr_32.c delete mode 100644 arch/i386/lib/usercopy_32.c create mode 100644 arch/x86/lib/Makefile create mode 100644 arch/x86/lib/Makefile_32 create mode 100644 arch/x86/lib/bitops_32.c create mode 100644 arch/x86/lib/checksum_32.S create mode 100644 arch/x86/lib/delay_32.c create mode 100644 arch/x86/lib/getuser_32.S create mode 100644 arch/x86/lib/memcpy_32.c create mode 100644 arch/x86/lib/mmx_32.c create mode 100644 arch/x86/lib/msr-on-cpu.c create mode 100644 arch/x86/lib/putuser_32.S create mode 100644 arch/x86/lib/semaphore_32.S create mode 100644 arch/x86/lib/string_32.c create mode 100644 arch/x86/lib/strstr_32.c create mode 100644 arch/x86/lib/usercopy_32.c (limited to 'arch/x86') diff --git a/arch/i386/Makefile b/arch/i386/Makefile index fe374b65430..93492b37061 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -101,7 +101,7 @@ mflags-y += -Iinclude/asm-i386/mach-default head-y := arch/i386/kernel/head_32.o arch/i386/kernel/init_task_32.o -libs-y += arch/i386/lib/ +libs-y += arch/x86/lib/ core-y += arch/i386/kernel/ \ arch/i386/mm/ \ $(mcore-y)/ \ diff --git a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile deleted file mode 100644 index 2b1547e0ac6..00000000000 --- a/arch/i386/lib/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -ifeq ($(CONFIG_X86_32),y) -include ${srctree}/arch/i386/lib/Makefile_32 -else -include ${srctree}/arch/x86_64/lib/Makefile_64 -endif diff --git a/arch/i386/lib/Makefile_32 b/arch/i386/lib/Makefile_32 deleted file mode 100644 index 98d1f1e2e2e..00000000000 --- a/arch/i386/lib/Makefile_32 +++ /dev/null @@ -1,11 +0,0 @@ -# -# Makefile for i386-specific library files.. -# - - -lib-y = checksum_32.o delay_32.o usercopy_32.o getuser_32.o putuser_32.o memcpy_32.o strstr_32.o \ - bitops_32.o semaphore_32.o string_32.o - -lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o - -obj-$(CONFIG_SMP) += msr-on-cpu.o diff --git a/arch/i386/lib/bitops_32.c b/arch/i386/lib/bitops_32.c deleted file mode 100644 index afd0045595d..00000000000 --- a/arch/i386/lib/bitops_32.c +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include - -/** - * find_next_bit - find the first set bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search - */ -int find_next_bit(const unsigned long *addr, int size, int offset) -{ - const unsigned long *p = addr + (offset >> 5); - int set = 0, bit = offset & 31, res; - - if (bit) { - /* - * Look for nonzero in the first 32 bits: - */ - __asm__("bsfl %1,%0\n\t" - "jne 1f\n\t" - "movl $32, %0\n" - "1:" - : "=r" (set) - : "r" (*p >> bit)); - if (set < (32 - bit)) - return set + offset; - set = 32 - bit; - p++; - } - /* - * No set bit yet, search remaining full words for a bit - */ - res = find_first_bit (p, size - 32 * (p - addr)); - return (offset + set + res); -} -EXPORT_SYMBOL(find_next_bit); - -/** - * find_next_zero_bit - find the first zero bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search - */ -int find_next_zero_bit(const unsigned long *addr, int size, int offset) -{ - const unsigned long *p = addr + (offset >> 5); - int set = 0, bit = offset & 31, res; - - if (bit) { - /* - * Look for zero in the first 32 bits. - */ - __asm__("bsfl %1,%0\n\t" - "jne 1f\n\t" - "movl $32, %0\n" - "1:" - : "=r" (set) - : "r" (~(*p >> bit))); - if (set < (32 - bit)) - return set + offset; - set = 32 - bit; - p++; - } - /* - * No zero yet, search remaining full bytes for a zero - */ - res = find_first_zero_bit(p, size - 32 * (p - addr)); - return (offset + set + res); -} -EXPORT_SYMBOL(find_next_zero_bit); diff --git a/arch/i386/lib/checksum_32.S b/arch/i386/lib/checksum_32.S deleted file mode 100644 index adbccd0bbb7..00000000000 --- a/arch/i386/lib/checksum_32.S +++ /dev/null @@ -1,546 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * IP/TCP/UDP checksumming routines - * - * Authors: Jorge Cwik, - * Arnt Gulbrandsen, - * Tom May, - * Pentium Pro/II routines: - * Alexander Kjeldaas - * Finn Arne Gangstad - * Lots of code moved from tcp.c and ip.c; see those files - * for more names. - * - * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception - * handling. - * Andi Kleen, add zeroing on error - * converted to pure assembler - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include - -/* - * computes a partial checksum, e.g. for TCP/UDP fragments - */ - -/* -unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) - */ - -.text - -#ifndef CONFIG_X86_USE_PPRO_CHECKSUM - - /* - * Experiments with Ethernet and SLIP connections show that buff - * is aligned on either a 2-byte or 4-byte boundary. We get at - * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. - * Fortunately, it is easy to convert 2-byte alignment to 4-byte - * alignment for the unrolled loop. - */ -ENTRY(csum_partial) - CFI_STARTPROC - pushl %esi - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET esi, 0 - pushl %ebx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ebx, 0 - movl 20(%esp),%eax # Function arg: unsigned int sum - movl 16(%esp),%ecx # Function arg: int len - movl 12(%esp),%esi # Function arg: unsigned char *buff - testl $3, %esi # Check alignment. - jz 2f # Jump if alignment is ok. - testl $1, %esi # Check alignment. - jz 10f # Jump if alignment is boundary of 2bytes. - - # buf is odd - dec %ecx - jl 8f - movzbl (%esi), %ebx - adcl %ebx, %eax - roll $8, %eax - inc %esi - testl $2, %esi - jz 2f -10: - subl $2, %ecx # Alignment uses up two bytes. - jae 1f # Jump if we had at least two bytes. - addl $2, %ecx # ecx was < 2. Deal with it. - jmp 4f -1: movw (%esi), %bx - addl $2, %esi - addw %bx, %ax - adcl $0, %eax -2: - movl %ecx, %edx - shrl $5, %ecx - jz 2f - testl %esi, %esi -1: movl (%esi), %ebx - adcl %ebx, %eax - movl 4(%esi), %ebx - adcl %ebx, %eax - movl 8(%esi), %ebx - adcl %ebx, %eax - movl 12(%esi), %ebx - adcl %ebx, %eax - movl 16(%esi), %ebx - adcl %ebx, %eax - movl 20(%esi), %ebx - adcl %ebx, %eax - movl 24(%esi), %ebx - adcl %ebx, %eax - movl 28(%esi), %ebx - adcl %ebx, %eax - lea 32(%esi), %esi - dec %ecx - jne 1b - adcl $0, %eax -2: movl %edx, %ecx - andl $0x1c, %edx - je 4f - shrl $2, %edx # This clears CF -3: adcl (%esi), %eax - lea 4(%esi), %esi - dec %edx - jne 3b - adcl $0, %eax -4: andl $3, %ecx - jz 7f - cmpl $2, %ecx - jb 5f - movw (%esi),%cx - leal 2(%esi),%esi - je 6f - shll $16,%ecx -5: movb (%esi),%cl -6: addl %ecx,%eax - adcl $0, %eax -7: - testl $1, 12(%esp) - jz 8f - roll $8, %eax -8: - popl %ebx - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE ebx - popl %esi - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE esi - ret - CFI_ENDPROC -ENDPROC(csum_partial) - -#else - -/* Version for PentiumII/PPro */ - -ENTRY(csum_partial) - CFI_STARTPROC - pushl %esi - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET esi, 0 - pushl %ebx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ebx, 0 - movl 20(%esp),%eax # Function arg: unsigned int sum - movl 16(%esp),%ecx # Function arg: int len - movl 12(%esp),%esi # Function arg: const unsigned char *buf - - testl $3, %esi - jnz 25f -10: - movl %ecx, %edx - movl %ecx, %ebx - andl $0x7c, %ebx - shrl $7, %ecx - addl %ebx,%esi - shrl $2, %ebx - negl %ebx - lea 45f(%ebx,%ebx,2), %ebx - testl %esi, %esi - jmp *%ebx - - # Handle 2-byte-aligned regions -20: addw (%esi), %ax - lea 2(%esi), %esi - adcl $0, %eax - jmp 10b -25: - testl $1, %esi - jz 30f - # buf is odd - dec %ecx - jl 90f - movzbl (%esi), %ebx - addl %ebx, %eax - adcl $0, %eax - roll $8, %eax - inc %esi - testl $2, %esi - jz 10b - -30: subl $2, %ecx - ja 20b - je 32f - addl $2, %ecx - jz 80f - movzbl (%esi),%ebx # csumming 1 byte, 2-aligned - addl %ebx, %eax - adcl $0, %eax - jmp 80f -32: - addw (%esi), %ax # csumming 2 bytes, 2-aligned - adcl $0, %eax - jmp 80f - -40: - addl -128(%esi), %eax - adcl -124(%esi), %eax - adcl -120(%esi), %eax - adcl -116(%esi), %eax - adcl -112(%esi), %eax - adcl -108(%esi), %eax - adcl -104(%esi), %eax - adcl -100(%esi), %eax - adcl -96(%esi), %eax - adcl -92(%esi), %eax - adcl -88(%esi), %eax - adcl -84(%esi), %eax - adcl -80(%esi), %eax - adcl -76(%esi), %eax - adcl -72(%esi), %eax - adcl -68(%esi), %eax - adcl -64(%esi), %eax - adcl -60(%esi), %eax - adcl -56(%esi), %eax - adcl -52(%esi), %eax - adcl -48(%esi), %eax - adcl -44(%esi), %eax - adcl -40(%esi), %eax - adcl -36(%esi), %eax - adcl -32(%esi), %eax - adcl -28(%esi), %eax - adcl -24(%esi), %eax - adcl -20(%esi), %eax - adcl -16(%esi), %eax - adcl -12(%esi), %eax - adcl -8(%esi), %eax - adcl -4(%esi), %eax -45: - lea 128(%esi), %esi - adcl $0, %eax - dec %ecx - jge 40b - movl %edx, %ecx -50: andl $3, %ecx - jz 80f - - # Handle the last 1-3 bytes without jumping - notl %ecx # 1->2, 2->1, 3->0, higher bits are masked - movl $0xffffff,%ebx # by the shll and shrl instructions - shll $3,%ecx - shrl %cl,%ebx - andl -128(%esi),%ebx # esi is 4-aligned so should be ok - addl %ebx,%eax - adcl $0,%eax -80: - testl $1, 12(%esp) - jz 90f - roll $8, %eax -90: - popl %ebx - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE ebx - popl %esi - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE esi - ret - CFI_ENDPROC -ENDPROC(csum_partial) - -#endif - -/* -unsigned int csum_partial_copy_generic (const char *src, char *dst, - int len, int sum, int *src_err_ptr, int *dst_err_ptr) - */ - -/* - * Copy from ds while checksumming, otherwise like csum_partial - * - * The macros SRC and DST specify the type of access for the instruction. - * thus we can call a custom exception handler for all access types. - * - * FIXME: could someone double-check whether I haven't mixed up some SRC and - * DST definitions? It's damn hard to trigger all cases. I hope I got - * them all but there's no guarantee. - */ - -#define SRC(y...) \ - 9999: y; \ - .section __ex_table, "a"; \ - .long 9999b, 6001f ; \ - .previous - -#define DST(y...) \ - 9999: y; \ - .section __ex_table, "a"; \ - .long 9999b, 6002f ; \ - .previous - -#ifndef CONFIG_X86_USE_PPRO_CHECKSUM - -#define ARGBASE 16 -#define FP 12 - -ENTRY(csum_partial_copy_generic) - CFI_STARTPROC - subl $4,%esp - CFI_ADJUST_CFA_OFFSET 4 - pushl %edi - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET edi, 0 - pushl %esi - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET esi, 0 - pushl %ebx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ebx, 0 - movl ARGBASE+16(%esp),%eax # sum - movl ARGBASE+12(%esp),%ecx # len - movl ARGBASE+4(%esp),%esi # src - movl ARGBASE+8(%esp),%edi # dst - - testl $2, %edi # Check alignment. - jz 2f # Jump if alignment is ok. - subl $2, %ecx # Alignment uses up two bytes. - jae 1f # Jump if we had at least two bytes. - addl $2, %ecx # ecx was < 2. Deal with it. - jmp 4f -SRC(1: movw (%esi), %bx ) - addl $2, %esi -DST( movw %bx, (%edi) ) - addl $2, %edi - addw %bx, %ax - adcl $0, %eax -2: - movl %ecx, FP(%esp) - shrl $5, %ecx - jz 2f - testl %esi, %esi -SRC(1: movl (%esi), %ebx ) -SRC( movl 4(%esi), %edx ) - adcl %ebx, %eax -DST( movl %ebx, (%edi) ) - adcl %edx, %eax -DST( movl %edx, 4(%edi) ) - -SRC( movl 8(%esi), %ebx ) -SRC( movl 12(%esi), %edx ) - adcl %ebx, %eax -DST( movl %ebx, 8(%edi) ) - adcl %edx, %eax -DST( movl %edx, 12(%edi) ) - -SRC( movl 16(%esi), %ebx ) -SRC( movl 20(%esi), %edx ) - adcl %ebx, %eax -DST( movl %ebx, 16(%edi) ) - adcl %edx, %eax -DST( movl %edx, 20(%edi) ) - -SRC( movl 24(%esi), %ebx ) -SRC( movl 28(%esi), %edx ) - adcl %ebx, %eax -DST( movl %ebx, 24(%edi) ) - adcl %edx, %eax -DST( movl %edx, 28(%edi) ) - - lea 32(%esi), %esi - lea 32(%edi), %edi - dec %ecx - jne 1b - adcl $0, %eax -2: movl FP(%esp), %edx - movl %edx, %ecx - andl $0x1c, %edx - je 4f - shrl $2, %edx # This clears CF -SRC(3: movl (%esi), %ebx ) - adcl %ebx, %eax -DST( movl %ebx, (%edi) ) - lea 4(%esi), %esi - lea 4(%edi), %edi - dec %edx - jne 3b - adcl $0, %eax -4: andl $3, %ecx - jz 7f - cmpl $2, %ecx - jb 5f -SRC( movw (%esi), %cx ) - leal 2(%esi), %esi -DST( movw %cx, (%edi) ) - leal 2(%edi), %edi - je 6f - shll $16,%ecx -SRC(5: movb (%esi), %cl ) -DST( movb %cl, (%edi) ) -6: addl %ecx, %eax - adcl $0, %eax -7: -5000: - -# Exception handler: -.section .fixup, "ax" - -6001: - movl ARGBASE+20(%esp), %ebx # src_err_ptr - movl $-EFAULT, (%ebx) - - # zero the complete destination - computing the rest - # is too much work - movl ARGBASE+8(%esp), %edi # dst - movl ARGBASE+12(%esp), %ecx # len - xorl %eax,%eax - rep ; stosb - - jmp 5000b - -6002: - movl ARGBASE+24(%esp), %ebx # dst_err_ptr - movl $-EFAULT,(%ebx) - jmp 5000b - -.previous - - popl %ebx - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE ebx - popl %esi - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE esi - popl %edi - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE edi - popl %ecx # equivalent to addl $4,%esp - CFI_ADJUST_CFA_OFFSET -4 - ret - CFI_ENDPROC -ENDPROC(csum_partial_copy_generic) - -#else - -/* Version for PentiumII/PPro */ - -#define ROUND1(x) \ - SRC(movl x(%esi), %ebx ) ; \ - addl %ebx, %eax ; \ - DST(movl %ebx, x(%edi) ) ; - -#define ROUND(x) \ - SRC(movl x(%esi), %ebx ) ; \ - adcl %ebx, %eax ; \ - DST(movl %ebx, x(%edi) ) ; - -#define ARGBASE 12 - -ENTRY(csum_partial_copy_generic) - CFI_STARTPROC - pushl %ebx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ebx, 0 - pushl %edi - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET edi, 0 - pushl %esi - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET esi, 0 - movl ARGBASE+4(%esp),%esi #src - movl ARGBASE+8(%esp),%edi #dst - movl ARGBASE+12(%esp),%ecx #len - movl ARGBASE+16(%esp),%eax #sum -# movl %ecx, %edx - movl %ecx, %ebx - movl %esi, %edx - shrl $6, %ecx - andl $0x3c, %ebx - negl %ebx - subl %ebx, %esi - subl %ebx, %edi - lea -1(%esi),%edx - andl $-32,%edx - lea 3f(%ebx,%ebx), %ebx - testl %esi, %esi - jmp *%ebx -1: addl $64,%esi - addl $64,%edi - SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) - ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52) - ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36) - ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20) - ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4) -3: adcl $0,%eax - addl $64, %edx - dec %ecx - jge 1b -4: movl ARGBASE+12(%esp),%edx #len - andl $3, %edx - jz 7f - cmpl $2, %edx - jb 5f -SRC( movw (%esi), %dx ) - leal 2(%esi), %esi -DST( movw %dx, (%edi) ) - leal 2(%edi), %edi - je 6f - shll $16,%edx -5: -SRC( movb (%esi), %dl ) -DST( movb %dl, (%edi) ) -6: addl %edx, %eax - adcl $0, %eax -7: -.section .fixup, "ax" -6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr - movl $-EFAULT, (%ebx) - # zero the complete destination (computing the rest is too much work) - movl ARGBASE+8(%esp),%edi # dst - movl ARGBASE+12(%esp),%ecx # len - xorl %eax,%eax - rep; stosb - jmp 7b -6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr - movl $-EFAULT, (%ebx) - jmp 7b -.previous - - popl %esi - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE esi - popl %edi - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE edi - popl %ebx - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE ebx - ret - CFI_ENDPROC -ENDPROC(csum_partial_copy_generic) - -#undef ROUND -#undef ROUND1 - -#endif diff --git a/arch/i386/lib/delay_32.c b/arch/i386/lib/delay_32.c deleted file mode 100644 index f6edb11364d..00000000000 --- a/arch/i386/lib/delay_32.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Precise Delay Loops for i386 - * - * Copyright (C) 1993 Linus Torvalds - * Copyright (C) 1997 Martin Mares - * - * The __delay function must _NOT_ be inlined as its execution time - * depends wildly on alignment on many x86 processors. The additional - * jump magic is needed to get the timing stable on all the CPU's - * we have to worry about. - */ - -#include -#include -#include - -#include -#include -#include - -#ifdef CONFIG_SMP -# include -#endif - -/* simple loop based delay: */ -static void delay_loop(unsigned long loops) -{ - int d0; - - __asm__ __volatile__( - "\tjmp 1f\n" - ".align 16\n" - "1:\tjmp 2f\n" - ".align 16\n" - "2:\tdecl %0\n\tjns 2b" - :"=&a" (d0) - :"0" (loops)); -} - -/* TSC based delay: */ -static void delay_tsc(unsigned long loops) -{ - unsigned long bclock, now; - - rdtscl(bclock); - do { - rep_nop(); - rdtscl(now); - } while ((now-bclock) < loops); -} - -/* - * Since we calibrate only once at boot, this - * function should be set once at boot and not changed - */ -static void (*delay_fn)(unsigned long) = delay_loop; - -void use_tsc_delay(void) -{ - delay_fn = delay_tsc; -} - -int read_current_timer(unsigned long *timer_val) -{ - if (delay_fn == delay_tsc) { - rdtscl(*timer_val); - return 0; - } - return -1; -} - -void __delay(unsigned long loops) -{ - delay_fn(loops); -} - -inline void __const_udelay(unsigned long xloops) -{ - int d0; - - xloops *= 4; - __asm__("mull %0" - :"=d" (xloops), "=&a" (d0) - :"1" (xloops), "0" - (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4))); - - __delay(++xloops); -} - -void __udelay(unsigned long usecs) -{ - __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ -} - -void __ndelay(unsigned long nsecs) -{ - __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ -} - -EXPORT_SYMBOL(__delay); -EXPORT_SYMBOL(__const_udelay); -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(__ndelay); diff --git a/arch/i386/lib/getuser_32.S b/arch/i386/lib/getuser_32.S deleted file mode 100644 index 6d84b53f12a..00000000000 --- a/arch/i386/lib/getuser_32.S +++ /dev/null @@ -1,78 +0,0 @@ -/* - * __get_user functions. - * - * (C) Copyright 1998 Linus Torvalds - * - * These functions have a non-standard call interface - * to make them more efficient, especially as they - * return an error value in addition to the "real" - * return value. - */ -#include -#include -#include - - -/* - * __get_user_X - * - * Inputs: %eax contains the address - * - * Outputs: %eax is error code (0 or -EFAULT) - * %edx contains zero-extended value - * - * These functions should not modify any other registers, - * as they get called from within inline assembly. - */ - -.text -ENTRY(__get_user_1) - CFI_STARTPROC - GET_THREAD_INFO(%edx) - cmpl TI_addr_limit(%edx),%eax - jae bad_get_user -1: movzbl (%eax),%edx - xorl %eax,%eax - ret - CFI_ENDPROC -ENDPROC(__get_user_1) - -ENTRY(__get_user_2) - CFI_STARTPROC - addl $1,%eax - jc bad_get_user - GET_THREAD_INFO(%edx) - cmpl TI_addr_limit(%edx),%eax - jae bad_get_user -2: movzwl -1(%eax),%edx - xorl %eax,%eax - ret - CFI_ENDPROC -ENDPROC(__get_user_2) - -ENTRY(__get_user_4) - CFI_STARTPROC - addl $3,%eax - jc bad_get_user - GET_THREAD_INFO(%edx) - cmpl TI_addr_limit(%edx),%eax - jae bad_get_user -3: movl -3(%eax),%edx - xorl %eax,%eax - ret - CFI_ENDPROC -ENDPROC(__get_user_4) - -bad_get_user: - CFI_STARTPROC - xorl %edx,%edx - movl $-14,%eax - ret - CFI_ENDPROC -END(bad_get_user) - -.section __ex_table,"a" - .long 1b,bad_get_user - .long 2b,bad_get_user - .long 3b,bad_get_user -.previous diff --git a/arch/i386/lib/memcpy_32.c b/arch/i386/lib/memcpy_32.c deleted file mode 100644 index 8ac51b82a63..00000000000 --- a/arch/i386/lib/memcpy_32.c +++ /dev/null @@ -1,43 +0,0 @@ -#include -#include - -#undef memcpy -#undef memset - -void *memcpy(void *to, const void *from, size_t n) -{ -#ifdef CONFIG_X86_USE_3DNOW - return __memcpy3d(to, from, n); -#else - return __memcpy(to, from, n); -#endif -} -EXPORT_SYMBOL(memcpy); - -void *memset(void *s, int c, size_t count) -{ - return __memset(s, c, count); -} -EXPORT_SYMBOL(memset); - -void *memmove(void *dest, const void *src, size_t n) -{ - int d0, d1, d2; - - if (dest < src) { - memcpy(dest,src,n); - } else { - __asm__ __volatile__( - "std\n\t" - "rep\n\t" - "movsb\n\t" - "cld" - : "=&c" (d0), "=&S" (d1), "=&D" (d2) - :"0" (n), - "1" (n-1+(const char *)src), - "2" (n-1+(char *)dest) - :"memory"); - } - return dest; -} -EXPORT_SYMBOL(memmove); diff --git a/arch/i386/lib/mmx_32.c b/arch/i386/lib/mmx_32.c deleted file mode 100644 index 28084d2e8dd..00000000000 --- a/arch/i386/lib/mmx_32.c +++ /dev/null @@ -1,403 +0,0 @@ -#include -#include -#include -#include -#include - -#include - - -/* - * MMX 3DNow! library helper functions - * - * To do: - * We can use MMX just for prefetch in IRQ's. This may be a win. - * (reported so on K6-III) - * We should use a better code neutral filler for the short jump - * leal ebx. [ebx] is apparently best for K6-2, but Cyrix ?? - * We also want to clobber the filler register so we don't get any - * register forwarding stalls on the filler. - * - * Add *user handling. Checksums are not a win with MMX on any CPU - * tested so far for any MMX solution figured. - * - * 22/09/2000 - Arjan van de Ven - * Improved for non-egineering-sample Athlons - * - */ - -void *_mmx_memcpy(void *to, const void *from, size_t len) -{ - void *p; - int i; - - if (unlikely(in_interrupt())) - return __memcpy(to, from, len); - - p = to; - i = len >> 6; /* len/64 */ - - kernel_fpu_begin(); - - __asm__ __volatile__ ( - "1: prefetch (%0)\n" /* This set is 28 bytes */ - " prefetch 64(%0)\n" - " prefetch 128(%0)\n" - " prefetch 192(%0)\n" - " prefetch 256(%0)\n" - "2: \n" - ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ - " jmp 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b, 3b\n" - ".previous" - : : "r" (from) ); - - - for(; i>5; i--) - { - __asm__ __volatile__ ( - "1: prefetch 320(%0)\n" - "2: movq (%0), %%mm0\n" - " movq 8(%0), %%mm1\n" - " movq 16(%0), %%mm2\n" - " movq 24(%0), %%mm3\n" - " movq %%mm0, (%1)\n" - " movq %%mm1, 8(%1)\n" - " movq %%mm2, 16(%1)\n" - " movq %%mm3, 24(%1)\n" - " movq 32(%0), %%mm0\n" - " movq 40(%0), %%mm1\n" - " movq 48(%0), %%mm2\n" - " movq 56(%0), %%mm3\n" - " movq %%mm0, 32(%1)\n" - " movq %%mm1, 40(%1)\n" - " movq %%mm2, 48(%1)\n" - " movq %%mm3, 56(%1)\n" - ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ - " jmp 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b, 3b\n" - ".previous" - : : "r" (from), "r" (to) : "memory"); - from+=64; - to+=64; - } - - for(; i>0; i--) - { - __asm__ __volatile__ ( - " movq (%0), %%mm0\n" - " movq 8(%0), %%mm1\n" - " movq 16(%0), %%mm2\n" - " movq 24(%0), %%mm3\n" - " movq %%mm0, (%1)\n" - " movq %%mm1, 8(%1)\n" - " movq %%mm2, 16(%1)\n" - " movq %%mm3, 24(%1)\n" - " movq 32(%0), %%mm0\n" - " movq 40(%0), %%mm1\n" - " movq 48(%0), %%mm2\n" - " movq 56(%0), %%mm3\n" - " movq %%mm0, 32(%1)\n" - " movq %%mm1, 40(%1)\n" - " movq %%mm2, 48(%1)\n" - " movq %%mm3, 56(%1)\n" - : : "r" (from), "r" (to) : "memory"); - from+=64; - to+=64; - } - /* - * Now do the tail of the block - */ - __memcpy(to, from, len&63); - kernel_fpu_end(); - return p; -} - -#ifdef CONFIG_MK7 - -/* - * The K7 has streaming cache bypass load/store. The Cyrix III, K6 and - * other MMX using processors do not. - */ - -static void fast_clear_page(void *page) -{ - int i; - - kernel_fpu_begin(); - - __asm__ __volatile__ ( - " pxor %%mm0, %%mm0\n" : : - ); - - for(i=0;i<4096/64;i++) - { - __asm__ __volatile__ ( - " movntq %%mm0, (%0)\n" - " movntq %%mm0, 8(%0)\n" - " movntq %%mm0, 16(%0)\n" - " movntq %%mm0, 24(%0)\n" - " movntq %%mm0, 32(%0)\n" - " movntq %%mm0, 40(%0)\n" - " movntq %%mm0, 48(%0)\n" - " movntq %%mm0, 56(%0)\n" - : : "r" (page) : "memory"); - page+=64; - } - /* since movntq is weakly-ordered, a "sfence" is needed to become - * ordered again. - */ - __asm__ __volatile__ ( - " sfence \n" : : - ); - kernel_fpu_end(); -} - -static void fast_copy_page(void *to, void *from) -{ - int i; - - kernel_fpu_begin(); - - /* maybe the prefetch stuff can go before the expensive fnsave... - * but that is for later. -AV - */ - __asm__ __volatile__ ( - "1: prefetch (%0)\n" - " prefetch 64(%0)\n" - " prefetch 128(%0)\n" - " prefetch 192(%0)\n" - " prefetch 256(%0)\n" - "2: \n" - ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ - " jmp 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b, 3b\n" - ".previous" - : : "r" (from) ); - - for(i=0; i<(4096-320)/64; i++) - { - __asm__ __volatile__ ( - "1: prefetch 320(%0)\n" - "2: movq (%0), %%mm0\n" - " movntq %%mm0, (%1)\n" - " movq 8(%0), %%mm1\n" - " movntq %%mm1, 8(%1)\n" - " movq 16(%0), %%mm2\n" - " movntq %%mm2, 16(%1)\n" - " movq 24(%0), %%mm3\n" - " movntq %%mm3, 24(%1)\n" - " movq 32(%0), %%mm4\n" - " movntq %%mm4, 32(%1)\n" - " movq 40(%0), %%mm5\n" - " movntq %%mm5, 40(%1)\n" - " movq 48(%0), %%mm6\n" - " movntq %%mm6, 48(%1)\n" - " movq 56(%0), %%mm7\n" - " movntq %%mm7, 56(%1)\n" - ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ - " jmp 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b, 3b\n" - ".previous" - : : "r" (from), "r" (to) : "memory"); - from+=64; - to+=64; - } - for(i=(4096-320)/64; i<4096/64; i++) - { - __asm__ __volatile__ ( - "2: movq (%0), %%mm0\n" - " movntq %%mm0, (%1)\n" - " movq 8(%0), %%mm1\n" - " movntq %%mm1, 8(%1)\n" - " movq 16(%0), %%mm2\n" - " movntq %%mm2, 16(%1)\n" - " movq 24(%0), %%mm3\n" - " movntq %%mm3, 24(%1)\n" - " movq 32(%0), %%mm4\n" - " movntq %%mm4, 32(%1)\n" - " movq 40(%0), %%mm5\n" - " movntq %%mm5, 40(%1)\n" - " movq 48(%0), %%mm6\n" - " movntq %%mm6, 48(%1)\n" - " movq 56(%0), %%mm7\n" - " movntq %%mm7, 56(%1)\n" - : : "r" (from), "r" (to) : "memory"); - from+=64; - to+=64; - } - /* since movntq is weakly-ordered, a "sfence" is needed to become - * ordered again. - */ - __asm__ __volatile__ ( - " sfence \n" : : - ); - kernel_fpu_end(); -} - -#else - -/* - * Generic MMX implementation without K7 specific streaming - */ - -static void fast_clear_page(void *page) -{ - int i; - - kernel_fpu_begin(); - - __asm__ __volatile__ ( - " pxor %%mm0, %%mm0\n" : : - ); - - for(i=0;i<4096/128;i++) - { - __asm__ __volatile__ ( - " movq %%mm0, (%0)\n" - " movq %%mm0, 8(%0)\n" - " movq %%mm0, 16(%0)\n" - " movq %%mm0, 24(%0)\n" - " movq %%mm0, 32(%0)\n" - " movq %%mm0, 40(%0)\n" - " movq %%mm0, 48(%0)\n" - " movq %%mm0, 56(%0)\n" - " movq %%mm0, 64(%0)\n" - " movq %%mm0, 72(%0)\n" - " movq %%mm0, 80(%0)\n" - " movq %%mm0, 88(%0)\n" - " movq %%mm0, 96(%0)\n" - " movq %%mm0, 104(%0)\n" - " movq %%mm0, 112(%0)\n" - " movq %%mm0, 120(%0)\n" - : : "r" (page) : "memory"); - page+=128; - } - - kernel_fpu_end(); -} - -static void fast_copy_page(void *to, void *from) -{ - int i; - - - kernel_fpu_begin(); - - __asm__ __volatile__ ( - "1: prefetch (%0)\n" - " prefetch 64(%0)\n" - " prefetch 128(%0)\n" - " prefetch 192(%0)\n" - " prefetch 256(%0)\n" - "2: \n" - ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ - " jmp 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b, 3b\n" - ".previous" - : : "r" (from) ); - - for(i=0; i<4096/64; i++) - { - __asm__ __volatile__ ( - "1: prefetch 320(%0)\n" - "2: movq (%0), %%mm0\n" - " movq 8(%0), %%mm1\n" - " movq 16(%0), %%mm2\n" - " movq 24(%0), %%mm3\n" - " movq %%mm0, (%1)\n" - " movq %%mm1, 8(%1)\n" - " movq %%mm2, 16(%1)\n" - " movq %%mm3, 24(%1)\n" - " movq 32(%0), %%mm0\n" - " movq 40(%0), %%mm1\n" - " movq 48(%0), %%mm2\n" - " movq 56(%0), %%mm3\n" - " movq %%mm0, 32(%1)\n" - " movq %%mm1, 40(%1)\n" - " movq %%mm2, 48(%1)\n" - " movq %%mm3, 56(%1)\n" - ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ - " jmp 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b, 3b\n" - ".previous" - : : "r" (from), "r" (to) : "memory"); - from+=64; - to+=64; - } - kernel_fpu_end(); -} - - -#endif - -/* - * Favour MMX for page clear and copy. - */ - -static void slow_zero_page(void * page) -{ - int d0, d1; - __asm__ __volatile__( \ - "cld\n\t" \ - "rep ; stosl" \ - : "=&c" (d0), "=&D" (d1) - :"a" (0),"1" (page),"0" (1024) - :"memory"); -} - -void mmx_clear_page(void * page) -{ - if(unlikely(in_interrupt())) - slow_zero_page(page); - else - fast_clear_page(page); -} - -static void slow_copy_page(void *to, void *from) -{ - int d0, d1, d2; - __asm__ __volatile__( \ - "cld\n\t" \ - "rep ; movsl" \ - : "=&c" (d0), "=&D" (d1), "=&S" (d2) \ - : "0" (1024),"1" ((long) to),"2" ((long) from) \ - : "memory"); -} - - -void mmx_copy_page(void *to, void *from) -{ - if(unlikely(in_interrupt())) - slow_copy_page(to, from); - else - fast_copy_page(to, from); -} - -EXPORT_SYMBOL(_mmx_memcpy); -EXPORT_SYMBOL(mmx_clear_page); -EXPORT_SYMBOL(mmx_copy_page); diff --git a/arch/i386/lib/msr-on-cpu.c b/arch/i386/lib/msr-on-cpu.c deleted file mode 100644 index 7767962f25d..00000000000 --- a/arch/i386/lib/msr-on-cpu.c +++ /dev/null @@ -1,119 +0,0 @@ -#include -#include -#include -#include - -struct msr_info { - u32 msr_no; - u32 l, h; - int err; -}; - -static void __rdmsr_on_cpu(void *info) -{ - struct msr_info *rv = info; - - rdmsr(rv->msr_no, rv->l, rv->h); -} - -static void __rdmsr_safe_on_cpu(void *info) -{ - struct msr_info *rv = info; - - rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h); -} - -static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe) -{ - int err = 0; - preempt_disable(); - if (smp_processor_id() == cpu) - if (safe) - err = rdmsr_safe(msr_no, l, h); - else - rdmsr(msr_no, *l, *h); - else { - struct msr_info rv; - - rv.msr_no = msr_no; - if (safe) { - smp_call_function_single(cpu, __rdmsr_safe_on_cpu, - &rv, 0, 1); - err = rv.err; - } else { - smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 0, 1); - } - *l = rv.l; - *h = rv.h; - } - preempt_enable(); - return err; -} - -static void __wrmsr_on_cpu(void *info) -{ - struct msr_info *rv = info; - - wrmsr(rv->msr_no, rv->l, rv->h); -} - -static void __wrmsr_safe_on_cpu(void *info) -{ - struct msr_info *rv = info; - - rv->err = wrmsr_safe(rv->msr_no, rv->l, rv->h); -} - -static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe) -{ - int err = 0; - preempt_disable(); - if (smp_processor_id() == cpu) - if (safe) - err = wrmsr_safe(msr_no, l, h); - else - wrmsr(msr_no, l, h); - else { - struct msr_info rv; - - rv.msr_no = msr_no; - rv.l = l; - rv.h = h; - if (safe) { - smp_call_function_single(cpu, __wrmsr_safe_on_cpu, - &rv, 0, 1); - err = rv.err; - } else { - smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 0, 1); - } - } - preempt_enable(); - return err; -} - -void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) -{ - _wrmsr_on_cpu(cpu, msr_no, l, h, 0); -} - -void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) -{ - _rdmsr_on_cpu(cpu, msr_no, l, h, 0); -} - -/* These "safe" variants are slower and should be used when the target MSR - may not actually exist. */ -int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) -{ - return _wrmsr_on_cpu(cpu, msr_no, l, h, 1); -} - -int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) -{ - return _rdmsr_on_cpu(cpu, msr_no, l, h, 1); -} - -EXPORT_SYMBOL(rdmsr_on_cpu); -EXPORT_SYMBOL(wrmsr_on_cpu); -EXPORT_SYMBOL(rdmsr_safe_on_cpu); -EXPORT_SYMBOL(wrmsr_safe_on_cpu); diff --git a/arch/i386/lib/putuser_32.S b/arch/i386/lib/putuser_32.S deleted file mode 100644 index f58fba109d1..00000000000 --- a/arch/i386/lib/putuser_32.S +++ /dev/null @@ -1,98 +0,0 @@ -/* - * __put_user functions. - * - * (C) Copyright 2005 Linus Torvalds - * - * These functions have a non-standard call interface - * to make them more efficient, especially as they - * return an error value in addition to the "real" - * return value. - */ -#include -#include -#include - - -/* - * __put_user_X - * - * Inputs: %eax[:%edx] contains the data - * %ecx contains the address - * - * Outputs: %eax is error code (0 or -EFAULT) - * - * These functions should not modify any other registers, - * as they get called from within inline assembly. - */ - -#define ENTER CFI_STARTPROC ; \ - pushl %ebx ; \ - CFI_ADJUST_CFA_OFFSET 4 ; \ - CFI_REL_OFFSET ebx, 0 ; \ - GET_THREAD_INFO(%ebx) -#define EXIT popl %ebx ; \ - CFI_ADJUST_CFA_OFFSET -4 ; \ - CFI_RESTORE ebx ; \ - ret ; \ - CFI_ENDPROC - -.text -ENTRY(__put_user_1) - ENTER - cmpl TI_addr_limit(%ebx),%ecx - jae bad_put_user -1: movb %al,(%ecx) - xorl %eax,%eax - EXIT -ENDPROC(__put_user_1) - -ENTRY(__put_user_2) - ENTER - movl TI_addr_limit(%ebx),%ebx - subl $1,%ebx - cmpl %ebx,%ecx - jae bad_put_user -2: movw %ax,(%ecx) - xorl %eax,%eax - EXIT -ENDPROC(__put_user_2) - -ENTRY(__put_user_4) - ENTER - movl TI_addr_limit(%ebx),%ebx - subl $3,%ebx - cmpl %ebx,%ecx - jae bad_put_user -3: movl %eax,(%ecx) - xorl %eax,%eax - EXIT -ENDPROC(__put_user_4) - -ENTRY(__put_user_8) - ENTER - movl TI_addr_limit(%ebx),%ebx - subl $7,%ebx - cmpl %ebx,%ecx - jae bad_put_user -4: movl %eax,(%ecx) -5: movl %edx,4(%ecx) - xorl %eax,%eax - EXIT -ENDPROC(__put_user_8) - -bad_put_user: - CFI_STARTPROC simple - CFI_DEF_CFA esp, 2*4 - CFI_OFFSET eip, -1*4 - CFI_OFFSET ebx, -2*4 - movl $-14,%eax - EXIT -END(bad_put_user) - -.section __ex_table,"a" - .long 1b,bad_put_user - .long 2b,bad_put_user - .long 3b,bad_put_user - .long 4b,bad_put_user - .long 5b,bad_put_user -.previous diff --git a/arch/i386/lib/semaphore_32.S b/arch/i386/lib/semaphore_32.S deleted file mode 100644 index c01eb39c0b4..00000000000 --- a/arch/i386/lib/semaphore_32.S +++ /dev/null @@ -1,219 +0,0 @@ -/* - * i386 semaphore implementation. - * - * (C) Copyright 1999 Linus Torvalds - * - * Portions Copyright 1999 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * rw semaphores implemented November 1999 by Benjamin LaHaise - */ - -#include -#include -#include -#include -#include - -/* - * The semaphore operations have a special calling sequence that - * allow us to do a simpler in-line version of them. These routines - * need to convert that sequence back into the C sequence when - * there is contention on the semaphore. - * - * %eax contains the semaphore pointer on entry. Save the C-clobbered - * registers (%eax, %edx and %ecx) except %eax whish is either a return - * value or just clobbered.. - */ - .section .sched.text -ENTRY(__down_failed) - CFI_STARTPROC - FRAME - pushl %edx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET edx,0 - pushl %ecx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ecx,0 - call __down - popl %ecx - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE ecx - popl %edx - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE edx - ENDFRAME - ret - CFI_ENDPROC - END(__down_failed) - -ENTRY(__down_failed_interruptible) - CFI_STARTPROC - FRAME - pushl %edx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET edx,0 - pushl %ecx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ecx,0 - call __down_interruptible - popl %ecx - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE ecx - popl %edx - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE edx - ENDFRAME - ret - CFI_ENDPROC - END(__down_failed_interruptible) - -ENTRY(__down_failed_trylock) - CFI_STARTPROC - FRAME - pushl %edx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET edx,0 - pushl %ecx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ecx,0 - call __down_trylock - popl %ecx - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE ecx - popl %edx - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE edx - ENDFRAME - ret - CFI_ENDPROC - END(__down_failed_trylock) - -ENTRY(__up_wakeup) - CFI_STARTPROC - FRAME - pushl %edx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET edx,0 - pushl %ecx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ecx,0 - call __up - popl %ecx - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE ecx - popl %edx - CFI_ADJUST_CFA_OFFSET -4 - CFI_RESTORE edx - ENDFRAME - ret - CFI_ENDPROC - END(__up_wakeup) - -/* - * rw spinlock fallbacks - */ -#ifdef CONFIG_SMP -ENTRY(__write_lock_failed) - CFI_STARTPROC simple - FRAME -2: LOCK_PREFIX - addl $ RW_LOCK_BIAS,(%eax) -1: rep; nop - cmpl $ RW_LOCK_BIAS,(%eax) - jne 1b - LOCK_PREFIX - subl $ RW_LOCK_BIAS,(%eax) - jnz 2b - ENDFRAME - ret - CFI_ENDPROC - END(__write_lock_failed) - -ENTRY(__read_lock_failed) - CFI_STARTPROC - FRAME -2: LOCK_PREFIX - incl (%eax) -1: rep; nop - cmpl $1,(%eax) - js 1b - LOCK_PREFIX - decl (%eax) - js 2b - ENDFRAME - ret - CFI_ENDPROC - END(__read_lock_failed) - -#endif - -#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM - -/* Fix up special calling conventions */ -ENTRY(call_rwsem_down_read_failed) - CFI_STARTPROC - push %ecx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ecx,0 - push %edx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET edx,0 - call rwsem_down_read_failed - pop %edx - CFI_ADJUST_CFA_OFFSET -4 - pop %ecx - CFI_ADJUST_CFA_OFFSET -4 - ret - CFI_ENDPROC - END(call_rwsem_down_read_failed) - -ENTRY(call_rwsem_down_write_failed) - CFI_STARTPROC - push %ecx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ecx,0 - calll rwsem_down_write_failed - pop %ecx - CFI_ADJUST_CFA_OFFSET -4 - ret - CFI_ENDPROC - END(call_rwsem_down_write_failed) - -ENTRY(call_rwsem_wake) - CFI_STARTPROC - decw %dx /* do nothing if still outstanding active readers */ - jnz 1f - push %ecx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ecx,0 - call rwsem_wake - pop %ecx - CFI_ADJUST_CFA_OFFSET -4 -1: ret - CFI_ENDPROC - END(call_rwsem_wake) - -/* Fix up special calling conventions */ -ENTRY(call_rwsem_downgrade_wake) - CFI_STARTPROC - push %ecx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ecx,0 - push %edx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET edx,0 - call rwsem_downgrade_wake - pop %edx - CFI_ADJUST_CFA_OFFSET -4 - pop %ecx - CFI_ADJUST_CFA_OFFSET -4 - ret - CFI_ENDPROC - END(call_rwsem_downgrade_wake) - -#endif diff --git a/arch/i386/lib/string_32.c b/arch/i386/lib/string_32.c deleted file mode 100644 index 2c773fefa3d..00000000000 --- a/arch/i386/lib/string_32.c +++ /dev/null @@ -1,257 +0,0 @@ -/* - * Most of the string-functions are rather heavily hand-optimized, - * see especially strsep,strstr,str[c]spn. They should work, but are not - * very easy to understand. Everything is done entirely within the register - * set, making the functions fast and clean. String instructions have been - * used through-out, making for "slightly" unclear code :-) - * - * AK: On P4 and K7 using non string instruction implementations might be faster - * for large memory blocks. But most of them are unlikely to be used on large - * strings. - */ - -#include -#include - -#ifdef __HAVE_ARCH_STRCPY -char *strcpy(char * dest,const char *src) -{ - int d0, d1, d2; - asm volatile( "1:\tlodsb\n\t" - "stosb\n\t" - "testb %%al,%%al\n\t" - "jne 1b" - : "=&S" (d0), "=&D" (d1), "=&a" (d2) - :"0" (src),"1" (dest) : "memory"); - return dest; -} -EXPORT_SYMBOL(strcpy); -#endif - -#ifdef __HAVE_ARCH_STRNCPY -char *strncpy(char * dest,const char *src,size_t count) -{ - int d0, d1, d2, d3; - asm volatile( "1:\tdecl %2\n\t" - "js 2f\n\t" - "lodsb\n\t" - "stosb\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n\t" - "rep\n\t" - "stosb\n" - "2:" - : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3) - :"0" (src),"1" (dest),"2" (count) : "memory"); - return dest; -} -EXPORT_SYMBOL(strncpy); -#endif - -#ifdef __HAVE_ARCH_STRCAT -char *strcat(char * dest,const char * src) -{ - int d0, d1, d2, d3; - asm volatile( "repne\n\t" - "scasb\n\t" - "decl %1\n" - "1:\tlodsb\n\t" - "stosb\n\t" - "testb %%al,%%al\n\t" - "jne 1b" - : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) - : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu): "memory"); - return dest; -} -EXPORT_SYMBOL(strcat); -#endif - -#ifdef __HAVE_ARCH_STRNCAT -char *strncat(char * dest,const char * src,size_t count) -{ - int d0, d1, d2, d3; - asm volatile( "repne\n\t" - "scasb\n\t" - "decl %1\n\t" - "movl %8,%3\n" - "1:\tdecl %3\n\t" - "js 2f\n\t" - "lodsb\n\t" - "stosb\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n" - "2:\txorl %2,%2\n\t" - "stosb" - : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) - : "0" (src),"1" (dest),"2" (0),"3" (0xffffffffu), "g" (count) - : "memory"); - return dest; -} -EXPORT_SYMBOL(strncat); -#endif - -#ifdef __HAVE_ARCH_STRCMP -int strcmp(const char * cs,const char * ct) -{ - int d0, d1; - int res; - asm volatile( "1:\tlodsb\n\t" - "scasb\n\t" - "jne 2f\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n\t" - "xorl %%eax,%%eax\n\t" - "jmp 3f\n" - "2:\tsbbl %%eax,%%eax\n\t" - "orb $1,%%al\n" - "3:" - :"=a" (res), "=&S" (d0), "=&D" (d1) - :"1" (cs),"2" (ct) - :"memory"); - return res; -} -EXPORT_SYMBOL(strcmp); -#endif - -#ifdef __HAVE_ARCH_STRNCMP -int strncmp(const char * cs,const char * ct,size_t count) -{ - int res; - int d0, d1, d2; - asm volatile( "1:\tdecl %3\n\t" - "js 2f\n\t" - "lodsb\n\t" - "scasb\n\t" - "jne 3f\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n" - "2:\txorl %%eax,%%eax\n\t" - "jmp 4f\n" - "3:\tsbbl %%eax,%%eax\n\t" - "orb $1,%%al\n" - "4:" - :"=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2) - :"1" (cs),"2" (ct),"3" (count) - :"memory"); - return res; -} -EXPORT_SYMBOL(strncmp); -#endif - -#ifdef __HAVE_ARCH_STRCHR -char *strchr(const char * s, int c) -{ - int d0; - char * res; - asm volatile( "movb %%al,%%ah\n" - "1:\tlodsb\n\t" - "cmpb %%ah,%%al\n\t" - "je 2f\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n\t" - "movl $1,%1\n" - "2:\tmovl %1,%0\n\t" - "decl %0" - :"=a" (res), "=&S" (d0) - :"1" (s),"0" (c) - :"memory"); - return res; -} -EXPORT_SYMBOL(strchr); -#endif - -#ifdef __HAVE_ARCH_STRRCHR -char *strrchr(const char * s, int c) -{ - int d0, d1; - char * res; - asm volatile( "movb %%al,%%ah\n" - "1:\tlodsb\n\t" - "cmpb %%ah,%%al\n\t" - "jne 2f\n\t" - "leal -1(%%esi),%0\n" - "2:\ttestb %%al,%%al\n\t" - "jne 1b" - :"=g" (res), "=&S" (d0), "=&a" (d1) - :"0" (0),"1" (s),"2" (c) - :"memory"); - return res; -} -EXPORT_SYMBOL(strrchr); -#endif - -#ifdef __HAVE_ARCH_STRLEN -size_t strlen(const char * s) -{ - int d0; - int res; - asm volatile( "repne\n\t" - "scasb\n\t" - "notl %0\n\t" - "decl %0" - :"=c" (res), "=&D" (d0) - :"1" (s),"a" (0), "0" (0xffffffffu) - :"memory"); - return res; -} -EXPORT_SYMBOL(strlen); -#endif - -#ifdef __HAVE_ARCH_MEMCHR -void *memchr(const void *cs,int c,size_t count) -{ - int d0; - void *res; - if (!count) - return NULL; - asm volatile( "repne\n\t" - "scasb\n\t" - "je 1f\n\t" - "movl $1,%0\n" - "1:\tdecl %0" - :"=D" (res), "=&c" (d0) - :"a" (c),"0" (cs),"1" (count) - :"memory"); - return res; -} -EXPORT_SYMBOL(memchr); -#endif - -#ifdef __HAVE_ARCH_MEMSCAN -void *memscan(void * addr, int c, size_t size) -{ - if (!size) - return addr; - asm volatile("repnz; scasb\n\t" - "jnz 1f\n\t" - "dec %%edi\n" - "1:" - : "=D" (addr), "=c" (size) - : "0" (addr), "1" (size), "a" (c) - : "memory"); - return addr; -} -EXPORT_SYMBOL(memscan); -#endif - -#ifdef __HAVE_ARCH_STRNLEN -size_t strnlen(const char *s, size_t count) -{ - int d0; - int res; - asm volatile( "movl %2,%0\n\t" - "jmp 2f\n" - "1:\tcmpb $0,(%0)\n\t" - "je 3f\n\t" - "incl %0\n" - "2:\tdecl %1\n\t" - "cmpl $-1,%1\n\t" - "jne 1b\n" - "3:\tsubl %2,%0" - :"=a" (res), "=&d" (d0) - :"c" (s),"1" (count) - :"memory"); - return res; -} -EXPORT_SYMBOL(strnlen); -#endif diff --git a/arch/i386/lib/strstr_32.c b/arch/i386/lib/strstr_32.c deleted file mode 100644 index a3dafbf59da..00000000000 --- a/arch/i386/lib/strstr_32.c +++ /dev/null @@ -1,31 +0,0 @@ -#include - -char * strstr(const char * cs,const char * ct) -{ -int d0, d1; -register char * __res; -__asm__ __volatile__( - "movl %6,%%edi\n\t" - "repne\n\t" - "scasb\n\t" - "notl %%ecx\n\t" - "decl %%ecx\n\t" /* NOTE! This also sets Z if searchstring='' */ - "movl %%ecx,%%edx\n" - "1:\tmovl %6,%%edi\n\t" - "movl %%esi,%%eax\n\t" - "movl %%edx,%%ecx\n\t" - "repe\n\t" - "cmpsb\n\t" - "je 2f\n\t" /* also works for empty string, see above */ - "xchgl %%eax,%%esi\n\t" - "incl %%esi\n\t" - "cmpb $0,-1(%%eax)\n\t" - "jne 1b\n\t" - "xorl %%eax,%%eax\n\t" - "2:" - :"=a" (__res), "=&c" (d0), "=&S" (d1) - :"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct) - :"dx", "di"); -return __res; -} - diff --git a/arch/i386/lib/usercopy_32.c b/arch/i386/lib/usercopy_32.c deleted file mode 100644 index 9f38b12b4af..00000000000 --- a/arch/i386/lib/usercopy_32.c +++ /dev/null @@ -1,882 +0,0 @@ -/* - * User address space access functions. - * The non inlined parts of asm-i386/uaccess.h are here. - * - * Copyright 1997 Andi Kleen - * Copyright 1997 Linus Torvalds - */ -#include -#include -#include -#include -#include -#include -#include -#include - -static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n) -{ -#ifdef CONFIG_X86_INTEL_USERCOPY - if (n >= 64 && ((a1 ^ a2) & movsl_mask.mask)) - return 0; -#endif - return 1; -} -#define movsl_is_ok(a1,a2,n) \ - __movsl_is_ok((unsigned long)(a1),(unsigned long)(a2),(n)) - -/* - * Copy a null terminated string from userspace. - */ - -#define __do_strncpy_from_user(dst,src,count,res) \ -do { \ - int __d0, __d1, __d2; \ - might_sleep(); \ - __asm__ __volatile__( \ - " testl %1,%1\n" \ - " jz 2f\n" \ - "0: lodsb\n" \ - " stosb\n" \ - " testb %%al,%%al\n" \ - " jz 1f\n" \ - " decl %1\n" \ - " jnz 0b\n" \ - "1: subl %1,%0\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl %5,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - ".previous" \ - : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ - "=&D" (__d2) \ - : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ - : "memory"); \ -} while (0) - -/** - * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. - * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. - * Caller must check the specified block with access_ok() before calling - * this function. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -long -__strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res; - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(__strncpy_from_user); - -/** - * strncpy_from_user: - Copy a NUL terminated string from userspace. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. - * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -long -strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(strncpy_from_user); - -/* - * Zero Userspace - */ - -#define __do_clear_user(addr,size) \ -do { \ - int __d0; \ - might_sleep(); \ - __asm__ __volatile__( \ - "0: rep; stosl\n" \ - " movl %2,%0\n" \ - "1: rep; stosb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: lea 0(%2,%0,4),%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,2b\n" \ - ".previous" \ - : "=&c"(size), "=&D" (__d0) \ - : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ -} while (0) - -/** - * clear_user: - Zero a block of memory in user space. - * @to: Destination address, in user space. - * @n: Number of bytes to zero. - * - * Zero a block of memory in user space. - * - * Returns number of bytes that could not be cleared. - * On success, this will be zero. - */ -unsigned long -clear_user(void __user *to, unsigned long n) -{ - might_sleep(); - if (access_ok(VERIFY_WRITE, to, n)) - __do_clear_user(to, n); - return n; -} -EXPORT_SYMBOL(clear_user); - -/** - * __clear_user: - Zero a block of memory in user space, with less checking. - * @to: Destination address, in user space. - * @n: Number of bytes to zero. - * - * Zero a block of memory in user space. Caller must check - * the specified block with access_ok() before calling this function. - * - * Returns number of bytes that could not be cleared. - * On success, this will be zero. - */ -unsigned long -__clear_user(void __user *to, unsigned long n) -{ - __do_clear_user(to, n); - return n; -} -EXPORT_SYMBOL(__clear_user); - -/** - * strnlen_user: - Get the size of a string in user space. - * @s: The string to measure. - * @n: The maximum valid length - * - * Get the size of a NUL-terminated string in user space. - * - * Returns the size of the string INCLUDING the terminating NUL. - * On exception, returns 0. - * If the string is too long, returns a value greater than @n. - */ -long strnlen_user(const char __user *s, long n) -{ - unsigned long mask = -__addr_ok(s); - unsigned long res, tmp; - - might_sleep(); - - __asm__ __volatile__( - " testl %0, %0\n" - " jz 3f\n" - " andl %0,%%ecx\n" - "0: repne; scasb\n" - " setne %%al\n" - " subl %%ecx,%0\n" - " addl %0,%%eax\n" - "1:\n" - ".section .fixup,\"ax\"\n" - "2: xorl %%eax,%%eax\n" - " jmp 1b\n" - "3: movb $1,%%al\n" - " jmp 1b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 0b,2b\n" - ".previous" - :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp) - :"0" (n), "1" (s), "2" (0), "3" (mask) - :"cc"); - return res & mask; -} -EXPORT_SYMBOL(strnlen_user); - -#ifdef CONFIG_X86_INTEL_USERCOPY -static unsigned long -__copy_user_intel(void __user *to, const void *from, unsigned long size) -{ - int d0, d1; - __asm__ __volatile__( - " .align 2,0x90\n" - "1: movl 32(%4), %%eax\n" - " cmpl $67, %0\n" - " jbe 3f\n" - "2: movl 64(%4), %%eax\n" - " .align 2,0x90\n" - "3: movl 0(%4), %%eax\n" - "4: movl 4(%4), %%edx\n" - "5: movl %%eax, 0(%3)\n" - "6: movl %%edx, 4(%3)\n" - "7: movl 8(%4), %%eax\n" - "8: movl 12(%4),%%edx\n" - "9: movl %%eax, 8(%3)\n" - "10: movl %%edx, 12(%3)\n" - "11: movl 16(%4), %%eax\n" - "12: movl 20(%4), %%edx\n" - "13: movl %%eax, 16(%3)\n" - "14: movl %%edx, 20(%3)\n" - "15: movl 24(%4), %%eax\n" - "16: movl 28(%4), %%edx\n" - "17: movl %%eax, 24(%3)\n" - "18: movl %%edx, 28(%3)\n" - "19: movl 32(%4), %%eax\n" - "20: movl 36(%4), %%edx\n" - "21: movl %%eax, 32(%3)\n" - "22: movl %%edx, 36(%3)\n" - "23: movl 40(%4), %%eax\n" - "24: movl 44(%4), %%edx\n" - "25: movl %%eax, 40(%3)\n" - "26: movl %%edx, 44(%3)\n" - "27: movl 48(%4), %%eax\n" - "28: movl 52(%4), %%edx\n" - "29: movl %%eax, 48(%3)\n" - "30: movl %%edx, 52(%3)\n" - "31: movl 56(%4), %%eax\n" - "32: movl 60(%4), %%edx\n" - "33: movl %%eax, 56(%3)\n" - "34: movl %%edx, 60(%3)\n" - " addl $-64, %0\n" - " addl $64, %4\n" - " addl $64, %3\n" - " cmpl $63, %0\n" - " ja 1b\n" - "35: movl %0, %%eax\n" - " shrl $2, %0\n" - " andl $3, %%eax\n" - " cld\n" - "99: rep; movsl\n" - "36: movl %%eax, %0\n" - "37: rep; movsb\n" - "100:\n" - ".section .fixup,\"ax\"\n" - "101: lea 0(%%eax,%0,4),%0\n" - " jmp 100b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b,100b\n" - " .long 2b,100b\n" - " .long 3b,100b\n" - " .long 4b,100b\n" - " .long 5b,100b\n" - " .long 6b,100b\n" - " .long 7b,100b\n" - " .long 8b,100b\n" - " .long 9b,100b\n" - " .long 10b,100b\n" - " .long 11b,100b\n" - " .long 12b,100b\n" - " .long 13b,100b\n" - " .long 14b,100b\n" - " .long 15b,100b\n" - " .long 16b,100b\n" - " .long 17b,100b\n" - " .long 18b,100b\n" - " .long 19b,100b\n" - " .long 20b,100b\n" - " .long 21b,100b\n" - " .long 22b,100b\n" - " .long 23b,100b\n" - " .long 24b,100b\n" - " .long 25b,100b\n" - " .long 26b,100b\n" - " .long 27b,100b\n" - " .long 28b,100b\n" - " .long 29b,100b\n" - " .long 30b,100b\n" - " .long 31b,100b\n" - " .long 32b,100b\n" - " .long 33b,100b\n" - " .long 34b,100b\n" - " .long 35b,100b\n" - " .long 36b,100b\n" - " .long 37b,100b\n" - " .long 99b,101b\n" - ".previous" - : "=&c"(size), "=&D" (d0), "=&S" (d1) - : "1"(to), "2"(from), "0"(size) - : "eax", "edx", "memory"); - return size; -} - -static unsigned long -__copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size) -{ - int d0, d1; - __asm__ __volatile__( - " .align 2,0x90\n" - "0: movl 32(%4), %%eax\n" - " cmpl $67, %0\n" - " jbe 2f\n" - "1: movl 64(%4), %%eax\n" - " .align 2,0x90\n" - "2: movl 0(%4), %%eax\n" - "21: movl 4(%4), %%edx\n" - " movl %%eax, 0(%3)\n" - " movl %%edx, 4(%3)\n" - "3: movl 8(%4), %%eax\n" - "31: movl 12(%4),%%edx\n" - " movl %%eax, 8(%3)\n" - " movl %%edx, 12(%3)\n" - "4: movl 16(%4), %%eax\n" - "41: movl 20(%4), %%edx\n" - " movl %%eax, 16(%3)\n" - " movl %%edx, 20(%3)\n" - "10: movl 24(%4), %%eax\n" - "51: movl 28(%4), %%edx\n" - " movl %%eax, 24(%3)\n" - " movl %%edx, 28(%3)\n" - "11: movl 32(%4), %%eax\n" - "61: movl 36(%4), %%edx\n" - " movl %%eax, 32(%3)\n" - " movl %%edx, 36(%3)\n" - "12: movl 40(%4), %%eax\n" - "71: movl 44(%4), %%edx\n" - " movl %%eax, 40(%3)\n" - " movl %%edx, 44(%3)\n" - "13: movl 48(%4), %%eax\n" - "81: movl 52(%4), %%edx\n" - " movl %%eax, 48(%3)\n" - " movl %%edx, 52(%3)\n" - "14: movl 56(%4), %%eax\n" - "91: movl 60(%4), %%edx\n" - " movl %%eax, 56(%3)\n" - " movl %%edx, 60(%3)\n" - " addl $-64, %0\n" - " addl $64, %4\n" - " addl $64, %3\n" - " cmpl $63, %0\n" - " ja 0b\n" - "5: movl %0, %%eax\n" - " shrl $2, %0\n" - " andl $3, %%eax\n" - " cld\n" - "6: rep; movsl\n" - " movl %%eax,%0\n" - "7: rep; movsb\n" - "8:\n" - ".section .fixup,\"ax\"\n" - "9: lea 0(%%eax,%0,4),%0\n" - "16: pushl %0\n" - " pushl %%eax\n" - " xorl %%eax,%%eax\n" - " rep; stosb\n" - " popl %%eax\n" - " popl %0\n" - " jmp 8b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 0b,16b\n" - " .long 1b,16b\n" - " .long 2b,16b\n" - " .long 21b,16b\n" - " .long 3b,16b\n" - " .long 31b,16b\n" - " .long 4b,16b\n" - " .long 41b,16b\n" - " .long 10b,16b\n" - " .long 51b,16b\n" - " .long 11b,16b\n" - " .long 61b,16b\n" - " .long 12b,16b\n" - " .long 71b,16b\n" - " .long 13b,16b\n" - " .long 81b,16b\n" - " .long 14b,16b\n" - " .long 91b,16b\n" - " .long 6b,9b\n" - " .long 7b,16b\n" - ".previous" - : "=&c"(size), "=&D" (d0), "=&S" (d1) - : "1"(to), "2"(from), "0"(size) - : "eax", "edx", "memory"); - return size; -} - -/* - * Non Temporal Hint version of __copy_user_zeroing_intel. It is cache aware. - * hyoshiok@miraclelinux.com - */ - -static unsigned long __copy_user_zeroing_intel_nocache(void *to, - const void __user *from, unsigned long size) -{ - int d0, d1; - - __asm__ __volatile__( - " .align 2,0x90\n" - "0: movl 32(%4), %%eax\n" - " cmpl $67, %0\n" - " jbe 2f\n" - "1: movl 64(%4), %%eax\n" - " .align 2,0x90\n" - "2: movl 0(%4), %%eax\n" - "21: movl 4(%4), %%edx\n" - " movnti %%eax, 0(%3)\n" - " movnti %%edx, 4(%3)\n" - "3: movl 8(%4), %%eax\n" - "31: movl 12(%4),%%edx\n" - " movnti %%eax, 8(%3)\n" - " movnti %%edx, 12(%3)\n" - "4: movl 16(%4), %%eax\n" - "41: movl 20(%4), %%edx\n" - " movnti %%eax, 16(%3)\n" - " movnti %%edx, 20(%3)\n" - "10: movl 24(%4), %%eax\n" - "51: movl 28(%4), %%edx\n" - " movnti %%eax, 24(%3)\n" - " movnti %%edx, 28(%3)\n" - "11: movl 32(%4), %%eax\n" - "61: movl 36(%4), %%edx\n" - " movnti %%eax, 32(%3)\n" - " movnti %%edx, 36(%3)\n" - "12: movl 40(%4), %%eax\n" - "71: movl 44(%4), %%edx\n" - " movnti %%eax, 40(%3)\n" - " movnti %%edx, 44(%3)\n" - "13: movl 48(%4), %%eax\n" - "81: movl 52(%4), %%edx\n" - " movnti %%eax, 48(%3)\n" - " movnti %%edx, 52(%3)\n" - "14: movl 56(%4), %%eax\n" - "91: movl 60(%4), %%edx\n" - " movnti %%eax, 56(%3)\n" - " movnti %%edx, 60(%3)\n" - " addl $-64, %0\n" - " addl $64, %4\n" - " addl $64, %3\n" - " cmpl $63, %0\n" - " ja 0b\n" - " sfence \n" - "5: movl %0, %%eax\n" - " shrl $2, %0\n" - " andl $3, %%eax\n" - " cld\n" - "6: rep; movsl\n" - " movl %%eax,%0\n" - "7: rep; movsb\n" - "8:\n" - ".section .fixup,\"ax\"\n" - "9: lea 0(%%eax,%0,4),%0\n" - "16: pushl %0\n" - " pushl %%eax\n" - " xorl %%eax,%%eax\n" - " rep; stosb\n" - " popl %%eax\n" - " popl %0\n" - " jmp 8b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 0b,16b\n" - " .long 1b,16b\n" - " .long 2b,16b\n" - " .long 21b,16b\n" - " .long 3b,16b\n" - " .long 31b,16b\n" - " .long 4b,16b\n" - " .long 41b,16b\n" - " .long 10b,16b\n" - " .long 51b,16b\n" - " .long 11b,16b\n" - " .long 61b,16b\n" - " .long 12b,16b\n" - " .long 71b,16b\n" - " .long 13b,16b\n" - " .long 81b,16b\n" - " .long 14b,16b\n" - " .long 91b,16b\n" - " .long 6b,9b\n" - " .long 7b,16b\n" - ".previous" - : "=&c"(size), "=&D" (d0), "=&S" (d1) - : "1"(to), "2"(from), "0"(size) - : "eax", "edx", "memory"); - return size; -} - -static unsigned long __copy_user_intel_nocache(void *to, - const void __user *from, unsigned long size) -{ - int d0, d1; - - __asm__ __volatile__( - " .align 2,0x90\n" - "0: movl 32(%4), %%eax\n" - " cmpl $67, %0\n" - " jbe 2f\n" - "1: movl 64(%4), %%eax\n" - " .align 2,0x90\n" - "2: movl 0(%4), %%eax\n" - "21: movl 4(%4), %%edx\n" - " movnti %%eax, 0(%3)\n" - " movnti %%edx, 4(%3)\n" - "3: movl 8(%4), %%eax\n" - "31: movl 12(%4),%%edx\n" - " movnti %%eax, 8(%3)\n" - " movnti %%edx, 12(%3)\n" - "4: movl 16(%4), %%eax\n" - "41: movl 20(%4), %%edx\n" - " movnti %%eax, 16(%3)\n" - " movnti %%edx, 20(%3)\n" - "10: movl 24(%4), %%eax\n" - "51: movl 28(%4), %%edx\n" - " movnti %%eax, 24(%3)\n" - " movnti %%edx, 28(%3)\n" - "11: movl 32(%4), %%eax\n" - "61: movl 36(%4), %%edx\n" - " movnti %%eax, 32(%3)\n" - " movnti %%edx, 36(%3)\n" - "12: movl 40(%4), %%eax\n" - "71: movl 44(%4), %%edx\n" - " movnti %%eax, 40(%3)\n" - " movnti %%edx, 44(%3)\n" - "13: movl 48(%4), %%eax\n" - "81: movl 52(%4), %%edx\n" - " movnti %%eax, 48(%3)\n" - " movnti %%edx, 52(%3)\n" - "14: movl 56(%4), %%eax\n" - "91: movl 60(%4), %%edx\n" - " movnti %%eax, 56(%3)\n" - " movnti %%edx, 60(%3)\n" - " addl $-64, %0\n" - " addl $64, %4\n" - " addl $64, %3\n" - " cmpl $63, %0\n" - " ja 0b\n" - " sfence \n" - "5: movl %0, %%eax\n" - " shrl $2, %0\n" - " andl $3, %%eax\n" - " cld\n" - "6: rep; movsl\n" - " movl %%eax,%0\n" - "7: rep; movsb\n" - "8:\n" - ".section .fixup,\"ax\"\n" - "9: lea 0(%%eax,%0,4),%0\n" - "16: jmp 8b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 0b,16b\n" - " .long 1b,16b\n" - " .long 2b,16b\n" - " .long 21b,16b\n" - " .long 3b,16b\n" - " .long 31b,16b\n" - " .long 4b,16b\n" - " .long 41b,16b\n" - " .long 10b,16b\n" - " .long 51b,16b\n" - " .long 11b,16b\n" - " .long 61b,16b\n" - " .long 12b,16b\n" - " .long 71b,16b\n" - " .long 13b,16b\n" - " .long 81b,16b\n" - " .long 14b,16b\n" - " .long 91b,16b\n" - " .long 6b,9b\n" - " .long 7b,16b\n" - ".previous" - : "=&c"(size), "=&D" (d0), "=&S" (d1) - : "1"(to), "2"(from), "0"(size) - : "eax", "edx", "memory"); - return size; -} - -#else - -/* - * Leave these declared but undefined. They should not be any references to - * them - */ -unsigned long __copy_user_zeroing_intel(void *to, const void __user *from, - unsigned long size); -unsigned long __copy_user_intel(void __user *to, const void *from, - unsigned long size); -unsigned long __copy_user_zeroing_intel_nocache(void *to, - const void __user *from, unsigned long size); -#endif /* CONFIG_X86_INTEL_USERCOPY */ - -/* Generic arbitrary sized copy. */ -#define __copy_user(to,from,size) \ -do { \ - int __d0, __d1, __d2; \ - __asm__ __volatile__( \ - " cmp $7,%0\n" \ - " jbe 1f\n" \ - " movl %1,%0\n" \ - " negl %0\n" \ - " andl $7,%0\n" \ - " subl %0,%3\n" \ - "4: rep; movsb\n" \ - " movl %3,%0\n" \ - " shrl $2,%0\n" \ - " andl $3,%3\n" \ - " .align 2,0x90\n" \ - "0: rep; movsl\n" \ - " movl %3,%0\n" \ - "1: rep; movsb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "5: addl %3,%0\n" \ - " jmp 2b\n" \ - "3: lea 0(%3,%0,4),%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 4b,5b\n" \ - " .long 0b,3b\n" \ - " .long 1b,2b\n" \ - ".previous" \ - : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ - : "3"(size), "0"(size), "1"(to), "2"(from) \ - : "memory"); \ -} while (0) - -#define __copy_user_zeroing(to,from,size) \ -do { \ - int __d0, __d1, __d2; \ - __asm__ __volatile__( \ - " cmp $7,%0\n" \ - " jbe 1f\n" \ - " movl %1,%0\n" \ - " negl %0\n" \ - " andl $7,%0\n" \ - " subl %0,%3\n" \ - "4: rep; movsb\n" \ - " movl %3,%0\n" \ - " shrl $2,%0\n" \ - " andl $3,%3\n" \ - " .align 2,0x90\n" \ - "0: rep; movsl\n" \ - " movl %3,%0\n" \ - "1: rep; movsb\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "5: addl %3,%0\n" \ - " jmp 6f\n" \ - "3: lea 0(%3,%0,4),%0\n" \ - "6: pushl %0\n" \ - " pushl %%eax\n" \ - " xorl %%eax,%%eax\n" \ - " rep; stosb\n" \ - " popl %%eax\n" \ - " popl %0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 4b,5b\n" \ - " .long 0b,3b\n" \ - " .long 1b,6b\n" \ - ".previous" \ - : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ - : "3"(size), "0"(size), "1"(to), "2"(from) \ - : "memory"); \ -} while (0) - -unsigned long __copy_to_user_ll(void __user *to, const void *from, - unsigned long n) -{ -#ifndef CONFIG_X86_WP_WORKS_OK - if (unlikely(boot_cpu_data.wp_works_ok == 0) && - ((unsigned long )to) < TASK_SIZE) { - /* - * When we are in an atomic section (see - * mm/filemap.c:file_read_actor), return the full - * length to take the slow path. - */ - if (in_atomic()) - return n; - - /* - * CPU does not honor the WP bit when writing - * from supervisory mode, and due to preemption or SMP, - * the page tables can change at any time. - * Do it manually. Manfred - */ - while (n) { - unsigned long offset = ((unsigned long)to)%PAGE_SIZE; - unsigned long len = PAGE_SIZE - offset; - int retval; - struct page *pg; - void *maddr; - - if (len > n) - len = n; - -survive: - down_read(¤t->mm->mmap_sem); - retval = get_user_pages(current, current->mm, - (unsigned long )to, 1, 1, 0, &pg, NULL); - - if (retval == -ENOMEM && is_init(current)) { - up_read(¤t->mm->mmap_sem); - congestion_wait(WRITE, HZ/50); - goto survive; - } - - if (retval != 1) { - up_read(¤t->mm->mmap_sem); - break; - } - - maddr = kmap_atomic(pg, KM_USER0); - memcpy(maddr + offset, from, len); - kunmap_atomic(maddr, KM_USER0); - set_page_dirty_lock(pg); - put_page(pg); - up_read(¤t->mm->mmap_sem); - - from += len; - to += len; - n -= len; - } - return n; - } -#endif - if (movsl_is_ok(to, from, n)) - __copy_user(to, from, n); - else - n = __copy_user_intel(to, from, n); - return n; -} -EXPORT_SYMBOL(__copy_to_user_ll); - -unsigned long __copy_from_user_ll(void *to, const void __user *from, - unsigned long n) -{ - if (movsl_is_ok(to, from, n)) - __copy_user_zeroing(to, from, n); - else - n = __copy_user_zeroing_intel(to, from, n); - return n; -} -EXPORT_SYMBOL(__copy_from_user_ll); - -unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from, - unsigned long n) -{ - if (movsl_is_ok(to, from, n)) - __copy_user(to, from, n); - else - n = __copy_user_intel((void __user *)to, - (const void *)from, n); - return n; -} -EXPORT_SYMBOL(__copy_from_user_ll_nozero); - -unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, - unsigned long n) -{ -#ifdef CONFIG_X86_INTEL_USERCOPY - if ( n > 64 && cpu_has_xmm2) - n = __copy_user_zeroing_intel_nocache(to, from, n); - else - __copy_user_zeroing(to, from, n); -#else - __copy_user_zeroing(to, from, n); -#endif - return n; -} - -unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, - unsigned long n) -{ -#ifdef CONFIG_X86_INTEL_USERCOPY - if ( n > 64 && cpu_has_xmm2) - n = __copy_user_intel_nocache(to, from, n); - else - __copy_user(to, from, n); -#else - __copy_user(to, from, n); -#endif - return n; -} - -/** - * copy_to_user: - Copy a block of data into user space. - * @to: Destination address, in user space. - * @from: Source address, in kernel space. - * @n: Number of bytes to copy. - * - * Context: User context only. This function may sleep. - * - * Copy data from kernel space to user space. - * - * Returns number of bytes that could not be copied. - * On success, this will be zero. - */ -unsigned long -copy_to_user(void __user *to, const void *from, unsigned long n) -{ - if (access_ok(VERIFY_WRITE, to, n)) - n = __copy_to_user(to, from, n); - return n; -} -EXPORT_SYMBOL(copy_to_user); - -/** - * copy_from_user: - Copy a block of data from user space. - * @to: Destination address, in kernel space. - * @from: Source address, in user space. - * @n: Number of bytes to copy. - * - * Context: User context only. This function may sleep. - * - * Copy data from user space to kernel space. - * - * Returns number of bytes that could not be copied. - * On success, this will be zero. - * - * If some data could not be copied, this function will pad the copied - * data to the requested size using zero bytes. - */ -unsigned long -copy_from_user(void *to, const void __user *from, unsigned long n) -{ - if (access_ok(VERIFY_READ, from, n)) - n = __copy_from_user(to, from, n); - else - memset(to, 0, n); - return n; -} -EXPORT_SYMBOL(copy_from_user); diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile new file mode 100644 index 00000000000..2d7d724a2a6 --- /dev/null +++ b/arch/x86/lib/Makefile @@ -0,0 +1,5 @@ +ifeq ($(CONFIG_X86_32),y) +include ${srctree}/arch/x86/lib/Makefile_32 +else +include ${srctree}/arch/x86_64/lib/Makefile_64 +endif diff --git a/arch/x86/lib/Makefile_32 b/arch/x86/lib/Makefile_32 new file mode 100644 index 00000000000..98d1f1e2e2e --- /dev/null +++ b/arch/x86/lib/Makefile_32 @@ -0,0 +1,11 @@ +# +# Makefile for i386-specific library files.. +# + + +lib-y = checksum_32.o delay_32.o usercopy_32.o getuser_32.o putuser_32.o memcpy_32.o strstr_32.o \ + bitops_32.o semaphore_32.o string_32.o + +lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o + +obj-$(CONFIG_SMP) += msr-on-cpu.o diff --git a/arch/x86/lib/bitops_32.c b/arch/x86/lib/bitops_32.c new file mode 100644 index 00000000000..afd0045595d --- /dev/null +++ b/arch/x86/lib/bitops_32.c @@ -0,0 +1,70 @@ +#include +#include + +/** + * find_next_bit - find the first set bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The maximum size to search + */ +int find_next_bit(const unsigned long *addr, int size, int offset) +{ + const unsigned long *p = addr + (offset >> 5); + int set = 0, bit = offset & 31, res; + + if (bit) { + /* + * Look for nonzero in the first 32 bits: + */ + __asm__("bsfl %1,%0\n\t" + "jne 1f\n\t" + "movl $32, %0\n" + "1:" + : "=r" (set) + : "r" (*p >> bit)); + if (set < (32 - bit)) + return set + offset; + set = 32 - bit; + p++; + } + /* + * No set bit yet, search remaining full words for a bit + */ + res = find_first_bit (p, size - 32 * (p - addr)); + return (offset + set + res); +} +EXPORT_SYMBOL(find_next_bit); + +/** + * find_next_zero_bit - find the first zero bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The maximum size to search + */ +int find_next_zero_bit(const unsigned long *addr, int size, int offset) +{ + const unsigned long *p = addr + (offset >> 5); + int set = 0, bit = offset & 31, res; + + if (bit) { + /* + * Look for zero in the first 32 bits. + */ + __asm__("bsfl %1,%0\n\t" + "jne 1f\n\t" + "movl $32, %0\n" + "1:" + : "=r" (set) + : "r" (~(*p >> bit))); + if (set < (32 - bit)) + return set + offset; + set = 32 - bit; + p++; + } + /* + * No zero yet, search remaining full bytes for a zero + */ + res = find_first_zero_bit(p, size - 32 * (p - addr)); + return (offset + set + res); +} +EXPORT_SYMBOL(find_next_zero_bit); diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S new file mode 100644 index 00000000000..adbccd0bbb7 --- /dev/null +++ b/arch/x86/lib/checksum_32.S @@ -0,0 +1,546 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IP/TCP/UDP checksumming routines + * + * Authors: Jorge Cwik, + * Arnt Gulbrandsen, + * Tom May, + * Pentium Pro/II routines: + * Alexander Kjeldaas + * Finn Arne Gangstad + * Lots of code moved from tcp.c and ip.c; see those files + * for more names. + * + * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception + * handling. + * Andi Kleen, add zeroing on error + * converted to pure assembler + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +/* + * computes a partial checksum, e.g. for TCP/UDP fragments + */ + +/* +unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) + */ + +.text + +#ifndef CONFIG_X86_USE_PPRO_CHECKSUM + + /* + * Experiments with Ethernet and SLIP connections show that buff + * is aligned on either a 2-byte or 4-byte boundary. We get at + * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. + * Fortunately, it is easy to convert 2-byte alignment to 4-byte + * alignment for the unrolled loop. + */ +ENTRY(csum_partial) + CFI_STARTPROC + pushl %esi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET esi, 0 + pushl %ebx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + movl 20(%esp),%eax # Function arg: unsigned int sum + movl 16(%esp),%ecx # Function arg: int len + movl 12(%esp),%esi # Function arg: unsigned char *buff + testl $3, %esi # Check alignment. + jz 2f # Jump if alignment is ok. + testl $1, %esi # Check alignment. + jz 10f # Jump if alignment is boundary of 2bytes. + + # buf is odd + dec %ecx + jl 8f + movzbl (%esi), %ebx + adcl %ebx, %eax + roll $8, %eax + inc %esi + testl $2, %esi + jz 2f +10: + subl $2, %ecx # Alignment uses up two bytes. + jae 1f # Jump if we had at least two bytes. + addl $2, %ecx # ecx was < 2. Deal with it. + jmp 4f +1: movw (%esi), %bx + addl $2, %esi + addw %bx, %ax + adcl $0, %eax +2: + movl %ecx, %edx + shrl $5, %ecx + jz 2f + testl %esi, %esi +1: movl (%esi), %ebx + adcl %ebx, %eax + movl 4(%esi), %ebx + adcl %ebx, %eax + movl 8(%esi), %ebx + adcl %ebx, %eax + movl 12(%esi), %ebx + adcl %ebx, %eax + movl 16(%esi), %ebx + adcl %ebx, %eax + movl 20(%esi), %ebx + adcl %ebx, %eax + movl 24(%esi), %ebx + adcl %ebx, %eax + movl 28(%esi), %ebx + adcl %ebx, %eax + lea 32(%esi), %esi + dec %ecx + jne 1b + adcl $0, %eax +2: movl %edx, %ecx + andl $0x1c, %edx + je 4f + shrl $2, %edx # This clears CF +3: adcl (%esi), %eax + lea 4(%esi), %esi + dec %edx + jne 3b + adcl $0, %eax +4: andl $3, %ecx + jz 7f + cmpl $2, %ecx + jb 5f + movw (%esi),%cx + leal 2(%esi),%esi + je 6f + shll $16,%ecx +5: movb (%esi),%cl +6: addl %ecx,%eax + adcl $0, %eax +7: + testl $1, 12(%esp) + jz 8f + roll $8, %eax +8: + popl %ebx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ebx + popl %esi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE esi + ret + CFI_ENDPROC +ENDPROC(csum_partial) + +#else + +/* Version for PentiumII/PPro */ + +ENTRY(csum_partial) + CFI_STARTPROC + pushl %esi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET esi, 0 + pushl %ebx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + movl 20(%esp),%eax # Function arg: unsigned int sum + movl 16(%esp),%ecx # Function arg: int len + movl 12(%esp),%esi # Function arg: const unsigned char *buf + + testl $3, %esi + jnz 25f +10: + movl %ecx, %edx + movl %ecx, %ebx + andl $0x7c, %ebx + shrl $7, %ecx + addl %ebx,%esi + shrl $2, %ebx + negl %ebx + lea 45f(%ebx,%ebx,2), %ebx + testl %esi, %esi + jmp *%ebx + + # Handle 2-byte-aligned regions +20: addw (%esi), %ax + lea 2(%esi), %esi + adcl $0, %eax + jmp 10b +25: + testl $1, %esi + jz 30f + # buf is odd + dec %ecx + jl 90f + movzbl (%esi), %ebx + addl %ebx, %eax + adcl $0, %eax + roll $8, %eax + inc %esi + testl $2, %esi + jz 10b + +30: subl $2, %ecx + ja 20b + je 32f + addl $2, %ecx + jz 80f + movzbl (%esi),%ebx # csumming 1 byte, 2-aligned + addl %ebx, %eax + adcl $0, %eax + jmp 80f +32: + addw (%esi), %ax # csumming 2 bytes, 2-aligned + adcl $0, %eax + jmp 80f + +40: + addl -128(%esi), %eax + adcl -124(%esi), %eax + adcl -120(%esi), %eax + adcl -116(%esi), %eax + adcl -112(%esi), %eax + adcl -108(%esi), %eax + adcl -104(%esi), %eax + adcl -100(%esi), %eax + adcl -96(%esi), %eax + adcl -92(%esi), %eax + adcl -88(%esi), %eax + adcl -84(%esi), %eax + adcl -80(%esi), %eax + adcl -76(%esi), %eax + adcl -72(%esi), %eax + adcl -68(%esi), %eax + adcl -64(%esi), %eax + adcl -60(%esi), %eax + adcl -56(%esi), %eax + adcl -52(%esi), %eax + adcl -48(%esi), %eax + adcl -44(%esi), %eax + adcl -40(%esi), %eax + adcl -36(%esi), %eax + adcl -32(%esi), %eax + adcl -28(%esi), %eax + adcl -24(%esi), %eax + adcl -20(%esi), %eax + adcl -16(%esi), %eax + adcl -12(%esi), %eax + adcl -8(%esi), %eax + adcl -4(%esi), %eax +45: + lea 128(%esi), %esi + adcl $0, %eax + dec %ecx + jge 40b + movl %edx, %ecx +50: andl $3, %ecx + jz 80f + + # Handle the last 1-3 bytes without jumping + notl %ecx # 1->2, 2->1, 3->0, higher bits are masked + movl $0xffffff,%ebx # by the shll and shrl instructions + shll $3,%ecx + shrl %cl,%ebx + andl -128(%esi),%ebx # esi is 4-aligned so should be ok + addl %ebx,%eax + adcl $0,%eax +80: + testl $1, 12(%esp) + jz 90f + roll $8, %eax +90: + popl %ebx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ebx + popl %esi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE esi + ret + CFI_ENDPROC +ENDPROC(csum_partial) + +#endif + +/* +unsigned int csum_partial_copy_generic (const char *src, char *dst, + int len, int sum, int *src_err_ptr, int *dst_err_ptr) + */ + +/* + * Copy from ds while checksumming, otherwise like csum_partial + * + * The macros SRC and DST specify the type of access for the instruction. + * thus we can call a custom exception handler for all access types. + * + * FIXME: could someone double-check whether I haven't mixed up some SRC and + * DST definitions? It's damn hard to trigger all cases. I hope I got + * them all but there's no guarantee. + */ + +#define SRC(y...) \ + 9999: y; \ + .section __ex_table, "a"; \ + .long 9999b, 6001f ; \ + .previous + +#define DST(y...) \ + 9999: y; \ + .section __ex_table, "a"; \ + .long 9999b, 6002f ; \ + .previous + +#ifndef CONFIG_X86_USE_PPRO_CHECKSUM + +#define ARGBASE 16 +#define FP 12 + +ENTRY(csum_partial_copy_generic) + CFI_STARTPROC + subl $4,%esp + CFI_ADJUST_CFA_OFFSET 4 + pushl %edi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edi, 0 + pushl %esi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET esi, 0 + pushl %ebx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + movl ARGBASE+16(%esp),%eax # sum + movl ARGBASE+12(%esp),%ecx # len + movl ARGBASE+4(%esp),%esi # src + movl ARGBASE+8(%esp),%edi # dst + + testl $2, %edi # Check alignment. + jz 2f # Jump if alignment is ok. + subl $2, %ecx # Alignment uses up two bytes. + jae 1f # Jump if we had at least two bytes. + addl $2, %ecx # ecx was < 2. Deal with it. + jmp 4f +SRC(1: movw (%esi), %bx ) + addl $2, %esi +DST( movw %bx, (%edi) ) + addl $2, %edi + addw %bx, %ax + adcl $0, %eax +2: + movl %ecx, FP(%esp) + shrl $5, %ecx + jz 2f + testl %esi, %esi +SRC(1: movl (%esi), %ebx ) +SRC( movl 4(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, (%edi) ) + adcl %edx, %eax +DST( movl %edx, 4(%edi) ) + +SRC( movl 8(%esi), %ebx ) +SRC( movl 12(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 8(%edi) ) + adcl %edx, %eax +DST( movl %edx, 12(%edi) ) + +SRC( movl 16(%esi), %ebx ) +SRC( movl 20(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 16(%edi) ) + adcl %edx, %eax +DST( movl %edx, 20(%edi) ) + +SRC( movl 24(%esi), %ebx ) +SRC( movl 28(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 24(%edi) ) + adcl %edx, %eax +DST( movl %edx, 28(%edi) ) + + lea 32(%esi), %esi + lea 32(%edi), %edi + dec %ecx + jne 1b + adcl $0, %eax +2: movl FP(%esp), %edx + movl %edx, %ecx + andl $0x1c, %edx + je 4f + shrl $2, %edx # This clears CF +SRC(3: movl (%esi), %ebx ) + adcl %ebx, %eax +DST( movl %ebx, (%edi) ) + lea 4(%esi), %esi + lea 4(%edi), %edi + dec %edx + jne 3b + adcl $0, %eax +4: andl $3, %ecx + jz 7f + cmpl $2, %ecx + jb 5f +SRC( movw (%esi), %cx ) + leal 2(%esi), %esi +DST( movw %cx, (%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%ecx +SRC(5: movb (%esi), %cl ) +DST( movb %cl, (%edi) ) +6: addl %ecx, %eax + adcl $0, %eax +7: +5000: + +# Exception handler: +.section .fixup, "ax" + +6001: + movl ARGBASE+20(%esp), %ebx # src_err_ptr + movl $-EFAULT, (%ebx) + + # zero the complete destination - computing the rest + # is too much work + movl ARGBASE+8(%esp), %edi # dst + movl ARGBASE+12(%esp), %ecx # len + xorl %eax,%eax + rep ; stosb + + jmp 5000b + +6002: + movl ARGBASE+24(%esp), %ebx # dst_err_ptr + movl $-EFAULT,(%ebx) + jmp 5000b + +.previous + + popl %ebx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ebx + popl %esi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE esi + popl %edi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edi + popl %ecx # equivalent to addl $4,%esp + CFI_ADJUST_CFA_OFFSET -4 + ret + CFI_ENDPROC +ENDPROC(csum_partial_copy_generic) + +#else + +/* Version for PentiumII/PPro */ + +#define ROUND1(x) \ + SRC(movl x(%esi), %ebx ) ; \ + addl %ebx, %eax ; \ + DST(movl %ebx, x(%edi) ) ; + +#define ROUND(x) \ + SRC(movl x(%esi), %ebx ) ; \ + adcl %ebx, %eax ; \ + DST(movl %ebx, x(%edi) ) ; + +#define ARGBASE 12 + +ENTRY(csum_partial_copy_generic) + CFI_STARTPROC + pushl %ebx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + pushl %edi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edi, 0 + pushl %esi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET esi, 0 + movl ARGBASE+4(%esp),%esi #src + movl ARGBASE+8(%esp),%edi #dst + movl ARGBASE+12(%esp),%ecx #len + movl ARGBASE+16(%esp),%eax #sum +# movl %ecx, %edx + movl %ecx, %ebx + movl %esi, %edx + shrl $6, %ecx + andl $0x3c, %ebx + negl %ebx + subl %ebx, %esi + subl %ebx, %edi + lea -1(%esi),%edx + andl $-32,%edx + lea 3f(%ebx,%ebx), %ebx + testl %esi, %esi + jmp *%ebx +1: addl $64,%esi + addl $64,%edi + SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) + ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52) + ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36) + ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20) + ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4) +3: adcl $0,%eax + addl $64, %edx + dec %ecx + jge 1b +4: movl ARGBASE+12(%esp),%edx #len + andl $3, %edx + jz 7f + cmpl $2, %edx + jb 5f +SRC( movw (%esi), %dx ) + leal 2(%esi), %esi +DST( movw %dx, (%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%edx +5: +SRC( movb (%esi), %dl ) +DST( movb %dl, (%edi) ) +6: addl %edx, %eax + adcl $0, %eax +7: +.section .fixup, "ax" +6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr + movl $-EFAULT, (%ebx) + # zero the complete destination (computing the rest is too much work) + movl ARGBASE+8(%esp),%edi # dst + movl ARGBASE+12(%esp),%ecx # len + xorl %eax,%eax + rep; stosb + jmp 7b +6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr + movl $-EFAULT, (%ebx) + jmp 7b +.previous + + popl %esi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE esi + popl %edi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edi + popl %ebx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ebx + ret + CFI_ENDPROC +ENDPROC(csum_partial_copy_generic) + +#undef ROUND +#undef ROUND1 + +#endif diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c new file mode 100644 index 00000000000..f6edb11364d --- /dev/null +++ b/arch/x86/lib/delay_32.c @@ -0,0 +1,103 @@ +/* + * Precise Delay Loops for i386 + * + * Copyright (C) 1993 Linus Torvalds + * Copyright (C) 1997 Martin Mares + * + * The __delay function must _NOT_ be inlined as its execution time + * depends wildly on alignment on many x86 processors. The additional + * jump magic is needed to get the timing stable on all the CPU's + * we have to worry about. + */ + +#include +#include +#include + +#include +#include +#include + +#ifdef CONFIG_SMP +# include +#endif + +/* simple loop based delay: */ +static void delay_loop(unsigned long loops) +{ + int d0; + + __asm__ __volatile__( + "\tjmp 1f\n" + ".align 16\n" + "1:\tjmp 2f\n" + ".align 16\n" + "2:\tdecl %0\n\tjns 2b" + :"=&a" (d0) + :"0" (loops)); +} + +/* TSC based delay: */ +static void delay_tsc(unsigned long loops) +{ + unsigned long bclock, now; + + rdtscl(bclock); + do { + rep_nop(); + rdtscl(now); + } while ((now-bclock) < loops); +} + +/* + * Since we calibrate only once at boot, this + * function should be set once at boot and not changed + */ +static void (*delay_fn)(unsigned long) = delay_loop; + +void use_tsc_delay(void) +{ + delay_fn = delay_tsc; +} + +int read_current_timer(unsigned long *timer_val) +{ + if (delay_fn == delay_tsc) { + rdtscl(*timer_val); + return 0; + } + return -1; +} + +void __delay(unsigned long loops) +{ + delay_fn(loops); +} + +inline void __const_udelay(unsigned long xloops) +{ + int d0; + + xloops *= 4; + __asm__("mull %0" + :"=d" (xloops), "=&a" (d0) + :"1" (xloops), "0" + (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4))); + + __delay(++xloops); +} + +void __udelay(unsigned long usecs) +{ + __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ +} + +void __ndelay(unsigned long nsecs) +{ + __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ +} + +EXPORT_SYMBOL(__delay); +EXPORT_SYMBOL(__const_udelay); +EXPORT_SYMBOL(__udelay); +EXPORT_SYMBOL(__ndelay); diff --git a/arch/x86/lib/getuser_32.S b/arch/x86/lib/getuser_32.S new file mode 100644 index 00000000000..6d84b53f12a --- /dev/null +++ b/arch/x86/lib/getuser_32.S @@ -0,0 +1,78 @@ +/* + * __get_user functions. + * + * (C) Copyright 1998 Linus Torvalds + * + * These functions have a non-standard call interface + * to make them more efficient, especially as they + * return an error value in addition to the "real" + * return value. + */ +#include +#include +#include + + +/* + * __get_user_X + * + * Inputs: %eax contains the address + * + * Outputs: %eax is error code (0 or -EFAULT) + * %edx contains zero-extended value + * + * These functions should not modify any other registers, + * as they get called from within inline assembly. + */ + +.text +ENTRY(__get_user_1) + CFI_STARTPROC + GET_THREAD_INFO(%edx) + cmpl TI_addr_limit(%edx),%eax + jae bad_get_user +1: movzbl (%eax),%edx + xorl %eax,%eax + ret + CFI_ENDPROC +ENDPROC(__get_user_1) + +ENTRY(__get_user_2) + CFI_STARTPROC + addl $1,%eax + jc bad_get_user + GET_THREAD_INFO(%edx) + cmpl TI_addr_limit(%edx),%eax + jae bad_get_user +2: movzwl -1(%eax),%edx + xorl %eax,%eax + ret + CFI_ENDPROC +ENDPROC(__get_user_2) + +ENTRY(__get_user_4) + CFI_STARTPROC + addl $3,%eax + jc bad_get_user + GET_THREAD_INFO(%edx) + cmpl TI_addr_limit(%edx),%eax + jae bad_get_user +3: movl -3(%eax),%edx + xorl %eax,%eax + ret + CFI_ENDPROC +ENDPROC(__get_user_4) + +bad_get_user: + CFI_STARTPROC + xorl %edx,%edx + movl $-14,%eax + ret + CFI_ENDPROC +END(bad_get_user) + +.section __ex_table,"a" + .long 1b,bad_get_user + .long 2b,bad_get_user + .long 3b,bad_get_user +.previous diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c new file mode 100644 index 00000000000..8ac51b82a63 --- /dev/null +++ b/arch/x86/lib/memcpy_32.c @@ -0,0 +1,43 @@ +#include +#include + +#undef memcpy +#undef memset + +void *memcpy(void *to, const void *from, size_t n) +{ +#ifdef CONFIG_X86_USE_3DNOW + return __memcpy3d(to, from, n); +#else + return __memcpy(to, from, n); +#endif +} +EXPORT_SYMBOL(memcpy); + +void *memset(void *s, int c, size_t count) +{ + return __memset(s, c, count); +} +EXPORT_SYMBOL(memset); + +void *memmove(void *dest, const void *src, size_t n) +{ + int d0, d1, d2; + + if (dest < src) { + memcpy(dest,src,n); + } else { + __asm__ __volatile__( + "std\n\t" + "rep\n\t" + "movsb\n\t" + "cld" + : "=&c" (d0), "=&S" (d1), "=&D" (d2) + :"0" (n), + "1" (n-1+(const char *)src), + "2" (n-1+(char *)dest) + :"memory"); + } + return dest; +} +EXPORT_SYMBOL(memmove); diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c new file mode 100644 index 00000000000..28084d2e8dd --- /dev/null +++ b/arch/x86/lib/mmx_32.c @@ -0,0 +1,403 @@ +#include +#include +#include +#include +#include + +#include + + +/* + * MMX 3DNow! library helper functions + * + * To do: + * We can use MMX just for prefetch in IRQ's. This may be a win. + * (reported so on K6-III) + * We should use a better code neutral filler for the short jump + * leal ebx. [ebx] is apparently best for K6-2, but Cyrix ?? + * We also want to clobber the filler register so we don't get any + * register forwarding stalls on the filler. + * + * Add *user handling. Checksums are not a win with MMX on any CPU + * tested so far for any MMX solution figured. + * + * 22/09/2000 - Arjan van de Ven + * Improved for non-egineering-sample Athlons + * + */ + +void *_mmx_memcpy(void *to, const void *from, size_t len) +{ + void *p; + int i; + + if (unlikely(in_interrupt())) + return __memcpy(to, from, len); + + p = to; + i = len >> 6; /* len/64 */ + + kernel_fpu_begin(); + + __asm__ __volatile__ ( + "1: prefetch (%0)\n" /* This set is 28 bytes */ + " prefetch 64(%0)\n" + " prefetch 128(%0)\n" + " prefetch 192(%0)\n" + " prefetch 256(%0)\n" + "2: \n" + ".section .fixup, \"ax\"\n" + "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from) ); + + + for(; i>5; i--) + { + __asm__ __volatile__ ( + "1: prefetch 320(%0)\n" + "2: movq (%0), %%mm0\n" + " movq 8(%0), %%mm1\n" + " movq 16(%0), %%mm2\n" + " movq 24(%0), %%mm3\n" + " movq %%mm0, (%1)\n" + " movq %%mm1, 8(%1)\n" + " movq %%mm2, 16(%1)\n" + " movq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm0\n" + " movq 40(%0), %%mm1\n" + " movq 48(%0), %%mm2\n" + " movq 56(%0), %%mm3\n" + " movq %%mm0, 32(%1)\n" + " movq %%mm1, 40(%1)\n" + " movq %%mm2, 48(%1)\n" + " movq %%mm3, 56(%1)\n" + ".section .fixup, \"ax\"\n" + "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + + for(; i>0; i--) + { + __asm__ __volatile__ ( + " movq (%0), %%mm0\n" + " movq 8(%0), %%mm1\n" + " movq 16(%0), %%mm2\n" + " movq 24(%0), %%mm3\n" + " movq %%mm0, (%1)\n" + " movq %%mm1, 8(%1)\n" + " movq %%mm2, 16(%1)\n" + " movq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm0\n" + " movq 40(%0), %%mm1\n" + " movq 48(%0), %%mm2\n" + " movq 56(%0), %%mm3\n" + " movq %%mm0, 32(%1)\n" + " movq %%mm1, 40(%1)\n" + " movq %%mm2, 48(%1)\n" + " movq %%mm3, 56(%1)\n" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + /* + * Now do the tail of the block + */ + __memcpy(to, from, len&63); + kernel_fpu_end(); + return p; +} + +#ifdef CONFIG_MK7 + +/* + * The K7 has streaming cache bypass load/store. The Cyrix III, K6 and + * other MMX using processors do not. + */ + +static void fast_clear_page(void *page) +{ + int i; + + kernel_fpu_begin(); + + __asm__ __volatile__ ( + " pxor %%mm0, %%mm0\n" : : + ); + + for(i=0;i<4096/64;i++) + { + __asm__ __volatile__ ( + " movntq %%mm0, (%0)\n" + " movntq %%mm0, 8(%0)\n" + " movntq %%mm0, 16(%0)\n" + " movntq %%mm0, 24(%0)\n" + " movntq %%mm0, 32(%0)\n" + " movntq %%mm0, 40(%0)\n" + " movntq %%mm0, 48(%0)\n" + " movntq %%mm0, 56(%0)\n" + : : "r" (page) : "memory"); + page+=64; + } + /* since movntq is weakly-ordered, a "sfence" is needed to become + * ordered again. + */ + __asm__ __volatile__ ( + " sfence \n" : : + ); + kernel_fpu_end(); +} + +static void fast_copy_page(void *to, void *from) +{ + int i; + + kernel_fpu_begin(); + + /* maybe the prefetch stuff can go before the expensive fnsave... + * but that is for later. -AV + */ + __asm__ __volatile__ ( + "1: prefetch (%0)\n" + " prefetch 64(%0)\n" + " prefetch 128(%0)\n" + " prefetch 192(%0)\n" + " prefetch 256(%0)\n" + "2: \n" + ".section .fixup, \"ax\"\n" + "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from) ); + + for(i=0; i<(4096-320)/64; i++) + { + __asm__ __volatile__ ( + "1: prefetch 320(%0)\n" + "2: movq (%0), %%mm0\n" + " movntq %%mm0, (%1)\n" + " movq 8(%0), %%mm1\n" + " movntq %%mm1, 8(%1)\n" + " movq 16(%0), %%mm2\n" + " movntq %%mm2, 16(%1)\n" + " movq 24(%0), %%mm3\n" + " movntq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm4\n" + " movntq %%mm4, 32(%1)\n" + " movq 40(%0), %%mm5\n" + " movntq %%mm5, 40(%1)\n" + " movq 48(%0), %%mm6\n" + " movntq %%mm6, 48(%1)\n" + " movq 56(%0), %%mm7\n" + " movntq %%mm7, 56(%1)\n" + ".section .fixup, \"ax\"\n" + "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + for(i=(4096-320)/64; i<4096/64; i++) + { + __asm__ __volatile__ ( + "2: movq (%0), %%mm0\n" + " movntq %%mm0, (%1)\n" + " movq 8(%0), %%mm1\n" + " movntq %%mm1, 8(%1)\n" + " movq 16(%0), %%mm2\n" + " movntq %%mm2, 16(%1)\n" + " movq 24(%0), %%mm3\n" + " movntq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm4\n" + " movntq %%mm4, 32(%1)\n" + " movq 40(%0), %%mm5\n" + " movntq %%mm5, 40(%1)\n" + " movq 48(%0), %%mm6\n" + " movntq %%mm6, 48(%1)\n" + " movq 56(%0), %%mm7\n" + " movntq %%mm7, 56(%1)\n" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + /* since movntq is weakly-ordered, a "sfence" is needed to become + * ordered again. + */ + __asm__ __volatile__ ( + " sfence \n" : : + ); + kernel_fpu_end(); +} + +#else + +/* + * Generic MMX implementation without K7 specific streaming + */ + +static void fast_clear_page(void *page) +{ + int i; + + kernel_fpu_begin(); + + __asm__ __volatile__ ( + " pxor %%mm0, %%mm0\n" : : + ); + + for(i=0;i<4096/128;i++) + { + __asm__ __volatile__ ( + " movq %%mm0, (%0)\n" + " movq %%mm0, 8(%0)\n" + " movq %%mm0, 16(%0)\n" + " movq %%mm0, 24(%0)\n" + " movq %%mm0, 32(%0)\n" + " movq %%mm0, 40(%0)\n" + " movq %%mm0, 48(%0)\n" + " movq %%mm0, 56(%0)\n" + " movq %%mm0, 64(%0)\n" + " movq %%mm0, 72(%0)\n" + " movq %%mm0, 80(%0)\n" + " movq %%mm0, 88(%0)\n" + " movq %%mm0, 96(%0)\n" + " movq %%mm0, 104(%0)\n" + " movq %%mm0, 112(%0)\n" + " movq %%mm0, 120(%0)\n" + : : "r" (page) : "memory"); + page+=128; + } + + kernel_fpu_end(); +} + +static void fast_copy_page(void *to, void *from) +{ + int i; + + + kernel_fpu_begin(); + + __asm__ __volatile__ ( + "1: prefetch (%0)\n" + " prefetch 64(%0)\n" + " prefetch 128(%0)\n" + " prefetch 192(%0)\n" + " prefetch 256(%0)\n" + "2: \n" + ".section .fixup, \"ax\"\n" + "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from) ); + + for(i=0; i<4096/64; i++) + { + __asm__ __volatile__ ( + "1: prefetch 320(%0)\n" + "2: movq (%0), %%mm0\n" + " movq 8(%0), %%mm1\n" + " movq 16(%0), %%mm2\n" + " movq 24(%0), %%mm3\n" + " movq %%mm0, (%1)\n" + " movq %%mm1, 8(%1)\n" + " movq %%mm2, 16(%1)\n" + " movq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm0\n" + " movq 40(%0), %%mm1\n" + " movq 48(%0), %%mm2\n" + " movq 56(%0), %%mm3\n" + " movq %%mm0, 32(%1)\n" + " movq %%mm1, 40(%1)\n" + " movq %%mm2, 48(%1)\n" + " movq %%mm3, 56(%1)\n" + ".section .fixup, \"ax\"\n" + "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + kernel_fpu_end(); +} + + +#endif + +/* + * Favour MMX for page clear and copy. + */ + +static void slow_zero_page(void * page) +{ + int d0, d1; + __asm__ __volatile__( \ + "cld\n\t" \ + "rep ; stosl" \ + : "=&c" (d0), "=&D" (d1) + :"a" (0),"1" (page),"0" (1024) + :"memory"); +} + +void mmx_clear_page(void * page) +{ + if(unlikely(in_interrupt())) + slow_zero_page(page); + else + fast_clear_page(page); +} + +static void slow_copy_page(void *to, void *from) +{ + int d0, d1, d2; + __asm__ __volatile__( \ + "cld\n\t" \ + "rep ; movsl" \ + : "=&c" (d0), "=&D" (d1), "=&S" (d2) \ + : "0" (1024),"1" ((long) to),"2" ((long) from) \ + : "memory"); +} + + +void mmx_copy_page(void *to, void *from) +{ + if(unlikely(in_interrupt())) + slow_copy_page(to, from); + else + fast_copy_page(to, from); +} + +EXPORT_SYMBOL(_mmx_memcpy); +EXPORT_SYMBOL(mmx_clear_page); +EXPORT_SYMBOL(mmx_copy_page); diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c new file mode 100644 index 00000000000..7767962f25d --- /dev/null +++ b/arch/x86/lib/msr-on-cpu.c @@ -0,0 +1,119 @@ +#include +#include +#include +#include + +struct msr_info { + u32 msr_no; + u32 l, h; + int err; +}; + +static void __rdmsr_on_cpu(void *info) +{ + struct msr_info *rv = info; + + rdmsr(rv->msr_no, rv->l, rv->h); +} + +static void __rdmsr_safe_on_cpu(void *info) +{ + struct msr_info *rv = info; + + rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h); +} + +static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe) +{ + int err = 0; + preempt_disable(); + if (smp_processor_id() == cpu) + if (safe) + err = rdmsr_safe(msr_no, l, h); + else + rdmsr(msr_no, *l, *h); + else { + struct msr_info rv; + + rv.msr_no = msr_no; + if (safe) { + smp_call_function_single(cpu, __rdmsr_safe_on_cpu, + &rv, 0, 1); + err = rv.err; + } else { + smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 0, 1); + } + *l = rv.l; + *h = rv.h; + } + preempt_enable(); + return err; +} + +static void __wrmsr_on_cpu(void *info) +{ + struct msr_info *rv = info; + + wrmsr(rv->msr_no, rv->l, rv->h); +} + +static void __wrmsr_safe_on_cpu(void *info) +{ + struct msr_info *rv = info; + + rv->err = wrmsr_safe(rv->msr_no, rv->l, rv->h); +} + +static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe) +{ + int err = 0; + preempt_disable(); + if (smp_processor_id() == cpu) + if (safe) + err = wrmsr_safe(msr_no, l, h); + else + wrmsr(msr_no, l, h); + else { + struct msr_info rv; + + rv.msr_no = msr_no; + rv.l = l; + rv.h = h; + if (safe) { + smp_call_function_single(cpu, __wrmsr_safe_on_cpu, + &rv, 0, 1); + err = rv.err; + } else { + smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 0, 1); + } + } + preempt_enable(); + return err; +} + +void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) +{ + _wrmsr_on_cpu(cpu, msr_no, l, h, 0); +} + +void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) +{ + _rdmsr_on_cpu(cpu, msr_no, l, h, 0); +} + +/* These "safe" variants are slower and should be used when the target MSR + may not actually exist. */ +int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) +{ + return _wrmsr_on_cpu(cpu, msr_no, l, h, 1); +} + +int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) +{ + return _rdmsr_on_cpu(cpu, msr_no, l, h, 1); +} + +EXPORT_SYMBOL(rdmsr_on_cpu); +EXPORT_SYMBOL(wrmsr_on_cpu); +EXPORT_SYMBOL(rdmsr_safe_on_cpu); +EXPORT_SYMBOL(wrmsr_safe_on_cpu); diff --git a/arch/x86/lib/putuser_32.S b/arch/x86/lib/putuser_32.S new file mode 100644 index 00000000000..f58fba109d1 --- /dev/null +++ b/arch/x86/lib/putuser_32.S @@ -0,0 +1,98 @@ +/* + * __put_user functions. + * + * (C) Copyright 2005 Linus Torvalds + * + * These functions have a non-standard call interface + * to make them more efficient, especially as they + * return an error value in addition to the "real" + * return value. + */ +#include +#include +#include + + +/* + * __put_user_X + * + * Inputs: %eax[:%edx] contains the data + * %ecx contains the address + * + * Outputs: %eax is error code (0 or -EFAULT) + * + * These functions should not modify any other registers, + * as they get called from within inline assembly. + */ + +#define ENTER CFI_STARTPROC ; \ + pushl %ebx ; \ + CFI_ADJUST_CFA_OFFSET 4 ; \ + CFI_REL_OFFSET ebx, 0 ; \ + GET_THREAD_INFO(%ebx) +#define EXIT popl %ebx ; \ + CFI_ADJUST_CFA_OFFSET -4 ; \ + CFI_RESTORE ebx ; \ + ret ; \ + CFI_ENDPROC + +.text +ENTRY(__put_user_1) + ENTER + cmpl TI_addr_limit(%ebx),%ecx + jae bad_put_user +1: movb %al,(%ecx) + xorl %eax,%eax + EXIT +ENDPROC(__put_user_1) + +ENTRY(__put_user_2) + ENTER + movl TI_addr_limit(%ebx),%ebx + subl $1,%ebx + cmpl %ebx,%ecx + jae bad_put_user +2: movw %ax,(%ecx) + xorl %eax,%eax + EXIT +ENDPROC(__put_user_2) + +ENTRY(__put_user_4) + ENTER + movl TI_addr_limit(%ebx),%ebx + subl $3,%ebx + cmpl %ebx,%ecx + jae bad_put_user +3: movl %eax,(%ecx) + xorl %eax,%eax + EXIT +ENDPROC(__put_user_4) + +ENTRY(__put_user_8) + ENTER + movl TI_addr_limit(%ebx),%ebx + subl $7,%ebx + cmpl %ebx,%ecx + jae bad_put_user +4: movl %eax,(%ecx) +5: movl %edx,4(%ecx) + xorl %eax,%eax + EXIT +ENDPROC(__put_user_8) + +bad_put_user: + CFI_STARTPROC simple + CFI_DEF_CFA esp, 2*4 + CFI_OFFSET eip, -1*4 + CFI_OFFSET ebx, -2*4 + movl $-14,%eax + EXIT +END(bad_put_user) + +.section __ex_table,"a" + .long 1b,bad_put_user + .long 2b,bad_put_user + .long 3b,bad_put_user + .long 4b,bad_put_user + .long 5b,bad_put_user +.previous diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S new file mode 100644 index 00000000000..c01eb39c0b4 --- /dev/null +++ b/arch/x86/lib/semaphore_32.S @@ -0,0 +1,219 @@ +/* + * i386 semaphore implementation. + * + * (C) Copyright 1999 Linus Torvalds + * + * Portions Copyright 1999 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * rw semaphores implemented November 1999 by Benjamin LaHaise + */ + +#include +#include +#include +#include +#include + +/* + * The semaphore operations have a special calling sequence that + * allow us to do a simpler in-line version of them. These routines + * need to convert that sequence back into the C sequence when + * there is contention on the semaphore. + * + * %eax contains the semaphore pointer on entry. Save the C-clobbered + * registers (%eax, %edx and %ecx) except %eax whish is either a return + * value or just clobbered.. + */ + .section .sched.text +ENTRY(__down_failed) + CFI_STARTPROC + FRAME + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call __down + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ecx + popl %edx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edx + ENDFRAME + ret + CFI_ENDPROC + END(__down_failed) + +ENTRY(__down_failed_interruptible) + CFI_STARTPROC + FRAME + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call __down_interruptible + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ecx + popl %edx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edx + ENDFRAME + ret + CFI_ENDPROC + END(__down_failed_interruptible) + +ENTRY(__down_failed_trylock) + CFI_STARTPROC + FRAME + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call __down_trylock + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ecx + popl %edx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edx + ENDFRAME + ret + CFI_ENDPROC + END(__down_failed_trylock) + +ENTRY(__up_wakeup) + CFI_STARTPROC + FRAME + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call __up + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ecx + popl %edx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edx + ENDFRAME + ret + CFI_ENDPROC + END(__up_wakeup) + +/* + * rw spinlock fallbacks + */ +#ifdef CONFIG_SMP +ENTRY(__write_lock_failed) + CFI_STARTPROC simple + FRAME +2: LOCK_PREFIX + addl $ RW_LOCK_BIAS,(%eax) +1: rep; nop + cmpl $ RW_LOCK_BIAS,(%eax) + jne 1b + LOCK_PREFIX + subl $ RW_LOCK_BIAS,(%eax) + jnz 2b + ENDFRAME + ret + CFI_ENDPROC + END(__write_lock_failed) + +ENTRY(__read_lock_failed) + CFI_STARTPROC + FRAME +2: LOCK_PREFIX + incl (%eax) +1: rep; nop + cmpl $1,(%eax) + js 1b + LOCK_PREFIX + decl (%eax) + js 2b + ENDFRAME + ret + CFI_ENDPROC + END(__read_lock_failed) + +#endif + +#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM + +/* Fix up special calling conventions */ +ENTRY(call_rwsem_down_read_failed) + CFI_STARTPROC + push %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + push %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + call rwsem_down_read_failed + pop %edx + CFI_ADJUST_CFA_OFFSET -4 + pop %ecx + CFI_ADJUST_CFA_OFFSET -4 + ret + CFI_ENDPROC + END(call_rwsem_down_read_failed) + +ENTRY(call_rwsem_down_write_failed) + CFI_STARTPROC + push %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + calll rwsem_down_write_failed + pop %ecx + CFI_ADJUST_CFA_OFFSET -4 + ret + CFI_ENDPROC + END(call_rwsem_down_write_failed) + +ENTRY(call_rwsem_wake) + CFI_STARTPROC + decw %dx /* do nothing if still outstanding active readers */ + jnz 1f + push %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + call rwsem_wake + pop %ecx + CFI_ADJUST_CFA_OFFSET -4 +1: ret + CFI_ENDPROC + END(call_rwsem_wake) + +/* Fix up special calling conventions */ +ENTRY(call_rwsem_downgrade_wake) + CFI_STARTPROC + push %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx,0 + push %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx,0 + call rwsem_downgrade_wake + pop %edx + CFI_ADJUST_CFA_OFFSET -4 + pop %ecx + CFI_ADJUST_CFA_OFFSET -4 + ret + CFI_ENDPROC + END(call_rwsem_downgrade_wake) + +#endif diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c new file mode 100644 index 00000000000..2c773fefa3d --- /dev/null +++ b/arch/x86/lib/string_32.c @@ -0,0 +1,257 @@ +/* + * Most of the string-functions are rather heavily hand-optimized, + * see especially strsep,strstr,str[c]spn. They should work, but are not + * very easy to understand. Everything is done entirely within the register + * set, making the functions fast and clean. String instructions have been + * used through-out, making for "slightly" unclear code :-) + * + * AK: On P4 and K7 using non string instruction implementations might be faster + * for large memory blocks. But most of them are unlikely to be used on large + * strings. + */ + +#include +#include + +#ifdef __HAVE_ARCH_STRCPY +char *strcpy(char * dest,const char *src) +{ + int d0, d1, d2; + asm volatile( "1:\tlodsb\n\t" + "stosb\n\t" + "testb %%al,%%al\n\t" + "jne 1b" + : "=&S" (d0), "=&D" (d1), "=&a" (d2) + :"0" (src),"1" (dest) : "memory"); + return dest; +} +EXPORT_SYMBOL(strcpy); +#endif + +#ifdef __HAVE_ARCH_STRNCPY +char *strncpy(char * dest,const char *src,size_t count) +{ + int d0, d1, d2, d3; + asm volatile( "1:\tdecl %2\n\t" + "js 2f\n\t" + "lodsb\n\t" + "stosb\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "rep\n\t" + "stosb\n" + "2:" + : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3) + :"0" (src),"1" (dest),"2" (count) : "memory"); + return dest; +} +EXPORT_SYMBOL(strncpy); +#endif + +#ifdef __HAVE_ARCH_STRCAT +char *strcat(char * dest,const char * src) +{ + int d0, d1, d2, d3; + asm volatile( "repne\n\t" + "scasb\n\t" + "decl %1\n" + "1:\tlodsb\n\t" + "stosb\n\t" + "testb %%al,%%al\n\t" + "jne 1b" + : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) + : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu): "memory"); + return dest; +} +EXPORT_SYMBOL(strcat); +#endif + +#ifdef __HAVE_ARCH_STRNCAT +char *strncat(char * dest,const char * src,size_t count) +{ + int d0, d1, d2, d3; + asm volatile( "repne\n\t" + "scasb\n\t" + "decl %1\n\t" + "movl %8,%3\n" + "1:\tdecl %3\n\t" + "js 2f\n\t" + "lodsb\n\t" + "stosb\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n" + "2:\txorl %2,%2\n\t" + "stosb" + : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) + : "0" (src),"1" (dest),"2" (0),"3" (0xffffffffu), "g" (count) + : "memory"); + return dest; +} +EXPORT_SYMBOL(strncat); +#endif + +#ifdef __HAVE_ARCH_STRCMP +int strcmp(const char * cs,const char * ct) +{ + int d0, d1; + int res; + asm volatile( "1:\tlodsb\n\t" + "scasb\n\t" + "jne 2f\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "xorl %%eax,%%eax\n\t" + "jmp 3f\n" + "2:\tsbbl %%eax,%%eax\n\t" + "orb $1,%%al\n" + "3:" + :"=a" (res), "=&S" (d0), "=&D" (d1) + :"1" (cs),"2" (ct) + :"memory"); + return res; +} +EXPORT_SYMBOL(strcmp); +#endif + +#ifdef __HAVE_ARCH_STRNCMP +int strncmp(const char * cs,const char * ct,size_t count) +{ + int res; + int d0, d1, d2; + asm volatile( "1:\tdecl %3\n\t" + "js 2f\n\t" + "lodsb\n\t" + "scasb\n\t" + "jne 3f\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n" + "2:\txorl %%eax,%%eax\n\t" + "jmp 4f\n" + "3:\tsbbl %%eax,%%eax\n\t" + "orb $1,%%al\n" + "4:" + :"=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2) + :"1" (cs),"2" (ct),"3" (count) + :"memory"); + return res; +} +EXPORT_SYMBOL(strncmp); +#endif + +#ifdef __HAVE_ARCH_STRCHR +char *strchr(const char * s, int c) +{ + int d0; + char * res; + asm volatile( "movb %%al,%%ah\n" + "1:\tlodsb\n\t" + "cmpb %%ah,%%al\n\t" + "je 2f\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "movl $1,%1\n" + "2:\tmovl %1,%0\n\t" + "decl %0" + :"=a" (res), "=&S" (d0) + :"1" (s),"0" (c) + :"memory"); + return res; +} +EXPORT_SYMBOL(strchr); +#endif + +#ifdef __HAVE_ARCH_STRRCHR +char *strrchr(const char * s, int c) +{ + int d0, d1; + char * res; + asm volatile( "movb %%al,%%ah\n" + "1:\tlodsb\n\t" + "cmpb %%ah,%%al\n\t" + "jne 2f\n\t" + "leal -1(%%esi),%0\n" + "2:\ttestb %%al,%%al\n\t" + "jne 1b" + :"=g" (res), "=&S" (d0), "=&a" (d1) + :"0" (0),"1" (s),"2" (c) + :"memory"); + return res; +} +EXPORT_SYMBOL(strrchr); +#endif + +#ifdef __HAVE_ARCH_STRLEN +size_t strlen(const char * s) +{ + int d0; + int res; + asm volatile( "repne\n\t" + "scasb\n\t" + "notl %0\n\t" + "decl %0" + :"=c" (res), "=&D" (d0) + :"1" (s),"a" (0), "0" (0xffffffffu) + :"memory"); + return res; +} +EXPORT_SYMBOL(strlen); +#endif + +#ifdef __HAVE_ARCH_MEMCHR +void *memchr(const void *cs,int c,size_t count) +{ + int d0; + void *res; + if (!count) + return NULL; + asm volatile( "repne\n\t" + "scasb\n\t" + "je 1f\n\t" + "movl $1,%0\n" + "1:\tdecl %0" + :"=D" (res), "=&c" (d0) + :"a" (c),"0" (cs),"1" (count) + :"memory"); + return res; +} +EXPORT_SYMBOL(memchr); +#endif + +#ifdef __HAVE_ARCH_MEMSCAN +void *memscan(void * addr, int c, size_t size) +{ + if (!size) + return addr; + asm volatile("repnz; scasb\n\t" + "jnz 1f\n\t" + "dec %%edi\n" + "1:" + : "=D" (addr), "=c" (size) + : "0" (addr), "1" (size), "a" (c) + : "memory"); + return addr; +} +EXPORT_SYMBOL(memscan); +#endif + +#ifdef __HAVE_ARCH_STRNLEN +size_t strnlen(const char *s, size_t count) +{ + int d0; + int res; + asm volatile( "movl %2,%0\n\t" + "jmp 2f\n" + "1:\tcmpb $0,(%0)\n\t" + "je 3f\n\t" + "incl %0\n" + "2:\tdecl %1\n\t" + "cmpl $-1,%1\n\t" + "jne 1b\n" + "3:\tsubl %2,%0" + :"=a" (res), "=&d" (d0) + :"c" (s),"1" (count) + :"memory"); + return res; +} +EXPORT_SYMBOL(strnlen); +#endif diff --git a/arch/x86/lib/strstr_32.c b/arch/x86/lib/strstr_32.c new file mode 100644 index 00000000000..a3dafbf59da --- /dev/null +++ b/arch/x86/lib/strstr_32.c @@ -0,0 +1,31 @@ +#include + +char * strstr(const char * cs,const char * ct) +{ +int d0, d1; +register char * __res; +__asm__ __volatile__( + "movl %6,%%edi\n\t" + "repne\n\t" + "scasb\n\t" + "notl %%ecx\n\t" + "decl %%ecx\n\t" /* NOTE! This also sets Z if searchstring='' */ + "movl %%ecx,%%edx\n" + "1:\tmovl %6,%%edi\n\t" + "movl %%esi,%%eax\n\t" + "movl %%edx,%%ecx\n\t" + "repe\n\t" + "cmpsb\n\t" + "je 2f\n\t" /* also works for empty string, see above */ + "xchgl %%eax,%%esi\n\t" + "incl %%esi\n\t" + "cmpb $0,-1(%%eax)\n\t" + "jne 1b\n\t" + "xorl %%eax,%%eax\n\t" + "2:" + :"=a" (__res), "=&c" (d0), "=&S" (d1) + :"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct) + :"dx", "di"); +return __res; +} + diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c new file mode 100644 index 00000000000..9f38b12b4af --- /dev/null +++ b/arch/x86/lib/usercopy_32.c @@ -0,0 +1,882 @@ +/* + * User address space access functions. + * The non inlined parts of asm-i386/uaccess.h are here. + * + * Copyright 1997 Andi Kleen + * Copyright 1997 Linus Torvalds + */ +#include +#include +#include +#include +#include +#include +#include +#include + +static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n) +{ +#ifdef CONFIG_X86_INTEL_USERCOPY + if (n >= 64 && ((a1 ^ a2) & movsl_mask.mask)) + return 0; +#endif + return 1; +} +#define movsl_is_ok(a1,a2,n) \ + __movsl_is_ok((unsigned long)(a1),(unsigned long)(a2),(n)) + +/* + * Copy a null terminated string from userspace. + */ + +#define __do_strncpy_from_user(dst,src,count,res) \ +do { \ + int __d0, __d1, __d2; \ + might_sleep(); \ + __asm__ __volatile__( \ + " testl %1,%1\n" \ + " jz 2f\n" \ + "0: lodsb\n" \ + " stosb\n" \ + " testb %%al,%%al\n" \ + " jz 1f\n" \ + " decl %1\n" \ + " jnz 0b\n" \ + "1: subl %1,%0\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl %5,%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + ".previous" \ + : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ + "=&D" (__d2) \ + : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ + : "memory"); \ +} while (0) + +/** + * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. + * @dst: Destination address, in kernel space. This buffer must be at + * least @count bytes long. + * @src: Source address, in user space. + * @count: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from userspace to kernel space. + * Caller must check the specified block with access_ok() before calling + * this function. + * + * On success, returns the length of the string (not including the trailing + * NUL). + * + * If access to userspace fails, returns -EFAULT (some data may have been + * copied). + * + * If @count is smaller than the length of the string, copies @count bytes + * and returns @count. + */ +long +__strncpy_from_user(char *dst, const char __user *src, long count) +{ + long res; + __do_strncpy_from_user(dst, src, count, res); + return res; +} +EXPORT_SYMBOL(__strncpy_from_user); + +/** + * strncpy_from_user: - Copy a NUL terminated string from userspace. + * @dst: Destination address, in kernel space. This buffer must be at + * least @count bytes long. + * @src: Source address, in user space. + * @count: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from userspace to kernel space. + * + * On success, returns the length of the string (not including the trailing + * NUL). + * + * If access to userspace fails, returns -EFAULT (some data may have been + * copied). + * + * If @count is smaller than the length of the string, copies @count bytes + * and returns @count. + */ +long +strncpy_from_user(char *dst, const char __user *src, long count) +{ + long res = -EFAULT; + if (access_ok(VERIFY_READ, src, 1)) + __do_strncpy_from_user(dst, src, count, res); + return res; +} +EXPORT_SYMBOL(strncpy_from_user); + +/* + * Zero Userspace + */ + +#define __do_clear_user(addr,size) \ +do { \ + int __d0; \ + might_sleep(); \ + __asm__ __volatile__( \ + "0: rep; stosl\n" \ + " movl %2,%0\n" \ + "1: rep; stosb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: lea 0(%2,%0,4),%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 0b,3b\n" \ + " .long 1b,2b\n" \ + ".previous" \ + : "=&c"(size), "=&D" (__d0) \ + : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ +} while (0) + +/** + * clear_user: - Zero a block of memory in user space. + * @to: Destination address, in user space. + * @n: Number of bytes to zero. + * + * Zero a block of memory in user space. + * + * Returns number of bytes that could not be cleared. + * On success, this will be zero. + */ +unsigned long +clear_user(void __user *to, unsigned long n) +{ + might_sleep(); + if (access_ok(VERIFY_WRITE, to, n)) + __do_clear_user(to, n); + return n; +} +EXPORT_SYMBOL(clear_user); + +/** + * __clear_user: - Zero a block of memory in user space, with less checking. + * @to: Destination address, in user space. + * @n: Number of bytes to zero. + * + * Zero a block of memory in user space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be cleared. + * On success, this will be zero. + */ +unsigned long +__clear_user(void __user *to, unsigned long n) +{ + __do_clear_user(to, n); + return n; +} +EXPORT_SYMBOL(__clear_user); + +/** + * strnlen_user: - Get the size of a string in user space. + * @s: The string to measure. + * @n: The maximum valid length + * + * Get the size of a NUL-terminated string in user space. + * + * Returns the size of the string INCLUDING the terminating NUL. + * On exception, returns 0. + * If the string is too long, returns a value greater than @n. + */ +long strnlen_user(const char __user *s, long n) +{ + unsigned long mask = -__addr_ok(s); + unsigned long res, tmp; + + might_sleep(); + + __asm__ __volatile__( + " testl %0, %0\n" + " jz 3f\n" + " andl %0,%%ecx\n" + "0: repne; scasb\n" + " setne %%al\n" + " subl %%ecx,%0\n" + " addl %0,%%eax\n" + "1:\n" + ".section .fixup,\"ax\"\n" + "2: xorl %%eax,%%eax\n" + " jmp 1b\n" + "3: movb $1,%%al\n" + " jmp 1b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,2b\n" + ".previous" + :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp) + :"0" (n), "1" (s), "2" (0), "3" (mask) + :"cc"); + return res & mask; +} +EXPORT_SYMBOL(strnlen_user); + +#ifdef CONFIG_X86_INTEL_USERCOPY +static unsigned long +__copy_user_intel(void __user *to, const void *from, unsigned long size) +{ + int d0, d1; + __asm__ __volatile__( + " .align 2,0x90\n" + "1: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" + " jbe 3f\n" + "2: movl 64(%4), %%eax\n" + " .align 2,0x90\n" + "3: movl 0(%4), %%eax\n" + "4: movl 4(%4), %%edx\n" + "5: movl %%eax, 0(%3)\n" + "6: movl %%edx, 4(%3)\n" + "7: movl 8(%4), %%eax\n" + "8: movl 12(%4),%%edx\n" + "9: movl %%eax, 8(%3)\n" + "10: movl %%edx, 12(%3)\n" + "11: movl 16(%4), %%eax\n" + "12: movl 20(%4), %%edx\n" + "13: movl %%eax, 16(%3)\n" + "14: movl %%edx, 20(%3)\n" + "15: movl 24(%4), %%eax\n" + "16: movl 28(%4), %%edx\n" + "17: movl %%eax, 24(%3)\n" + "18: movl %%edx, 28(%3)\n" + "19: movl 32(%4), %%eax\n" + "20: movl 36(%4), %%edx\n" + "21: movl %%eax, 32(%3)\n" + "22: movl %%edx, 36(%3)\n" + "23: movl 40(%4), %%eax\n" + "24: movl 44(%4), %%edx\n" + "25: movl %%eax, 40(%3)\n" + "26: movl %%edx, 44(%3)\n" + "27: movl 48(%4), %%eax\n" + "28: movl 52(%4), %%edx\n" + "29: movl %%eax, 48(%3)\n" + "30: movl %%edx, 52(%3)\n" + "31: movl 56(%4), %%eax\n" + "32: movl 60(%4), %%edx\n" + "33: movl %%eax, 56(%3)\n" + "34: movl %%edx, 60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" + " cmpl $63, %0\n" + " ja 1b\n" + "35: movl %0, %%eax\n" + " shrl $2, %0\n" + " andl $3, %%eax\n" + " cld\n" + "99: rep; movsl\n" + "36: movl %%eax, %0\n" + "37: rep; movsb\n" + "100:\n" + ".section .fixup,\"ax\"\n" + "101: lea 0(%%eax,%0,4),%0\n" + " jmp 100b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b,100b\n" + " .long 2b,100b\n" + " .long 3b,100b\n" + " .long 4b,100b\n" + " .long 5b,100b\n" + " .long 6b,100b\n" + " .long 7b,100b\n" + " .long 8b,100b\n" + " .long 9b,100b\n" + " .long 10b,100b\n" + " .long 11b,100b\n" + " .long 12b,100b\n" + " .long 13b,100b\n" + " .long 14b,100b\n" + " .long 15b,100b\n" + " .long 16b,100b\n" + " .long 17b,100b\n" + " .long 18b,100b\n" + " .long 19b,100b\n" + " .long 20b,100b\n" + " .long 21b,100b\n" + " .long 22b,100b\n" + " .long 23b,100b\n" + " .long 24b,100b\n" + " .long 25b,100b\n" + " .long 26b,100b\n" + " .long 27b,100b\n" + " .long 28b,100b\n" + " .long 29b,100b\n" + " .long 30b,100b\n" + " .long 31b,100b\n" + " .long 32b,100b\n" + " .long 33b,100b\n" + " .long 34b,100b\n" + " .long 35b,100b\n" + " .long 36b,100b\n" + " .long 37b,100b\n" + " .long 99b,101b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) + : "1"(to), "2"(from), "0"(size) + : "eax", "edx", "memory"); + return size; +} + +static unsigned long +__copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size) +{ + int d0, d1; + __asm__ __volatile__( + " .align 2,0x90\n" + "0: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" + " jbe 2f\n" + "1: movl 64(%4), %%eax\n" + " .align 2,0x90\n" + "2: movl 0(%4), %%eax\n" + "21: movl 4(%4), %%edx\n" + " movl %%eax, 0(%3)\n" + " movl %%edx, 4(%3)\n" + "3: movl 8(%4), %%eax\n" + "31: movl 12(%4),%%edx\n" + " movl %%eax, 8(%3)\n" + " movl %%edx, 12(%3)\n" + "4: movl 16(%4), %%eax\n" + "41: movl 20(%4), %%edx\n" + " movl %%eax, 16(%3)\n" + " movl %%edx, 20(%3)\n" + "10: movl 24(%4), %%eax\n" + "51: movl 28(%4), %%edx\n" + " movl %%eax, 24(%3)\n" + " movl %%edx, 28(%3)\n" + "11: movl 32(%4), %%eax\n" + "61: movl 36(%4), %%edx\n" + " movl %%eax, 32(%3)\n" + " movl %%edx, 36(%3)\n" + "12: movl 40(%4), %%eax\n" + "71: movl 44(%4), %%edx\n" + " movl %%eax, 40(%3)\n" + " movl %%edx, 44(%3)\n" + "13: movl 48(%4), %%eax\n" + "81: movl 52(%4), %%edx\n" + " movl %%eax, 48(%3)\n" + " movl %%edx, 52(%3)\n" + "14: movl 56(%4), %%eax\n" + "91: movl 60(%4), %%edx\n" + " movl %%eax, 56(%3)\n" + " movl %%edx, 60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" + " cmpl $63, %0\n" + " ja 0b\n" + "5: movl %0, %%eax\n" + " shrl $2, %0\n" + " andl $3, %%eax\n" + " cld\n" + "6: rep; movsl\n" + " movl %%eax,%0\n" + "7: rep; movsb\n" + "8:\n" + ".section .fixup,\"ax\"\n" + "9: lea 0(%%eax,%0,4),%0\n" + "16: pushl %0\n" + " pushl %%eax\n" + " xorl %%eax,%%eax\n" + " rep; stosb\n" + " popl %%eax\n" + " popl %0\n" + " jmp 8b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,16b\n" + " .long 1b,16b\n" + " .long 2b,16b\n" + " .long 21b,16b\n" + " .long 3b,16b\n" + " .long 31b,16b\n" + " .long 4b,16b\n" + " .long 41b,16b\n" + " .long 10b,16b\n" + " .long 51b,16b\n" + " .long 11b,16b\n" + " .long 61b,16b\n" + " .long 12b,16b\n" + " .long 71b,16b\n" + " .long 13b,16b\n" + " .long 81b,16b\n" + " .long 14b,16b\n" + " .long 91b,16b\n" + " .long 6b,9b\n" + " .long 7b,16b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) + : "1"(to), "2"(from), "0"(size) + : "eax", "edx", "memory"); + return size; +} + +/* + * Non Temporal Hint version of __copy_user_zeroing_intel. It is cache aware. + * hyoshiok@miraclelinux.com + */ + +static unsigned long __copy_user_zeroing_intel_nocache(void *to, + const void __user *from, unsigned long size) +{ + int d0, d1; + + __asm__ __volatile__( + " .align 2,0x90\n" + "0: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" + " jbe 2f\n" + "1: movl 64(%4), %%eax\n" + " .align 2,0x90\n" + "2: movl 0(%4), %%eax\n" + "21: movl 4(%4), %%edx\n" + " movnti %%eax, 0(%3)\n" + " movnti %%edx, 4(%3)\n" + "3: movl 8(%4), %%eax\n" + "31: movl 12(%4),%%edx\n" + " movnti %%eax, 8(%3)\n" + " movnti %%edx, 12(%3)\n" + "4: movl 16(%4), %%eax\n" + "41: movl 20(%4), %%edx\n" + " movnti %%eax, 16(%3)\n" + " movnti %%edx, 20(%3)\n" + "10: movl 24(%4), %%eax\n" + "51: movl 28(%4), %%edx\n" + " movnti %%eax, 24(%3)\n" + " movnti %%edx, 28(%3)\n" + "11: movl 32(%4), %%eax\n" + "61: movl 36(%4), %%edx\n" + " movnti %%eax, 32(%3)\n" + " movnti %%edx, 36(%3)\n" + "12: movl 40(%4), %%eax\n" + "71: movl 44(%4), %%edx\n" + " movnti %%eax, 40(%3)\n" + " movnti %%edx, 44(%3)\n" + "13: movl 48(%4), %%eax\n" + "81: movl 52(%4), %%edx\n" + " movnti %%eax, 48(%3)\n" + " movnti %%edx, 52(%3)\n" + "14: movl 56(%4), %%eax\n" + "91: movl 60(%4), %%edx\n" + " movnti %%eax, 56(%3)\n" + " movnti %%edx, 60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" + " cmpl $63, %0\n" + " ja 0b\n" + " sfence \n" + "5: movl %0, %%eax\n" + " shrl $2, %0\n" + " andl $3, %%eax\n" + " cld\n" + "6: rep; movsl\n" + " movl %%eax,%0\n" + "7: rep; movsb\n" + "8:\n" + ".section .fixup,\"ax\"\n" + "9: lea 0(%%eax,%0,4),%0\n" + "16: pushl %0\n" + " pushl %%eax\n" + " xorl %%eax,%%eax\n" + " rep; stosb\n" + " popl %%eax\n" + " popl %0\n" + " jmp 8b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,16b\n" + " .long 1b,16b\n" + " .long 2b,16b\n" + " .long 21b,16b\n" + " .long 3b,16b\n" + " .long 31b,16b\n" + " .long 4b,16b\n" + " .long 41b,16b\n" + " .long 10b,16b\n" + " .long 51b,16b\n" + " .long 11b,16b\n" + " .long 61b,16b\n" + " .long 12b,16b\n" + " .long 71b,16b\n" + " .long 13b,16b\n" + " .long 81b,16b\n" + " .long 14b,16b\n" + " .long 91b,16b\n" + " .long 6b,9b\n" + " .long 7b,16b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) + : "1"(to), "2"(from), "0"(size) + : "eax", "edx", "memory"); + return size; +} + +static unsigned long __copy_user_intel_nocache(void *to, + const void __user *from, unsigned long size) +{ + int d0, d1; + + __asm__ __volatile__( + " .align 2,0x90\n" + "0: movl 32(%4), %%eax\n" + " cmpl $67, %0\n" + " jbe 2f\n" + "1: movl 64(%4), %%eax\n" + " .align 2,0x90\n" + "2: movl 0(%4), %%eax\n" + "21: movl 4(%4), %%edx\n" + " movnti %%eax, 0(%3)\n" + " movnti %%edx, 4(%3)\n" + "3: movl 8(%4), %%eax\n" + "31: movl 12(%4),%%edx\n" + " movnti %%eax, 8(%3)\n" + " movnti %%edx, 12(%3)\n" + "4: movl 16(%4), %%eax\n" + "41: movl 20(%4), %%edx\n" + " movnti %%eax, 16(%3)\n" + " movnti %%edx, 20(%3)\n" + "10: movl 24(%4), %%eax\n" + "51: movl 28(%4), %%edx\n" + " movnti %%eax, 24(%3)\n" + " movnti %%edx, 28(%3)\n" + "11: movl 32(%4), %%eax\n" + "61: movl 36(%4), %%edx\n" + " movnti %%eax, 32(%3)\n" + " movnti %%edx, 36(%3)\n" + "12: movl 40(%4), %%eax\n" + "71: movl 44(%4), %%edx\n" + " movnti %%eax, 40(%3)\n" + " movnti %%edx, 44(%3)\n" + "13: movl 48(%4), %%eax\n" + "81: movl 52(%4), %%edx\n" + " movnti %%eax, 48(%3)\n" + " movnti %%edx, 52(%3)\n" + "14: movl 56(%4), %%eax\n" + "91: movl 60(%4), %%edx\n" + " movnti %%eax, 56(%3)\n" + " movnti %%edx, 60(%3)\n" + " addl $-64, %0\n" + " addl $64, %4\n" + " addl $64, %3\n" + " cmpl $63, %0\n" + " ja 0b\n" + " sfence \n" + "5: movl %0, %%eax\n" + " shrl $2, %0\n" + " andl $3, %%eax\n" + " cld\n" + "6: rep; movsl\n" + " movl %%eax,%0\n" + "7: rep; movsb\n" + "8:\n" + ".section .fixup,\"ax\"\n" + "9: lea 0(%%eax,%0,4),%0\n" + "16: jmp 8b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,16b\n" + " .long 1b,16b\n" + " .long 2b,16b\n" + " .long 21b,16b\n" + " .long 3b,16b\n" + " .long 31b,16b\n" + " .long 4b,16b\n" + " .long 41b,16b\n" + " .long 10b,16b\n" + " .long 51b,16b\n" + " .long 11b,16b\n" + " .long 61b,16b\n" + " .long 12b,16b\n" + " .long 71b,16b\n" + " .long 13b,16b\n" + " .long 81b,16b\n" + " .long 14b,16b\n" + " .long 91b,16b\n" + " .long 6b,9b\n" + " .long 7b,16b\n" + ".previous" + : "=&c"(size), "=&D" (d0), "=&S" (d1) + : "1"(to), "2"(from), "0"(size) + : "eax", "edx", "memory"); + return size; +} + +#else + +/* + * Leave these declared but undefined. They should not be any references to + * them + */ +unsigned long __copy_user_zeroing_intel(void *to, const void __user *from, + unsigned long size); +unsigned long __copy_user_intel(void __user *to, const void *from, + unsigned long size); +unsigned long __copy_user_zeroing_intel_nocache(void *to, + const void __user *from, unsigned long size); +#endif /* CONFIG_X86_INTEL_USERCOPY */ + +/* Generic arbitrary sized copy. */ +#define __copy_user(to,from,size) \ +do { \ + int __d0, __d1, __d2; \ + __asm__ __volatile__( \ + " cmp $7,%0\n" \ + " jbe 1f\n" \ + " movl %1,%0\n" \ + " negl %0\n" \ + " andl $7,%0\n" \ + " subl %0,%3\n" \ + "4: rep; movsb\n" \ + " movl %3,%0\n" \ + " shrl $2,%0\n" \ + " andl $3,%3\n" \ + " .align 2,0x90\n" \ + "0: rep; movsl\n" \ + " movl %3,%0\n" \ + "1: rep; movsb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "5: addl %3,%0\n" \ + " jmp 2b\n" \ + "3: lea 0(%3,%0,4),%0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 4b,5b\n" \ + " .long 0b,3b\n" \ + " .long 1b,2b\n" \ + ".previous" \ + : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ + : "3"(size), "0"(size), "1"(to), "2"(from) \ + : "memory"); \ +} while (0) + +#define __copy_user_zeroing(to,from,size) \ +do { \ + int __d0, __d1, __d2; \ + __asm__ __volatile__( \ + " cmp $7,%0\n" \ + " jbe 1f\n" \ + " movl %1,%0\n" \ + " negl %0\n" \ + " andl $7,%0\n" \ + " subl %0,%3\n" \ + "4: rep; movsb\n" \ + " movl %3,%0\n" \ + " shrl $2,%0\n" \ + " andl $3,%3\n" \ + " .align 2,0x90\n" \ + "0: rep; movsl\n" \ + " movl %3,%0\n" \ + "1: rep; movsb\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "5: addl %3,%0\n" \ + " jmp 6f\n" \ + "3: lea 0(%3,%0,4),%0\n" \ + "6: pushl %0\n" \ + " pushl %%eax\n" \ + " xorl %%eax,%%eax\n" \ + " rep; stosb\n" \ + " popl %%eax\n" \ + " popl %0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n" \ + " .long 4b,5b\n" \ + " .long 0b,3b\n" \ + " .long 1b,6b\n" \ + ".previous" \ + : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ + : "3"(size), "0"(size), "1"(to), "2"(from) \ + : "memory"); \ +} while (0) + +unsigned long __copy_to_user_ll(void __user *to, const void *from, + unsigned long n) +{ +#ifndef CONFIG_X86_WP_WORKS_OK + if (unlikely(boot_cpu_data.wp_works_ok == 0) && + ((unsigned long )to) < TASK_SIZE) { + /* + * When we are in an atomic section (see + * mm/filemap.c:file_read_actor), return the full + * length to take the slow path. + */ + if (in_atomic()) + return n; + + /* + * CPU does not honor the WP bit when writing + * from supervisory mode, and due to preemption or SMP, + * the page tables can change at any time. + * Do it manually. Manfred + */ + while (n) { + unsigned long offset = ((unsigned long)to)%PAGE_SIZE; + unsigned long len = PAGE_SIZE - offset; + int retval; + struct page *pg; + void *maddr; + + if (len > n) + len = n; + +survive: + down_read(¤t->mm->mmap_sem); + retval = get_user_pages(current, current->mm, + (unsigned long )to, 1, 1, 0, &pg, NULL); + + if (retval == -ENOMEM && is_init(current)) { + up_read(¤t->mm->mmap_sem); + congestion_wait(WRITE, HZ/50); + goto survive; + } + + if (retval != 1) { + up_read(¤t->mm->mmap_sem); + break; + } + + maddr = kmap_atomic(pg, KM_USER0); + memcpy(maddr + offset, from, len); + kunmap_atomic(maddr, KM_USER0); + set_page_dirty_lock(pg); + put_page(pg); + up_read(¤t->mm->mmap_sem); + + from += len; + to += len; + n -= len; + } + return n; + } +#endif + if (movsl_is_ok(to, from, n)) + __copy_user(to, from, n); + else + n = __copy_user_intel(to, from, n); + return n; +} +EXPORT_SYMBOL(__copy_to_user_ll); + +unsigned long __copy_from_user_ll(void *to, const void __user *from, + unsigned long n) +{ + if (movsl_is_ok(to, from, n)) + __copy_user_zeroing(to, from, n); + else + n = __copy_user_zeroing_intel(to, from, n); + return n; +} +EXPORT_SYMBOL(__copy_from_user_ll); + +unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from, + unsigned long n) +{ + if (movsl_is_ok(to, from, n)) + __copy_user(to, from, n); + else + n = __copy_user_intel((void __user *)to, + (const void *)from, n); + return n; +} +EXPORT_SYMBOL(__copy_from_user_ll_nozero); + +unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, + unsigned long n) +{ +#ifdef CONFIG_X86_INTEL_USERCOPY + if ( n > 64 && cpu_has_xmm2) + n = __copy_user_zeroing_intel_nocache(to, from, n); + else + __copy_user_zeroing(to, from, n); +#else + __copy_user_zeroing(to, from, n); +#endif + return n; +} + +unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, + unsigned long n) +{ +#ifdef CONFIG_X86_INTEL_USERCOPY + if ( n > 64 && cpu_has_xmm2) + n = __copy_user_intel_nocache(to, from, n); + else + __copy_user(to, from, n); +#else + __copy_user(to, from, n); +#endif + return n; +} + +/** + * copy_to_user: - Copy a block of data into user space. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from kernel space to user space. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + */ +unsigned long +copy_to_user(void __user *to, const void *from, unsigned long n) +{ + if (access_ok(VERIFY_WRITE, to, n)) + n = __copy_to_user(to, from, n); + return n; +} +EXPORT_SYMBOL(copy_to_user); + +/** + * copy_from_user: - Copy a block of data from user space. + * @to: Destination address, in kernel space. + * @from: Source address, in user space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from user space to kernel space. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + * + * If some data could not be copied, this function will pad the copied + * data to the requested size using zero bytes. + */ +unsigned long +copy_from_user(void *to, const void __user *from, unsigned long n) +{ + if (access_ok(VERIFY_READ, from, n)) + n = __copy_from_user(to, from, n); + else + memset(to, 0, n); + return n; +} +EXPORT_SYMBOL(copy_from_user); diff --git a/arch/x86_64/lib/Makefile b/arch/x86_64/lib/Makefile index 2b1547e0ac6..2d7d724a2a6 100644 --- a/arch/x86_64/lib/Makefile +++ b/arch/x86_64/lib/Makefile @@ -1,5 +1,5 @@ ifeq ($(CONFIG_X86_32),y) -include ${srctree}/arch/i386/lib/Makefile_32 +include ${srctree}/arch/x86/lib/Makefile_32 else include ${srctree}/arch/x86_64/lib/Makefile_64 endif diff --git a/arch/x86_64/lib/msr-on-cpu.c b/arch/x86_64/lib/msr-on-cpu.c index 47e0ec47c37..5672d4190fb 100644 --- a/arch/x86_64/lib/msr-on-cpu.c +++ b/arch/x86_64/lib/msr-on-cpu.c @@ -1 +1 @@ -#include "../../i386/lib/msr-on-cpu.c" +#include "../../x86/lib/msr-on-cpu.c" -- cgit v1.2.3-70-g09d2 From 4b60eb8380a0b588a03b6052d7ac93e1964c75b8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:34 +0200 Subject: i386: move power Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Makefile | 2 +- arch/i386/power/Makefile | 2 - arch/i386/power/cpu.c | 133 ----------------------------------- arch/i386/power/suspend.c | 172 ---------------------------------------------- arch/i386/power/swsusp.S | 78 --------------------- arch/x86/power/Makefile | 2 + arch/x86/power/cpu.c | 133 +++++++++++++++++++++++++++++++++++ arch/x86/power/suspend.c | 172 ++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/power/swsusp.S | 78 +++++++++++++++++++++ 9 files changed, 386 insertions(+), 386 deletions(-) delete mode 100644 arch/i386/power/Makefile delete mode 100644 arch/i386/power/cpu.c delete mode 100644 arch/i386/power/suspend.c delete mode 100644 arch/i386/power/swsusp.S create mode 100644 arch/x86/power/Makefile create mode 100644 arch/x86/power/cpu.c create mode 100644 arch/x86/power/suspend.c create mode 100644 arch/x86/power/swsusp.S (limited to 'arch/x86') diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 93492b37061..da64799484a 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -110,7 +110,7 @@ drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ drivers-$(CONFIG_PCI) += arch/i386/pci/ # must be linked after kernel/ drivers-$(CONFIG_OPROFILE) += arch/i386/oprofile/ -drivers-$(CONFIG_PM) += arch/i386/power/ +drivers-$(CONFIG_PM) += arch/x86/power/ drivers-$(CONFIG_FB) += arch/i386/video/ CFLAGS += $(mflags-y) diff --git a/arch/i386/power/Makefile b/arch/i386/power/Makefile deleted file mode 100644 index d764ec95006..00000000000 --- a/arch/i386/power/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -obj-$(CONFIG_PM) += cpu.o -obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c deleted file mode 100644 index 998fd3ec0d6..00000000000 --- a/arch/i386/power/cpu.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Suspend support specific for i386. - * - * Distribute under GPLv2 - * - * Copyright (c) 2002 Pavel Machek - * Copyright (c) 2001 Patrick Mochel - */ - -#include -#include -#include -#include - -static struct saved_context saved_context; - -unsigned long saved_context_ebx; -unsigned long saved_context_esp, saved_context_ebp; -unsigned long saved_context_esi, saved_context_edi; -unsigned long saved_context_eflags; - -void __save_processor_state(struct saved_context *ctxt) -{ - mtrr_save_fixed_ranges(NULL); - kernel_fpu_begin(); - - /* - * descriptor tables - */ - store_gdt(&ctxt->gdt); - store_idt(&ctxt->idt); - store_tr(ctxt->tr); - - /* - * segment registers - */ - savesegment(es, ctxt->es); - savesegment(fs, ctxt->fs); - savesegment(gs, ctxt->gs); - savesegment(ss, ctxt->ss); - - /* - * control registers - */ - ctxt->cr0 = read_cr0(); - ctxt->cr2 = read_cr2(); - ctxt->cr3 = read_cr3(); - ctxt->cr4 = read_cr4(); -} - -void save_processor_state(void) -{ - __save_processor_state(&saved_context); -} - -static void do_fpu_end(void) -{ - /* - * Restore FPU regs if necessary. - */ - kernel_fpu_end(); -} - -static void fix_processor_context(void) -{ - int cpu = smp_processor_id(); - struct tss_struct * t = &per_cpu(init_tss, cpu); - - set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */ - - load_TR_desc(); /* This does ltr */ - load_LDT(¤t->active_mm->context); /* This does lldt */ - - /* - * Now maybe reload the debug registers - */ - if (current->thread.debugreg[7]){ - set_debugreg(current->thread.debugreg[0], 0); - set_debugreg(current->thread.debugreg[1], 1); - set_debugreg(current->thread.debugreg[2], 2); - set_debugreg(current->thread.debugreg[3], 3); - /* no 4 and 5 */ - set_debugreg(current->thread.debugreg[6], 6); - set_debugreg(current->thread.debugreg[7], 7); - } - -} - -void __restore_processor_state(struct saved_context *ctxt) -{ - /* - * control registers - */ - write_cr4(ctxt->cr4); - write_cr3(ctxt->cr3); - write_cr2(ctxt->cr2); - write_cr0(ctxt->cr0); - - /* - * now restore the descriptor tables to their proper values - * ltr is done i fix_processor_context(). - */ - load_gdt(&ctxt->gdt); - load_idt(&ctxt->idt); - - /* - * segment registers - */ - loadsegment(es, ctxt->es); - loadsegment(fs, ctxt->fs); - loadsegment(gs, ctxt->gs); - loadsegment(ss, ctxt->ss); - - /* - * sysenter MSRs - */ - if (boot_cpu_has(X86_FEATURE_SEP)) - enable_sep_cpu(); - - fix_processor_context(); - do_fpu_end(); - mtrr_ap_init(); - mcheck_init(&boot_cpu_data); -} - -void restore_processor_state(void) -{ - __restore_processor_state(&saved_context); -} - -/* Needed by apm.c */ -EXPORT_SYMBOL(save_processor_state); -EXPORT_SYMBOL(restore_processor_state); diff --git a/arch/i386/power/suspend.c b/arch/i386/power/suspend.c deleted file mode 100644 index a0020b913f3..00000000000 --- a/arch/i386/power/suspend.c +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Suspend support specific for i386 - temporary page tables - * - * Distribute under GPLv2 - * - * Copyright (c) 2006 Rafael J. Wysocki - */ - -#include -#include - -#include -#include -#include - -/* Defined in arch/i386/power/swsusp.S */ -extern int restore_image(void); - -/* References to section boundaries */ -extern const void __nosave_begin, __nosave_end; - -/* Pointer to the temporary resume page tables */ -pgd_t *resume_pg_dir; - -/* The following three functions are based on the analogous code in - * arch/i386/mm/init.c - */ - -/* - * Create a middle page table on a resume-safe page and put a pointer to it in - * the given global directory entry. This only returns the gd entry - * in non-PAE compilation mode, since the middle layer is folded. - */ -static pmd_t *resume_one_md_table_init(pgd_t *pgd) -{ - pud_t *pud; - pmd_t *pmd_table; - -#ifdef CONFIG_X86_PAE - pmd_table = (pmd_t *)get_safe_page(GFP_ATOMIC); - if (!pmd_table) - return NULL; - - set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); - pud = pud_offset(pgd, 0); - - BUG_ON(pmd_table != pmd_offset(pud, 0)); -#else - pud = pud_offset(pgd, 0); - pmd_table = pmd_offset(pud, 0); -#endif - - return pmd_table; -} - -/* - * Create a page table on a resume-safe page and place a pointer to it in - * a middle page directory entry. - */ -static pte_t *resume_one_page_table_init(pmd_t *pmd) -{ - if (pmd_none(*pmd)) { - pte_t *page_table = (pte_t *)get_safe_page(GFP_ATOMIC); - if (!page_table) - return NULL; - - set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); - - BUG_ON(page_table != pte_offset_kernel(pmd, 0)); - - return page_table; - } - - return pte_offset_kernel(pmd, 0); -} - -/* - * This maps the physical memory to kernel virtual address space, a total - * of max_low_pfn pages, by creating page tables starting from address - * PAGE_OFFSET. The page tables are allocated out of resume-safe pages. - */ -static int resume_physical_mapping_init(pgd_t *pgd_base) -{ - unsigned long pfn; - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - int pgd_idx, pmd_idx; - - pgd_idx = pgd_index(PAGE_OFFSET); - pgd = pgd_base + pgd_idx; - pfn = 0; - - for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { - pmd = resume_one_md_table_init(pgd); - if (!pmd) - return -ENOMEM; - - if (pfn >= max_low_pfn) - continue; - - for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD; pmd++, pmd_idx++) { - if (pfn >= max_low_pfn) - break; - - /* Map with big pages if possible, otherwise create - * normal page tables. - * NOTE: We can mark everything as executable here - */ - if (cpu_has_pse) { - set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); - pfn += PTRS_PER_PTE; - } else { - pte_t *max_pte; - - pte = resume_one_page_table_init(pmd); - if (!pte) - return -ENOMEM; - - max_pte = pte + PTRS_PER_PTE; - for (; pte < max_pte; pte++, pfn++) { - if (pfn >= max_low_pfn) - break; - - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); - } - } - } - } - return 0; -} - -static inline void resume_init_first_level_page_table(pgd_t *pg_dir) -{ -#ifdef CONFIG_X86_PAE - int i; - - /* Init entries of the first-level page table to the zero page */ - for (i = 0; i < PTRS_PER_PGD; i++) - set_pgd(pg_dir + i, - __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); -#endif -} - -int swsusp_arch_resume(void) -{ - int error; - - resume_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC); - if (!resume_pg_dir) - return -ENOMEM; - - resume_init_first_level_page_table(resume_pg_dir); - error = resume_physical_mapping_init(resume_pg_dir); - if (error) - return error; - - /* We have got enough memory and from now on we cannot recover */ - restore_image(); - return 0; -} - -/* - * pfn_is_nosave - check if given pfn is in the 'nosave' section - */ - -int pfn_is_nosave(unsigned long pfn) -{ - unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT; - unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT; - return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); -} diff --git a/arch/i386/power/swsusp.S b/arch/i386/power/swsusp.S deleted file mode 100644 index 53662e05b39..00000000000 --- a/arch/i386/power/swsusp.S +++ /dev/null @@ -1,78 +0,0 @@ -.text - -/* Originally gcc generated, modified by hand - * - * This may not use any stack, nor any variable that is not "NoSave": - * - * Its rewriting one kernel image with another. What is stack in "old" - * image could very well be data page in "new" image, and overwriting - * your own stack under you is bad idea. - */ - -#include -#include -#include -#include - - .text - -ENTRY(swsusp_arch_suspend) - - movl %esp, saved_context_esp - movl %ebx, saved_context_ebx - movl %ebp, saved_context_ebp - movl %esi, saved_context_esi - movl %edi, saved_context_edi - pushfl ; popl saved_context_eflags - - call swsusp_save - ret - -ENTRY(restore_image) - movl resume_pg_dir, %ecx - subl $__PAGE_OFFSET, %ecx - movl %ecx, %cr3 - - movl restore_pblist, %edx - .p2align 4,,7 - -copy_loop: - testl %edx, %edx - jz done - - movl pbe_address(%edx), %esi - movl pbe_orig_address(%edx), %edi - - movl $1024, %ecx - rep - movsl - - movl pbe_next(%edx), %edx - jmp copy_loop - .p2align 4,,7 - -done: - /* go back to the original page tables */ - movl $swapper_pg_dir, %ecx - subl $__PAGE_OFFSET, %ecx - movl %ecx, %cr3 - /* Flush TLB, including "global" things (vmalloc) */ - movl mmu_cr4_features, %eax - movl %eax, %edx - andl $~(1<<7), %edx; # PGE - movl %edx, %cr4; # turn off PGE - movl %cr3, %ecx; # flush TLB - movl %ecx, %cr3 - movl %eax, %cr4; # turn PGE back on - - movl saved_context_esp, %esp - movl saved_context_ebp, %ebp - movl saved_context_ebx, %ebx - movl saved_context_esi, %esi - movl saved_context_edi, %edi - - pushl saved_context_eflags ; popfl - - xorl %eax, %eax - - ret diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile new file mode 100644 index 00000000000..d764ec95006 --- /dev/null +++ b/arch/x86/power/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_PM) += cpu.o +obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c new file mode 100644 index 00000000000..998fd3ec0d6 --- /dev/null +++ b/arch/x86/power/cpu.c @@ -0,0 +1,133 @@ +/* + * Suspend support specific for i386. + * + * Distribute under GPLv2 + * + * Copyright (c) 2002 Pavel Machek + * Copyright (c) 2001 Patrick Mochel + */ + +#include +#include +#include +#include + +static struct saved_context saved_context; + +unsigned long saved_context_ebx; +unsigned long saved_context_esp, saved_context_ebp; +unsigned long saved_context_esi, saved_context_edi; +unsigned long saved_context_eflags; + +void __save_processor_state(struct saved_context *ctxt) +{ + mtrr_save_fixed_ranges(NULL); + kernel_fpu_begin(); + + /* + * descriptor tables + */ + store_gdt(&ctxt->gdt); + store_idt(&ctxt->idt); + store_tr(ctxt->tr); + + /* + * segment registers + */ + savesegment(es, ctxt->es); + savesegment(fs, ctxt->fs); + savesegment(gs, ctxt->gs); + savesegment(ss, ctxt->ss); + + /* + * control registers + */ + ctxt->cr0 = read_cr0(); + ctxt->cr2 = read_cr2(); + ctxt->cr3 = read_cr3(); + ctxt->cr4 = read_cr4(); +} + +void save_processor_state(void) +{ + __save_processor_state(&saved_context); +} + +static void do_fpu_end(void) +{ + /* + * Restore FPU regs if necessary. + */ + kernel_fpu_end(); +} + +static void fix_processor_context(void) +{ + int cpu = smp_processor_id(); + struct tss_struct * t = &per_cpu(init_tss, cpu); + + set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */ + + load_TR_desc(); /* This does ltr */ + load_LDT(¤t->active_mm->context); /* This does lldt */ + + /* + * Now maybe reload the debug registers + */ + if (current->thread.debugreg[7]){ + set_debugreg(current->thread.debugreg[0], 0); + set_debugreg(current->thread.debugreg[1], 1); + set_debugreg(current->thread.debugreg[2], 2); + set_debugreg(current->thread.debugreg[3], 3); + /* no 4 and 5 */ + set_debugreg(current->thread.debugreg[6], 6); + set_debugreg(current->thread.debugreg[7], 7); + } + +} + +void __restore_processor_state(struct saved_context *ctxt) +{ + /* + * control registers + */ + write_cr4(ctxt->cr4); + write_cr3(ctxt->cr3); + write_cr2(ctxt->cr2); + write_cr0(ctxt->cr0); + + /* + * now restore the descriptor tables to their proper values + * ltr is done i fix_processor_context(). + */ + load_gdt(&ctxt->gdt); + load_idt(&ctxt->idt); + + /* + * segment registers + */ + loadsegment(es, ctxt->es); + loadsegment(fs, ctxt->fs); + loadsegment(gs, ctxt->gs); + loadsegment(ss, ctxt->ss); + + /* + * sysenter MSRs + */ + if (boot_cpu_has(X86_FEATURE_SEP)) + enable_sep_cpu(); + + fix_processor_context(); + do_fpu_end(); + mtrr_ap_init(); + mcheck_init(&boot_cpu_data); +} + +void restore_processor_state(void) +{ + __restore_processor_state(&saved_context); +} + +/* Needed by apm.c */ +EXPORT_SYMBOL(save_processor_state); +EXPORT_SYMBOL(restore_processor_state); diff --git a/arch/x86/power/suspend.c b/arch/x86/power/suspend.c new file mode 100644 index 00000000000..a0020b913f3 --- /dev/null +++ b/arch/x86/power/suspend.c @@ -0,0 +1,172 @@ +/* + * Suspend support specific for i386 - temporary page tables + * + * Distribute under GPLv2 + * + * Copyright (c) 2006 Rafael J. Wysocki + */ + +#include +#include + +#include +#include +#include + +/* Defined in arch/i386/power/swsusp.S */ +extern int restore_image(void); + +/* References to section boundaries */ +extern const void __nosave_begin, __nosave_end; + +/* Pointer to the temporary resume page tables */ +pgd_t *resume_pg_dir; + +/* The following three functions are based on the analogous code in + * arch/i386/mm/init.c + */ + +/* + * Create a middle page table on a resume-safe page and put a pointer to it in + * the given global directory entry. This only returns the gd entry + * in non-PAE compilation mode, since the middle layer is folded. + */ +static pmd_t *resume_one_md_table_init(pgd_t *pgd) +{ + pud_t *pud; + pmd_t *pmd_table; + +#ifdef CONFIG_X86_PAE + pmd_table = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!pmd_table) + return NULL; + + set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); + pud = pud_offset(pgd, 0); + + BUG_ON(pmd_table != pmd_offset(pud, 0)); +#else + pud = pud_offset(pgd, 0); + pmd_table = pmd_offset(pud, 0); +#endif + + return pmd_table; +} + +/* + * Create a page table on a resume-safe page and place a pointer to it in + * a middle page directory entry. + */ +static pte_t *resume_one_page_table_init(pmd_t *pmd) +{ + if (pmd_none(*pmd)) { + pte_t *page_table = (pte_t *)get_safe_page(GFP_ATOMIC); + if (!page_table) + return NULL; + + set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); + + BUG_ON(page_table != pte_offset_kernel(pmd, 0)); + + return page_table; + } + + return pte_offset_kernel(pmd, 0); +} + +/* + * This maps the physical memory to kernel virtual address space, a total + * of max_low_pfn pages, by creating page tables starting from address + * PAGE_OFFSET. The page tables are allocated out of resume-safe pages. + */ +static int resume_physical_mapping_init(pgd_t *pgd_base) +{ + unsigned long pfn; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int pgd_idx, pmd_idx; + + pgd_idx = pgd_index(PAGE_OFFSET); + pgd = pgd_base + pgd_idx; + pfn = 0; + + for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { + pmd = resume_one_md_table_init(pgd); + if (!pmd) + return -ENOMEM; + + if (pfn >= max_low_pfn) + continue; + + for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD; pmd++, pmd_idx++) { + if (pfn >= max_low_pfn) + break; + + /* Map with big pages if possible, otherwise create + * normal page tables. + * NOTE: We can mark everything as executable here + */ + if (cpu_has_pse) { + set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); + pfn += PTRS_PER_PTE; + } else { + pte_t *max_pte; + + pte = resume_one_page_table_init(pmd); + if (!pte) + return -ENOMEM; + + max_pte = pte + PTRS_PER_PTE; + for (; pte < max_pte; pte++, pfn++) { + if (pfn >= max_low_pfn) + break; + + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); + } + } + } + } + return 0; +} + +static inline void resume_init_first_level_page_table(pgd_t *pg_dir) +{ +#ifdef CONFIG_X86_PAE + int i; + + /* Init entries of the first-level page table to the zero page */ + for (i = 0; i < PTRS_PER_PGD; i++) + set_pgd(pg_dir + i, + __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); +#endif +} + +int swsusp_arch_resume(void) +{ + int error; + + resume_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC); + if (!resume_pg_dir) + return -ENOMEM; + + resume_init_first_level_page_table(resume_pg_dir); + error = resume_physical_mapping_init(resume_pg_dir); + if (error) + return error; + + /* We have got enough memory and from now on we cannot recover */ + restore_image(); + return 0; +} + +/* + * pfn_is_nosave - check if given pfn is in the 'nosave' section + */ + +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT; + unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT; + return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); +} diff --git a/arch/x86/power/swsusp.S b/arch/x86/power/swsusp.S new file mode 100644 index 00000000000..53662e05b39 --- /dev/null +++ b/arch/x86/power/swsusp.S @@ -0,0 +1,78 @@ +.text + +/* Originally gcc generated, modified by hand + * + * This may not use any stack, nor any variable that is not "NoSave": + * + * Its rewriting one kernel image with another. What is stack in "old" + * image could very well be data page in "new" image, and overwriting + * your own stack under you is bad idea. + */ + +#include +#include +#include +#include + + .text + +ENTRY(swsusp_arch_suspend) + + movl %esp, saved_context_esp + movl %ebx, saved_context_ebx + movl %ebp, saved_context_ebp + movl %esi, saved_context_esi + movl %edi, saved_context_edi + pushfl ; popl saved_context_eflags + + call swsusp_save + ret + +ENTRY(restore_image) + movl resume_pg_dir, %ecx + subl $__PAGE_OFFSET, %ecx + movl %ecx, %cr3 + + movl restore_pblist, %edx + .p2align 4,,7 + +copy_loop: + testl %edx, %edx + jz done + + movl pbe_address(%edx), %esi + movl pbe_orig_address(%edx), %edi + + movl $1024, %ecx + rep + movsl + + movl pbe_next(%edx), %edx + jmp copy_loop + .p2align 4,,7 + +done: + /* go back to the original page tables */ + movl $swapper_pg_dir, %ecx + subl $__PAGE_OFFSET, %ecx + movl %ecx, %cr3 + /* Flush TLB, including "global" things (vmalloc) */ + movl mmu_cr4_features, %eax + movl %eax, %edx + andl $~(1<<7), %edx; # PGE + movl %edx, %cr4; # turn off PGE + movl %cr3, %ecx; # flush TLB + movl %ecx, %cr3 + movl %eax, %cr4; # turn PGE back on + + movl saved_context_esp, %esp + movl saved_context_ebp, %ebp + movl saved_context_ebx, %ebx + movl saved_context_esi, %esi + movl saved_context_edi, %edi + + pushl saved_context_eflags ; popfl + + xorl %eax, %eax + + ret -- cgit v1.2.3-70-g09d2 From fb9aa6f1d4a1e11e66a680460b2c2b2b10b62f79 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:36 +0200 Subject: i386: move pci Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Makefile | 2 +- arch/i386/pci/Makefile | 5 - arch/i386/pci/Makefile_32 | 14 - arch/i386/pci/acpi.c | 90 --- arch/i386/pci/common.c | 480 --------------- arch/i386/pci/direct.c | 302 ---------- arch/i386/pci/early.c | 59 -- arch/i386/pci/fixup.c | 446 -------------- arch/i386/pci/i386.c | 315 ---------- arch/i386/pci/init.c | 37 -- arch/i386/pci/irq.c | 1173 ------------------------------------- arch/i386/pci/legacy.c | 56 -- arch/i386/pci/mmconfig-shared.c | 315 ---------- arch/i386/pci/mmconfig_32.c | 148 ----- arch/i386/pci/numa.c | 135 ----- arch/i386/pci/pcbios.c | 492 ---------------- arch/i386/pci/pci.h | 149 ----- arch/i386/pci/visws.c | 111 ---- arch/x86/pci/Makefile | 5 + arch/x86/pci/Makefile_32 | 14 + arch/x86/pci/acpi.c | 90 +++ arch/x86/pci/common.c | 480 +++++++++++++++ arch/x86/pci/direct.c | 302 ++++++++++ arch/x86/pci/early.c | 59 ++ arch/x86/pci/fixup.c | 446 ++++++++++++++ arch/x86/pci/i386.c | 315 ++++++++++ arch/x86/pci/init.c | 37 ++ arch/x86/pci/irq.c | 1173 +++++++++++++++++++++++++++++++++++++ arch/x86/pci/legacy.c | 56 ++ arch/x86/pci/mmconfig-shared.c | 315 ++++++++++ arch/x86/pci/mmconfig_32.c | 148 +++++ arch/x86/pci/numa.c | 135 +++++ arch/x86/pci/pcbios.c | 492 ++++++++++++++++ arch/x86/pci/pci.h | 149 +++++ arch/x86/pci/visws.c | 111 ++++ arch/x86_64/pci/Makefile | 2 +- arch/x86_64/pci/Makefile_64 | 22 +- drivers/pci/hotplug/cpqphp_core.c | 2 +- drivers/pci/hotplug/cpqphp_pci.c | 2 +- drivers/pci/hotplug/ibmphp_core.c | 2 +- 40 files changed, 4343 insertions(+), 4343 deletions(-) delete mode 100644 arch/i386/pci/Makefile delete mode 100644 arch/i386/pci/Makefile_32 delete mode 100644 arch/i386/pci/acpi.c delete mode 100644 arch/i386/pci/common.c delete mode 100644 arch/i386/pci/direct.c delete mode 100644 arch/i386/pci/early.c delete mode 100644 arch/i386/pci/fixup.c delete mode 100644 arch/i386/pci/i386.c delete mode 100644 arch/i386/pci/init.c delete mode 100644 arch/i386/pci/irq.c delete mode 100644 arch/i386/pci/legacy.c delete mode 100644 arch/i386/pci/mmconfig-shared.c delete mode 100644 arch/i386/pci/mmconfig_32.c delete mode 100644 arch/i386/pci/numa.c delete mode 100644 arch/i386/pci/pcbios.c delete mode 100644 arch/i386/pci/pci.h delete mode 100644 arch/i386/pci/visws.c create mode 100644 arch/x86/pci/Makefile create mode 100644 arch/x86/pci/Makefile_32 create mode 100644 arch/x86/pci/acpi.c create mode 100644 arch/x86/pci/common.c create mode 100644 arch/x86/pci/direct.c create mode 100644 arch/x86/pci/early.c create mode 100644 arch/x86/pci/fixup.c create mode 100644 arch/x86/pci/i386.c create mode 100644 arch/x86/pci/init.c create mode 100644 arch/x86/pci/irq.c create mode 100644 arch/x86/pci/legacy.c create mode 100644 arch/x86/pci/mmconfig-shared.c create mode 100644 arch/x86/pci/mmconfig_32.c create mode 100644 arch/x86/pci/numa.c create mode 100644 arch/x86/pci/pcbios.c create mode 100644 arch/x86/pci/pci.h create mode 100644 arch/x86/pci/visws.c (limited to 'arch/x86') diff --git a/arch/i386/Makefile b/arch/i386/Makefile index da64799484a..87c959d5f35 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -107,7 +107,7 @@ core-y += arch/i386/kernel/ \ $(mcore-y)/ \ arch/x86/crypto/ drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ -drivers-$(CONFIG_PCI) += arch/i386/pci/ +drivers-$(CONFIG_PCI) += arch/x86/pci/ # must be linked after kernel/ drivers-$(CONFIG_OPROFILE) += arch/i386/oprofile/ drivers-$(CONFIG_PM) += arch/x86/power/ diff --git a/arch/i386/pci/Makefile b/arch/i386/pci/Makefile deleted file mode 100644 index d04d8f976d1..00000000000 --- a/arch/i386/pci/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -ifeq ($(CONFIG_X86_32),y) -include ${srctree}/arch/i386/pci/Makefile_32 -else -include ${srctree}/arch/x86_64/pci/Makefile_64 -endif diff --git a/arch/i386/pci/Makefile_32 b/arch/i386/pci/Makefile_32 deleted file mode 100644 index cdd6828b5ab..00000000000 --- a/arch/i386/pci/Makefile_32 +++ /dev/null @@ -1,14 +0,0 @@ -obj-y := i386.o init.o - -obj-$(CONFIG_PCI_BIOS) += pcbios.o -obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_32.o direct.o mmconfig-shared.o -obj-$(CONFIG_PCI_DIRECT) += direct.o - -pci-y := fixup.o -pci-$(CONFIG_ACPI) += acpi.o -pci-y += legacy.o irq.o - -pci-$(CONFIG_X86_VISWS) := visws.o fixup.o -pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o - -obj-y += $(pci-y) common.o early.o diff --git a/arch/i386/pci/acpi.c b/arch/i386/pci/acpi.c deleted file mode 100644 index bc8a44bddaa..00000000000 --- a/arch/i386/pci/acpi.c +++ /dev/null @@ -1,90 +0,0 @@ -#include -#include -#include -#include -#include -#include "pci.h" - -struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum) -{ - struct pci_bus *bus; - struct pci_sysdata *sd; - int pxm; - - /* Allocate per-root-bus (not per bus) arch-specific data. - * TODO: leak; this memory is never freed. - * It's arguable whether it's worth the trouble to care. - */ - sd = kzalloc(sizeof(*sd), GFP_KERNEL); - if (!sd) { - printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum); - return NULL; - } - - if (domain != 0) { - printk(KERN_WARNING "PCI: Multiple domains not supported\n"); - kfree(sd); - return NULL; - } - - sd->node = -1; - - pxm = acpi_get_pxm(device->handle); -#ifdef CONFIG_ACPI_NUMA - if (pxm >= 0) - sd->node = pxm_to_node(pxm); -#endif - - bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); - if (!bus) - kfree(sd); - -#ifdef CONFIG_ACPI_NUMA - if (bus != NULL) { - if (pxm >= 0) { - printk("bus %d -> pxm %d -> node %d\n", - busnum, pxm, sd->node); - } - } -#endif - - return bus; -} - -extern int pci_routeirq; -static int __init pci_acpi_init(void) -{ - struct pci_dev *dev = NULL; - - if (pcibios_scanned) - return 0; - - if (acpi_noirq) - return 0; - - printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); - acpi_irq_penalty_init(); - pcibios_scanned++; - pcibios_enable_irq = acpi_pci_irq_enable; - pcibios_disable_irq = acpi_pci_irq_disable; - - if (pci_routeirq) { - /* - * PCI IRQ routing is set up by pci_enable_device(), but we - * also do it here in case there are still broken drivers that - * don't use pci_enable_device(). - */ - printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n"); - for_each_pci_dev(dev) - acpi_pci_irq_enable(dev); - } else - printk(KERN_INFO "PCI: If a device doesn't work, try \"pci=routeirq\". If it helps, post a report\n"); - -#ifdef CONFIG_X86_IO_APIC - if (acpi_ioapic) - print_IO_APIC(); -#endif - - return 0; -} -subsys_initcall(pci_acpi_init); diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c deleted file mode 100644 index ebc6f3c6634..00000000000 --- a/arch/i386/pci/common.c +++ /dev/null @@ -1,480 +0,0 @@ -/* - * Low-Level PCI Support for PC - * - * (c) 1999--2000 Martin Mares - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "pci.h" - -unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | - PCI_PROBE_MMCONF; - -static int pci_bf_sort; -int pci_routeirq; -int pcibios_last_bus = -1; -unsigned long pirq_table_addr; -struct pci_bus *pci_root_bus; -struct pci_raw_ops *raw_pci_ops; - -static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) -{ - return raw_pci_ops->read(0, bus->number, devfn, where, size, value); -} - -static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value) -{ - return raw_pci_ops->write(0, bus->number, devfn, where, size, value); -} - -struct pci_ops pci_root_ops = { - .read = pci_read, - .write = pci_write, -}; - -/* - * legacy, numa, and acpi all want to call pcibios_scan_root - * from their initcalls. This flag prevents that. - */ -int pcibios_scanned; - -/* - * This interrupt-safe spinlock protects all accesses to PCI - * configuration space. - */ -DEFINE_SPINLOCK(pci_config_lock); - -/* - * Several buggy motherboards address only 16 devices and mirror - * them to next 16 IDs. We try to detect this `feature' on all - * primary buses (those containing host bridges as they are - * expected to be unique) and remove the ghost devices. - */ - -static void __devinit pcibios_fixup_ghosts(struct pci_bus *b) -{ - struct list_head *ln, *mn; - struct pci_dev *d, *e; - int mirror = PCI_DEVFN(16,0); - int seen_host_bridge = 0; - int i; - - DBG("PCI: Scanning for ghost devices on bus %d\n", b->number); - list_for_each(ln, &b->devices) { - d = pci_dev_b(ln); - if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST) - seen_host_bridge++; - for (mn=ln->next; mn != &b->devices; mn=mn->next) { - e = pci_dev_b(mn); - if (e->devfn != d->devfn + mirror || - e->vendor != d->vendor || - e->device != d->device || - e->class != d->class) - continue; - for(i=0; iresource[i].start != d->resource[i].start || - e->resource[i].end != d->resource[i].end || - e->resource[i].flags != d->resource[i].flags) - continue; - break; - } - if (mn == &b->devices) - return; - } - if (!seen_host_bridge) - return; - printk(KERN_WARNING "PCI: Ignoring ghost devices on bus %02x\n", b->number); - - ln = &b->devices; - while (ln->next != &b->devices) { - d = pci_dev_b(ln->next); - if (d->devfn >= mirror) { - list_del(&d->global_list); - list_del(&d->bus_list); - kfree(d); - } else - ln = ln->next; - } -} - -/* - * Called after each bus is probed, but before its children - * are examined. - */ - -void __devinit pcibios_fixup_bus(struct pci_bus *b) -{ - pcibios_fixup_ghosts(b); - pci_read_bridge_bases(b); -} - -/* - * Only use DMI information to set this if nothing was passed - * on the kernel command line (which was parsed earlier). - */ - -static int __devinit set_bf_sort(struct dmi_system_id *d) -{ - if (pci_bf_sort == pci_bf_sort_default) { - pci_bf_sort = pci_dmi_bf; - printk(KERN_INFO "PCI: %s detected, enabling pci=bfsort.\n", d->ident); - } - return 0; -} - -/* - * Enable renumbering of PCI bus# ranges to reach all PCI busses (Cardbus) - */ -#ifdef __i386__ -static int __devinit assign_all_busses(struct dmi_system_id *d) -{ - pci_probe |= PCI_ASSIGN_ALL_BUSSES; - printk(KERN_INFO "%s detected: enabling PCI bus# renumbering" - " (pci=assign-busses)\n", d->ident); - return 0; -} -#endif - -static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = { -#ifdef __i386__ -/* - * Laptops which need pci=assign-busses to see Cardbus cards - */ - { - .callback = assign_all_busses, - .ident = "Samsung X20 Laptop", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Samsung Electronics"), - DMI_MATCH(DMI_PRODUCT_NAME, "SX20S"), - }, - }, -#endif /* __i386__ */ - { - .callback = set_bf_sort, - .ident = "Dell PowerEdge 1950", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell"), - DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1950"), - }, - }, - { - .callback = set_bf_sort, - .ident = "Dell PowerEdge 1955", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell"), - DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1955"), - }, - }, - { - .callback = set_bf_sort, - .ident = "Dell PowerEdge 2900", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell"), - DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2900"), - }, - }, - { - .callback = set_bf_sort, - .ident = "Dell PowerEdge 2950", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell"), - DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2950"), - }, - }, - { - .callback = set_bf_sort, - .ident = "Dell PowerEdge R900", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell"), - DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R900"), - }, - }, - { - .callback = set_bf_sort, - .ident = "HP ProLiant BL20p G3", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL20p G3"), - }, - }, - { - .callback = set_bf_sort, - .ident = "HP ProLiant BL20p G4", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL20p G4"), - }, - }, - { - .callback = set_bf_sort, - .ident = "HP ProLiant BL30p G1", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL30p G1"), - }, - }, - { - .callback = set_bf_sort, - .ident = "HP ProLiant BL25p G1", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL25p G1"), - }, - }, - { - .callback = set_bf_sort, - .ident = "HP ProLiant BL35p G1", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL35p G1"), - }, - }, - { - .callback = set_bf_sort, - .ident = "HP ProLiant BL45p G1", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL45p G1"), - }, - }, - { - .callback = set_bf_sort, - .ident = "HP ProLiant BL45p G2", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL45p G2"), - }, - }, - { - .callback = set_bf_sort, - .ident = "HP ProLiant BL460c G1", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL460c G1"), - }, - }, - { - .callback = set_bf_sort, - .ident = "HP ProLiant BL465c G1", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL465c G1"), - }, - }, - { - .callback = set_bf_sort, - .ident = "HP ProLiant BL480c G1", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL480c G1"), - }, - }, - { - .callback = set_bf_sort, - .ident = "HP ProLiant BL685c G1", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "HP"), - DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL685c G1"), - }, - }, - {} -}; - -struct pci_bus * __devinit pcibios_scan_root(int busnum) -{ - struct pci_bus *bus = NULL; - struct pci_sysdata *sd; - - dmi_check_system(pciprobe_dmi_table); - - while ((bus = pci_find_next_bus(bus)) != NULL) { - if (bus->number == busnum) { - /* Already scanned */ - return bus; - } - } - - /* Allocate per-root-bus (not per bus) arch-specific data. - * TODO: leak; this memory is never freed. - * It's arguable whether it's worth the trouble to care. - */ - sd = kzalloc(sizeof(*sd), GFP_KERNEL); - if (!sd) { - printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum); - return NULL; - } - - printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); - - return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); -} - -extern u8 pci_cache_line_size; - -static int __init pcibios_init(void) -{ - struct cpuinfo_x86 *c = &boot_cpu_data; - - if (!raw_pci_ops) { - printk(KERN_WARNING "PCI: System does not support PCI\n"); - return 0; - } - - /* - * Assume PCI cacheline size of 32 bytes for all x86s except K7/K8 - * and P4. It's also good for 386/486s (which actually have 16) - * as quite a few PCI devices do not support smaller values. - */ - pci_cache_line_size = 32 >> 2; - if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD) - pci_cache_line_size = 64 >> 2; /* K7 & K8 */ - else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL) - pci_cache_line_size = 128 >> 2; /* P4 */ - - pcibios_resource_survey(); - - if (pci_bf_sort >= pci_force_bf) - pci_sort_breadthfirst(); -#ifdef CONFIG_PCI_BIOS - if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT)) - pcibios_sort(); -#endif - return 0; -} - -subsys_initcall(pcibios_init); - -char * __devinit pcibios_setup(char *str) -{ - if (!strcmp(str, "off")) { - pci_probe = 0; - return NULL; - } else if (!strcmp(str, "bfsort")) { - pci_bf_sort = pci_force_bf; - return NULL; - } else if (!strcmp(str, "nobfsort")) { - pci_bf_sort = pci_force_nobf; - return NULL; - } -#ifdef CONFIG_PCI_BIOS - else if (!strcmp(str, "bios")) { - pci_probe = PCI_PROBE_BIOS; - return NULL; - } else if (!strcmp(str, "nobios")) { - pci_probe &= ~PCI_PROBE_BIOS; - return NULL; - } else if (!strcmp(str, "nosort")) { - pci_probe |= PCI_NO_SORT; - return NULL; - } else if (!strcmp(str, "biosirq")) { - pci_probe |= PCI_BIOS_IRQ_SCAN; - return NULL; - } else if (!strncmp(str, "pirqaddr=", 9)) { - pirq_table_addr = simple_strtoul(str+9, NULL, 0); - return NULL; - } -#endif -#ifdef CONFIG_PCI_DIRECT - else if (!strcmp(str, "conf1")) { - pci_probe = PCI_PROBE_CONF1 | PCI_NO_CHECKS; - return NULL; - } - else if (!strcmp(str, "conf2")) { - pci_probe = PCI_PROBE_CONF2 | PCI_NO_CHECKS; - return NULL; - } -#endif -#ifdef CONFIG_PCI_MMCONFIG - else if (!strcmp(str, "nommconf")) { - pci_probe &= ~PCI_PROBE_MMCONF; - return NULL; - } -#endif - else if (!strcmp(str, "noacpi")) { - acpi_noirq_set(); - return NULL; - } - else if (!strcmp(str, "noearly")) { - pci_probe |= PCI_PROBE_NOEARLY; - return NULL; - } -#ifndef CONFIG_X86_VISWS - else if (!strcmp(str, "usepirqmask")) { - pci_probe |= PCI_USE_PIRQ_MASK; - return NULL; - } else if (!strncmp(str, "irqmask=", 8)) { - pcibios_irq_mask = simple_strtol(str+8, NULL, 0); - return NULL; - } else if (!strncmp(str, "lastbus=", 8)) { - pcibios_last_bus = simple_strtol(str+8, NULL, 0); - return NULL; - } -#endif - else if (!strcmp(str, "rom")) { - pci_probe |= PCI_ASSIGN_ROMS; - return NULL; - } else if (!strcmp(str, "assign-busses")) { - pci_probe |= PCI_ASSIGN_ALL_BUSSES; - return NULL; - } else if (!strcmp(str, "routeirq")) { - pci_routeirq = 1; - return NULL; - } - return str; -} - -unsigned int pcibios_assign_all_busses(void) -{ - return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0; -} - -int pcibios_enable_device(struct pci_dev *dev, int mask) -{ - int err; - - if ((err = pcibios_enable_resources(dev, mask)) < 0) - return err; - - if (!dev->msi_enabled) - return pcibios_enable_irq(dev); - return 0; -} - -void pcibios_disable_device (struct pci_dev *dev) -{ - if (!dev->msi_enabled && pcibios_disable_irq) - pcibios_disable_irq(dev); -} - -struct pci_bus *pci_scan_bus_with_sysdata(int busno) -{ - struct pci_bus *bus = NULL; - struct pci_sysdata *sd; - - /* - * Allocate per-root-bus (not per bus) arch-specific data. - * TODO: leak; this memory is never freed. - * It's arguable whether it's worth the trouble to care. - */ - sd = kzalloc(sizeof(*sd), GFP_KERNEL); - if (!sd) { - printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busno); - return NULL; - } - sd->node = -1; - bus = pci_scan_bus(busno, &pci_root_ops, sd); - if (!bus) - kfree(sd); - - return bus; -} diff --git a/arch/i386/pci/direct.c b/arch/i386/pci/direct.c deleted file mode 100644 index 431c9a51b15..00000000000 --- a/arch/i386/pci/direct.c +++ /dev/null @@ -1,302 +0,0 @@ -/* - * direct.c - Low-level direct PCI config space access - */ - -#include -#include -#include -#include "pci.h" - -/* - * Functions for accessing PCI configuration space with type 1 accesses - */ - -#define PCI_CONF1_ADDRESS(bus, devfn, reg) \ - (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3)) - -int pci_conf1_read(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 *value) -{ - unsigned long flags; - - if ((bus > 255) || (devfn > 255) || (reg > 255)) { - *value = -1; - return -EINVAL; - } - - spin_lock_irqsave(&pci_config_lock, flags); - - outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); - - switch (len) { - case 1: - *value = inb(0xCFC + (reg & 3)); - break; - case 2: - *value = inw(0xCFC + (reg & 2)); - break; - case 4: - *value = inl(0xCFC); - break; - } - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -int pci_conf1_write(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 value) -{ - unsigned long flags; - - if ((bus > 255) || (devfn > 255) || (reg > 255)) - return -EINVAL; - - spin_lock_irqsave(&pci_config_lock, flags); - - outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); - - switch (len) { - case 1: - outb((u8)value, 0xCFC + (reg & 3)); - break; - case 2: - outw((u16)value, 0xCFC + (reg & 2)); - break; - case 4: - outl((u32)value, 0xCFC); - break; - } - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -#undef PCI_CONF1_ADDRESS - -struct pci_raw_ops pci_direct_conf1 = { - .read = pci_conf1_read, - .write = pci_conf1_write, -}; - - -/* - * Functions for accessing PCI configuration space with type 2 accesses - */ - -#define PCI_CONF2_ADDRESS(dev, reg) (u16)(0xC000 | (dev << 8) | reg) - -static int pci_conf2_read(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 *value) -{ - unsigned long flags; - int dev, fn; - - if ((bus > 255) || (devfn > 255) || (reg > 255)) { - *value = -1; - return -EINVAL; - } - - dev = PCI_SLOT(devfn); - fn = PCI_FUNC(devfn); - - if (dev & 0x10) - return PCIBIOS_DEVICE_NOT_FOUND; - - spin_lock_irqsave(&pci_config_lock, flags); - - outb((u8)(0xF0 | (fn << 1)), 0xCF8); - outb((u8)bus, 0xCFA); - - switch (len) { - case 1: - *value = inb(PCI_CONF2_ADDRESS(dev, reg)); - break; - case 2: - *value = inw(PCI_CONF2_ADDRESS(dev, reg)); - break; - case 4: - *value = inl(PCI_CONF2_ADDRESS(dev, reg)); - break; - } - - outb(0, 0xCF8); - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -static int pci_conf2_write(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 value) -{ - unsigned long flags; - int dev, fn; - - if ((bus > 255) || (devfn > 255) || (reg > 255)) - return -EINVAL; - - dev = PCI_SLOT(devfn); - fn = PCI_FUNC(devfn); - - if (dev & 0x10) - return PCIBIOS_DEVICE_NOT_FOUND; - - spin_lock_irqsave(&pci_config_lock, flags); - - outb((u8)(0xF0 | (fn << 1)), 0xCF8); - outb((u8)bus, 0xCFA); - - switch (len) { - case 1: - outb((u8)value, PCI_CONF2_ADDRESS(dev, reg)); - break; - case 2: - outw((u16)value, PCI_CONF2_ADDRESS(dev, reg)); - break; - case 4: - outl((u32)value, PCI_CONF2_ADDRESS(dev, reg)); - break; - } - - outb(0, 0xCF8); - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -#undef PCI_CONF2_ADDRESS - -static struct pci_raw_ops pci_direct_conf2 = { - .read = pci_conf2_read, - .write = pci_conf2_write, -}; - - -/* - * Before we decide to use direct hardware access mechanisms, we try to do some - * trivial checks to ensure it at least _seems_ to be working -- we just test - * whether bus 00 contains a host bridge (this is similar to checking - * techniques used in XFree86, but ours should be more reliable since we - * attempt to make use of direct access hints provided by the PCI BIOS). - * - * This should be close to trivial, but it isn't, because there are buggy - * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID. - */ -static int __init pci_sanity_check(struct pci_raw_ops *o) -{ - u32 x = 0; - int devfn; - - if (pci_probe & PCI_NO_CHECKS) - return 1; - /* Assume Type 1 works for newer systems. - This handles machines that don't have anything on PCI Bus 0. */ - if (dmi_get_year(DMI_BIOS_DATE) >= 2001) - return 1; - - for (devfn = 0; devfn < 0x100; devfn++) { - if (o->read(0, 0, devfn, PCI_CLASS_DEVICE, 2, &x)) - continue; - if (x == PCI_CLASS_BRIDGE_HOST || x == PCI_CLASS_DISPLAY_VGA) - return 1; - - if (o->read(0, 0, devfn, PCI_VENDOR_ID, 2, &x)) - continue; - if (x == PCI_VENDOR_ID_INTEL || x == PCI_VENDOR_ID_COMPAQ) - return 1; - } - - DBG(KERN_WARNING "PCI: Sanity check failed\n"); - return 0; -} - -static int __init pci_check_type1(void) -{ - unsigned long flags; - unsigned int tmp; - int works = 0; - - local_irq_save(flags); - - outb(0x01, 0xCFB); - tmp = inl(0xCF8); - outl(0x80000000, 0xCF8); - if (inl(0xCF8) == 0x80000000 && pci_sanity_check(&pci_direct_conf1)) { - works = 1; - } - outl(tmp, 0xCF8); - local_irq_restore(flags); - - return works; -} - -static int __init pci_check_type2(void) -{ - unsigned long flags; - int works = 0; - - local_irq_save(flags); - - outb(0x00, 0xCFB); - outb(0x00, 0xCF8); - outb(0x00, 0xCFA); - if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00 && - pci_sanity_check(&pci_direct_conf2)) { - works = 1; - } - - local_irq_restore(flags); - - return works; -} - -void __init pci_direct_init(int type) -{ - if (type == 0) - return; - printk(KERN_INFO "PCI: Using configuration type %d\n", type); - if (type == 1) - raw_pci_ops = &pci_direct_conf1; - else - raw_pci_ops = &pci_direct_conf2; -} - -int __init pci_direct_probe(void) -{ - struct resource *region, *region2; - - if ((pci_probe & PCI_PROBE_CONF1) == 0) - goto type2; - region = request_region(0xCF8, 8, "PCI conf1"); - if (!region) - goto type2; - - if (pci_check_type1()) - return 1; - release_resource(region); - - type2: - if ((pci_probe & PCI_PROBE_CONF2) == 0) - return 0; - region = request_region(0xCF8, 4, "PCI conf2"); - if (!region) - return 0; - region2 = request_region(0xC000, 0x1000, "PCI conf2"); - if (!region2) - goto fail2; - - if (pci_check_type2()) { - printk(KERN_INFO "PCI: Using configuration type 2\n"); - raw_pci_ops = &pci_direct_conf2; - return 2; - } - - release_resource(region2); - fail2: - release_resource(region); - return 0; -} diff --git a/arch/i386/pci/early.c b/arch/i386/pci/early.c deleted file mode 100644 index 42df4b6606d..00000000000 --- a/arch/i386/pci/early.c +++ /dev/null @@ -1,59 +0,0 @@ -#include -#include -#include -#include -#include "pci.h" - -/* Direct PCI access. This is used for PCI accesses in early boot before - the PCI subsystem works. */ - -#define PDprintk(x...) - -u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset) -{ - u32 v; - outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); - v = inl(0xcfc); - if (v != 0xffffffff) - PDprintk("%x reading 4 from %x: %x\n", slot, offset, v); - return v; -} - -u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset) -{ - u8 v; - outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); - v = inb(0xcfc + (offset&3)); - PDprintk("%x reading 1 from %x: %x\n", slot, offset, v); - return v; -} - -u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset) -{ - u16 v; - outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); - v = inw(0xcfc + (offset&2)); - PDprintk("%x reading 2 from %x: %x\n", slot, offset, v); - return v; -} - -void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, - u32 val) -{ - PDprintk("%x writing to %x: %x\n", slot, offset, val); - outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); - outl(val, 0xcfc); -} - -void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val) -{ - PDprintk("%x writing to %x: %x\n", slot, offset, val); - outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); - outb(val, 0xcfc); -} - -int early_pci_allowed(void) -{ - return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) == - PCI_PROBE_CONF1; -} diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c deleted file mode 100644 index c82cbf4c722..00000000000 --- a/arch/i386/pci/fixup.c +++ /dev/null @@ -1,446 +0,0 @@ -/* - * Exceptions for specific devices. Usually work-arounds for fatal design flaws. - */ - -#include -#include -#include -#include -#include "pci.h" - - -static void __devinit pci_fixup_i450nx(struct pci_dev *d) -{ - /* - * i450NX -- Find and scan all secondary buses on all PXB's. - */ - int pxb, reg; - u8 busno, suba, subb; - - printk(KERN_WARNING "PCI: Searching for i450NX host bridges on %s\n", pci_name(d)); - reg = 0xd0; - for(pxb=0; pxb<2; pxb++) { - pci_read_config_byte(d, reg++, &busno); - pci_read_config_byte(d, reg++, &suba); - pci_read_config_byte(d, reg++, &subb); - DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); - if (busno) - pci_scan_bus_with_sysdata(busno); /* Bus A */ - if (suba < subb) - pci_scan_bus_with_sysdata(suba+1); /* Bus B */ - } - pcibios_last_bus = -1; -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx); - -static void __devinit pci_fixup_i450gx(struct pci_dev *d) -{ - /* - * i450GX and i450KX -- Find and scan all secondary buses. - * (called separately for each PCI bridge found) - */ - u8 busno; - pci_read_config_byte(d, 0x4a, &busno); - printk(KERN_INFO "PCI: i440KX/GX host bridge %s: secondary bus %02x\n", pci_name(d), busno); - pci_scan_bus_with_sysdata(busno); - pcibios_last_bus = -1; -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx); - -static void __devinit pci_fixup_umc_ide(struct pci_dev *d) -{ - /* - * UM8886BF IDE controller sets region type bits incorrectly, - * therefore they look like memory despite of them being I/O. - */ - int i; - - printk(KERN_WARNING "PCI: Fixing base address flags for device %s\n", pci_name(d)); - for(i=0; i<4; i++) - d->resource[i].flags |= PCI_BASE_ADDRESS_SPACE_IO; -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide); - -static void __devinit pci_fixup_ncr53c810(struct pci_dev *d) -{ - /* - * NCR 53C810 returns class code 0 (at least on some systems). - * Fix class to be PCI_CLASS_STORAGE_SCSI - */ - if (!d->class) { - printk(KERN_WARNING "PCI: fixing NCR 53C810 class code for %s\n", pci_name(d)); - d->class = PCI_CLASS_STORAGE_SCSI << 8; - } -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810); - -static void __devinit pci_fixup_latency(struct pci_dev *d) -{ - /* - * SiS 5597 and 5598 chipsets require latency timer set to - * at most 32 to avoid lockups. - */ - DBG("PCI: Setting max latency to 32\n"); - pcibios_max_latency = 32; -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, pci_fixup_latency); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5598, pci_fixup_latency); - -static void __devinit pci_fixup_piix4_acpi(struct pci_dev *d) -{ - /* - * PIIX4 ACPI device: hardwired IRQ9 - */ - d->irq = 9; -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, pci_fixup_piix4_acpi); - -/* - * Addresses issues with problems in the memory write queue timer in - * certain VIA Northbridges. This bugfix is per VIA's specifications, - * except for the KL133/KM133: clearing bit 5 on those Northbridges seems - * to trigger a bug in its integrated ProSavage video card, which - * causes screen corruption. We only clear bits 6 and 7 for that chipset, - * until VIA can provide us with definitive information on why screen - * corruption occurs, and what exactly those bits do. - * - * VIA 8363,8622,8361 Northbridges: - * - bits 5, 6, 7 at offset 0x55 need to be turned off - * VIA 8367 (KT266x) Northbridges: - * - bits 5, 6, 7 at offset 0x95 need to be turned off - * VIA 8363 rev 0x81/0x84 (KL133/KM133) Northbridges: - * - bits 6, 7 at offset 0x55 need to be turned off - */ - -#define VIA_8363_KL133_REVISION_ID 0x81 -#define VIA_8363_KM133_REVISION_ID 0x84 - -static void pci_fixup_via_northbridge_bug(struct pci_dev *d) -{ - u8 v; - int where = 0x55; - int mask = 0x1f; /* clear bits 5, 6, 7 by default */ - - if (d->device == PCI_DEVICE_ID_VIA_8367_0) { - /* fix pci bus latency issues resulted by NB bios error - it appears on bug free^Wreduced kt266x's bios forces - NB latency to zero */ - pci_write_config_byte(d, PCI_LATENCY_TIMER, 0); - - where = 0x95; /* the memory write queue timer register is - different for the KT266x's: 0x95 not 0x55 */ - } else if (d->device == PCI_DEVICE_ID_VIA_8363_0 && - (d->revision == VIA_8363_KL133_REVISION_ID || - d->revision == VIA_8363_KM133_REVISION_ID)) { - mask = 0x3f; /* clear only bits 6 and 7; clearing bit 5 - causes screen corruption on the KL133/KM133 */ - } - - pci_read_config_byte(d, where, &v); - if (v & ~mask) { - printk(KERN_WARNING "Disabling VIA memory write queue (PCI ID %04x, rev %02x): [%02x] %02x & %02x -> %02x\n", \ - d->device, d->revision, where, v, mask, v & mask); - v &= mask; - pci_write_config_byte(d, where, v); - } -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, pci_fixup_via_northbridge_bug); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8622, pci_fixup_via_northbridge_bug); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, pci_fixup_via_northbridge_bug); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_via_northbridge_bug); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, pci_fixup_via_northbridge_bug); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8622, pci_fixup_via_northbridge_bug); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, pci_fixup_via_northbridge_bug); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_via_northbridge_bug); - -/* - * For some reasons Intel decided that certain parts of their - * 815, 845 and some other chipsets must look like PCI-to-PCI bridges - * while they are obviously not. The 82801 family (AA, AB, BAM/CAM, - * BA/CA/DB and E) PCI bridges are actually HUB-to-PCI ones, according - * to Intel terminology. These devices do forward all addresses from - * system to PCI bus no matter what are their window settings, so they are - * "transparent" (or subtractive decoding) from programmers point of view. - */ -static void __devinit pci_fixup_transparent_bridge(struct pci_dev *dev) -{ - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && - (dev->device & 0xff00) == 0x2400) - dev->transparent = 1; -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge); - -/* - * Fixup for C1 Halt Disconnect problem on nForce2 systems. - * - * From information provided by "Allen Martin" : - * - * A hang is caused when the CPU generates a very fast CONNECT/HALT cycle - * sequence. Workaround is to set the SYSTEM_IDLE_TIMEOUT to 80 ns. - * This allows the state-machine and timer to return to a proper state within - * 80 ns of the CONNECT and probe appearing together. Since the CPU will not - * issue another HALT within 80 ns of the initial HALT, the failure condition - * is avoided. - */ -static void pci_fixup_nforce2(struct pci_dev *dev) -{ - u32 val; - - /* - * Chip Old value New value - * C17 0x1F0FFF01 0x1F01FF01 - * C18D 0x9F0FFF01 0x9F01FF01 - * - * Northbridge chip version may be determined by - * reading the PCI revision ID (0xC1 or greater is C18D). - */ - pci_read_config_dword(dev, 0x6c, &val); - - /* - * Apply fixup if needed, but don't touch disconnect state - */ - if ((val & 0x00FF0000) != 0x00010000) { - printk(KERN_WARNING "PCI: nForce2 C1 Halt Disconnect fixup\n"); - pci_write_config_dword(dev, 0x6c, (val & 0xFF00FFFF) | 0x00010000); - } -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, pci_fixup_nforce2); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, pci_fixup_nforce2); - -/* Max PCI Express root ports */ -#define MAX_PCIEROOT 6 -static int quirk_aspm_offset[MAX_PCIEROOT << 3]; - -#define GET_INDEX(a, b) ((((a) - PCI_DEVICE_ID_INTEL_MCH_PA) << 3) + ((b) & 7)) - -static int quirk_pcie_aspm_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) -{ - return raw_pci_ops->read(0, bus->number, devfn, where, size, value); -} - -/* - * Replace the original pci bus ops for write with a new one that will filter - * the request to insure ASPM cannot be enabled. - */ -static int quirk_pcie_aspm_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value) -{ - u8 offset; - - offset = quirk_aspm_offset[GET_INDEX(bus->self->device, devfn)]; - - if ((offset) && (where == offset)) - value = value & 0xfffffffc; - - return raw_pci_ops->write(0, bus->number, devfn, where, size, value); -} - -static struct pci_ops quirk_pcie_aspm_ops = { - .read = quirk_pcie_aspm_read, - .write = quirk_pcie_aspm_write, -}; - -/* - * Prevents PCI Express ASPM (Active State Power Management) being enabled. - * - * Save the register offset, where the ASPM control bits are located, - * for each PCI Express device that is in the device list of - * the root port in an array for fast indexing. Replace the bus ops - * with the modified one. - */ -static void pcie_rootport_aspm_quirk(struct pci_dev *pdev) -{ - int cap_base, i; - struct pci_bus *pbus; - struct pci_dev *dev; - - if ((pbus = pdev->subordinate) == NULL) - return; - - /* - * Check if the DID of pdev matches one of the six root ports. This - * check is needed in the case this function is called directly by the - * hot-plug driver. - */ - if ((pdev->device < PCI_DEVICE_ID_INTEL_MCH_PA) || - (pdev->device > PCI_DEVICE_ID_INTEL_MCH_PC1)) - return; - - if (list_empty(&pbus->devices)) { - /* - * If no device is attached to the root port at power-up or - * after hot-remove, the pbus->devices is empty and this code - * will set the offsets to zero and the bus ops to parent's bus - * ops, which is unmodified. - */ - for (i= GET_INDEX(pdev->device, 0); i <= GET_INDEX(pdev->device, 7); ++i) - quirk_aspm_offset[i] = 0; - - pbus->ops = pbus->parent->ops; - } else { - /* - * If devices are attached to the root port at power-up or - * after hot-add, the code loops through the device list of - * each root port to save the register offsets and replace the - * bus ops. - */ - list_for_each_entry(dev, &pbus->devices, bus_list) { - /* There are 0 to 8 devices attached to this bus */ - cap_base = pci_find_capability(dev, PCI_CAP_ID_EXP); - quirk_aspm_offset[GET_INDEX(pdev->device, dev->devfn)]= cap_base + 0x10; - } - pbus->ops = &quirk_pcie_aspm_ops; - } -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PA, pcie_rootport_aspm_quirk ); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PA1, pcie_rootport_aspm_quirk ); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PB, pcie_rootport_aspm_quirk ); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PB1, pcie_rootport_aspm_quirk ); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC, pcie_rootport_aspm_quirk ); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC1, pcie_rootport_aspm_quirk ); - -/* - * Fixup to mark boot BIOS video selected by BIOS before it changes - * - * From information provided by "Jon Smirl" - * - * The standard boot ROM sequence for an x86 machine uses the BIOS - * to select an initial video card for boot display. This boot video - * card will have it's BIOS copied to C0000 in system RAM. - * IORESOURCE_ROM_SHADOW is used to associate the boot video - * card with this copy. On laptops this copy has to be used since - * the main ROM may be compressed or combined with another image. - * See pci_map_rom() for use of this flag. IORESOURCE_ROM_SHADOW - * is marked here since the boot video device will be the only enabled - * video device at this point. - */ - -static void __devinit pci_fixup_video(struct pci_dev *pdev) -{ - struct pci_dev *bridge; - struct pci_bus *bus; - u16 config; - - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) - return; - - /* Is VGA routed to us? */ - bus = pdev->bus; - while (bus) { - bridge = bus->self; - - /* - * From information provided by - * "David Miller" - * The bridge control register is valid for PCI header - * type BRIDGE, or CARDBUS. Host to PCI controllers use - * PCI header type NORMAL. - */ - if (bridge - &&((bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE) - ||(bridge->hdr_type == PCI_HEADER_TYPE_CARDBUS))) { - pci_read_config_word(bridge, PCI_BRIDGE_CONTROL, - &config); - if (!(config & PCI_BRIDGE_CTL_VGA)) - return; - } - bus = bus->parent; - } - pci_read_config_word(pdev, PCI_COMMAND, &config); - if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { - pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW; - printk(KERN_DEBUG "Boot video device is %s\n", pci_name(pdev)); - } -} -DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video); - -/* - * Some Toshiba laptops need extra code to enable their TI TSB43AB22/A. - * - * We pretend to bring them out of full D3 state, and restore the proper - * IRQ, PCI cache line size, and BARs, otherwise the device won't function - * properly. In some cases, the device will generate an interrupt on - * the wrong IRQ line, causing any devices sharing the line it's - * *supposed* to use to be disabled by the kernel's IRQ debug code. - */ -static u16 toshiba_line_size; - -static struct dmi_system_id __devinitdata toshiba_ohci1394_dmi_table[] = { - { - .ident = "Toshiba PS5 based laptop", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), - DMI_MATCH(DMI_PRODUCT_VERSION, "PS5"), - }, - }, - { - .ident = "Toshiba PSM4 based laptop", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), - DMI_MATCH(DMI_PRODUCT_VERSION, "PSM4"), - }, - }, - { - .ident = "Toshiba A40 based laptop", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), - DMI_MATCH(DMI_PRODUCT_VERSION, "PSA40U"), - }, - }, - { } -}; - -static void __devinit pci_pre_fixup_toshiba_ohci1394(struct pci_dev *dev) -{ - if (!dmi_check_system(toshiba_ohci1394_dmi_table)) - return; /* only applies to certain Toshibas (so far) */ - - dev->current_state = PCI_D3cold; - pci_read_config_word(dev, PCI_CACHE_LINE_SIZE, &toshiba_line_size); -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, 0x8032, - pci_pre_fixup_toshiba_ohci1394); - -static void __devinit pci_post_fixup_toshiba_ohci1394(struct pci_dev *dev) -{ - if (!dmi_check_system(toshiba_ohci1394_dmi_table)) - return; /* only applies to certain Toshibas (so far) */ - - /* Restore config space on Toshiba laptops */ - pci_write_config_word(dev, PCI_CACHE_LINE_SIZE, toshiba_line_size); - pci_read_config_byte(dev, PCI_INTERRUPT_LINE, (u8 *)&dev->irq); - pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, - pci_resource_start(dev, 0)); - pci_write_config_dword(dev, PCI_BASE_ADDRESS_1, - pci_resource_start(dev, 1)); -} -DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_TI, 0x8032, - pci_post_fixup_toshiba_ohci1394); - - -/* - * Prevent the BIOS trapping accesses to the Cyrix CS5530A video device - * configuration space. - */ -static void pci_early_fixup_cyrix_5530(struct pci_dev *dev) -{ - u8 r; - /* clear 'F4 Video Configuration Trap' bit */ - pci_read_config_byte(dev, 0x42, &r); - r &= 0xfd; - pci_write_config_byte(dev, 0x42, r); -} -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, - pci_early_fixup_cyrix_5530); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, - pci_early_fixup_cyrix_5530); - -/* - * Siemens Nixdorf AG FSC Multiprocessor Interrupt Controller: - * prevent update of the BAR0, which doesn't look like a normal BAR. - */ -static void __devinit pci_siemens_interrupt_controller(struct pci_dev *dev) -{ - dev->resource[0].flags |= IORESOURCE_PCI_FIXED; -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015, - pci_siemens_interrupt_controller); diff --git a/arch/i386/pci/i386.c b/arch/i386/pci/i386.c deleted file mode 100644 index bcd2f94b732..00000000000 --- a/arch/i386/pci/i386.c +++ /dev/null @@ -1,315 +0,0 @@ -/* - * Low-Level PCI Access for i386 machines - * - * Copyright 1993, 1994 Drew Eckhardt - * Visionary Computing - * (Unix and Linux consulting and custom programming) - * Drew@Colorado.EDU - * +1 (303) 786-7975 - * - * Drew's work was sponsored by: - * iX Multiuser Multitasking Magazine - * Hannover, Germany - * hm@ix.de - * - * Copyright 1997--2000 Martin Mares - * - * For more information, please consult the following manuals (look at - * http://www.pcisig.com/ for how to get them): - * - * PCI BIOS Specification - * PCI Local Bus Specification - * PCI to PCI Bridge Specification - * PCI System Design Guide - * - */ - -#include -#include -#include -#include -#include -#include - -#include "pci.h" - -/* - * We need to avoid collisions with `mirrored' VGA ports - * and other strange ISA hardware, so we always want the - * addresses to be allocated in the 0x000-0x0ff region - * modulo 0x400. - * - * Why? Because some silly external IO cards only decode - * the low 10 bits of the IO address. The 0x00-0xff region - * is reserved for motherboard devices that decode all 16 - * bits, so it's ok to allocate at, say, 0x2800-0x28ff, - * but we want to try to avoid allocating at 0x2900-0x2bff - * which might have be mirrored at 0x0100-0x03ff.. - */ -void -pcibios_align_resource(void *data, struct resource *res, - resource_size_t size, resource_size_t align) -{ - if (res->flags & IORESOURCE_IO) { - resource_size_t start = res->start; - - if (start & 0x300) { - start = (start + 0x3ff) & ~0x3ff; - res->start = start; - } - } -} - - -/* - * Handle resources of PCI devices. If the world were perfect, we could - * just allocate all the resource regions and do nothing more. It isn't. - * On the other hand, we cannot just re-allocate all devices, as it would - * require us to know lots of host bridge internals. So we attempt to - * keep as much of the original configuration as possible, but tweak it - * when it's found to be wrong. - * - * Known BIOS problems we have to work around: - * - I/O or memory regions not configured - * - regions configured, but not enabled in the command register - * - bogus I/O addresses above 64K used - * - expansion ROMs left enabled (this may sound harmless, but given - * the fact the PCI specs explicitly allow address decoders to be - * shared between expansion ROMs and other resource regions, it's - * at least dangerous) - * - * Our solution: - * (1) Allocate resources for all buses behind PCI-to-PCI bridges. - * This gives us fixed barriers on where we can allocate. - * (2) Allocate resources for all enabled devices. If there is - * a collision, just mark the resource as unallocated. Also - * disable expansion ROMs during this step. - * (3) Try to allocate resources for disabled devices. If the - * resources were assigned correctly, everything goes well, - * if they weren't, they won't disturb allocation of other - * resources. - * (4) Assign new addresses to resources which were either - * not configured at all or misconfigured. If explicitly - * requested by the user, configure expansion ROM address - * as well. - */ - -static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) -{ - struct pci_bus *bus; - struct pci_dev *dev; - int idx; - struct resource *r, *pr; - - /* Depth-First Search on bus tree */ - list_for_each_entry(bus, bus_list, node) { - if ((dev = bus->self)) { - for (idx = PCI_BRIDGE_RESOURCES; - idx < PCI_NUM_RESOURCES; idx++) { - r = &dev->resource[idx]; - if (!r->flags) - continue; - pr = pci_find_parent_resource(dev, r); - if (!r->start || !pr || - request_resource(pr, r) < 0) { - printk(KERN_ERR "PCI: Cannot allocate " - "resource region %d " - "of bridge %s\n", - idx, pci_name(dev)); - /* - * Something is wrong with the region. - * Invalidate the resource to prevent - * child resource allocations in this - * range. - */ - r->flags = 0; - } - } - } - pcibios_allocate_bus_resources(&bus->children); - } -} - -static void __init pcibios_allocate_resources(int pass) -{ - struct pci_dev *dev = NULL; - int idx, disabled; - u16 command; - struct resource *r, *pr; - - for_each_pci_dev(dev) { - pci_read_config_word(dev, PCI_COMMAND, &command); - for (idx = 0; idx < PCI_ROM_RESOURCE; idx++) { - r = &dev->resource[idx]; - if (r->parent) /* Already allocated */ - continue; - if (!r->start) /* Address not assigned at all */ - continue; - if (r->flags & IORESOURCE_IO) - disabled = !(command & PCI_COMMAND_IO); - else - disabled = !(command & PCI_COMMAND_MEMORY); - if (pass == disabled) { - DBG("PCI: Resource %08lx-%08lx " - "(f=%lx, d=%d, p=%d)\n", - r->start, r->end, r->flags, disabled, pass); - pr = pci_find_parent_resource(dev, r); - if (!pr || request_resource(pr, r) < 0) { - printk(KERN_ERR "PCI: Cannot allocate " - "resource region %d " - "of device %s\n", - idx, pci_name(dev)); - /* We'll assign a new address later */ - r->end -= r->start; - r->start = 0; - } - } - } - if (!pass) { - r = &dev->resource[PCI_ROM_RESOURCE]; - if (r->flags & IORESOURCE_ROM_ENABLE) { - /* Turn the ROM off, leave the resource region, - * but keep it unregistered. */ - u32 reg; - DBG("PCI: Switching off ROM of %s\n", - pci_name(dev)); - r->flags &= ~IORESOURCE_ROM_ENABLE; - pci_read_config_dword(dev, - dev->rom_base_reg, ®); - pci_write_config_dword(dev, dev->rom_base_reg, - reg & ~PCI_ROM_ADDRESS_ENABLE); - } - } - } -} - -static int __init pcibios_assign_resources(void) -{ - struct pci_dev *dev = NULL; - struct resource *r, *pr; - - if (!(pci_probe & PCI_ASSIGN_ROMS)) { - /* - * Try to use BIOS settings for ROMs, otherwise let - * pci_assign_unassigned_resources() allocate the new - * addresses. - */ - for_each_pci_dev(dev) { - r = &dev->resource[PCI_ROM_RESOURCE]; - if (!r->flags || !r->start) - continue; - pr = pci_find_parent_resource(dev, r); - if (!pr || request_resource(pr, r) < 0) { - r->end -= r->start; - r->start = 0; - } - } - } - - pci_assign_unassigned_resources(); - - return 0; -} - -void __init pcibios_resource_survey(void) -{ - DBG("PCI: Allocating resources\n"); - pcibios_allocate_bus_resources(&pci_root_buses); - pcibios_allocate_resources(0); - pcibios_allocate_resources(1); -} - -/** - * called in fs_initcall (one below subsys_initcall), - * give a chance for motherboard reserve resources - */ -fs_initcall(pcibios_assign_resources); - -int pcibios_enable_resources(struct pci_dev *dev, int mask) -{ - u16 cmd, old_cmd; - int idx; - struct resource *r; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - old_cmd = cmd; - for (idx = 0; idx < PCI_NUM_RESOURCES; idx++) { - /* Only set up the requested stuff */ - if (!(mask & (1 << idx))) - continue; - - r = &dev->resource[idx]; - if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM))) - continue; - if ((idx == PCI_ROM_RESOURCE) && - (!(r->flags & IORESOURCE_ROM_ENABLE))) - continue; - if (!r->start && r->end) { - printk(KERN_ERR "PCI: Device %s not available " - "because of resource %d collisions\n", - pci_name(dev), idx); - return -EINVAL; - } - if (r->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (r->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - if (cmd != old_cmd) { - printk("PCI: Enabling device %s (%04x -> %04x)\n", - pci_name(dev), old_cmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - return 0; -} - -/* - * If we set up a device for bus mastering, we need to check the latency - * timer as certain crappy BIOSes forget to set it properly. - */ -unsigned int pcibios_max_latency = 255; - -void pcibios_set_master(struct pci_dev *dev) -{ - u8 lat; - pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); - if (lat < 16) - lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency; - else if (lat > pcibios_max_latency) - lat = pcibios_max_latency; - else - return; - printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", - pci_name(dev), lat); - pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); -} - -int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, int write_combine) -{ - unsigned long prot; - - /* I/O space cannot be accessed via normal processor loads and - * stores on this platform. - */ - if (mmap_state == pci_mmap_io) - return -EINVAL; - - /* Leave vm_pgoff as-is, the PCI space address is the physical - * address on this platform. - */ - prot = pgprot_val(vma->vm_page_prot); - if (boot_cpu_data.x86 > 3) - prot |= _PAGE_PCD | _PAGE_PWT; - vma->vm_page_prot = __pgprot(prot); - - /* Write-combine setting is ignored, it is changed via the mtrr - * interfaces on this platform. - */ - if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - vma->vm_end - vma->vm_start, - vma->vm_page_prot)) - return -EAGAIN; - - return 0; -} diff --git a/arch/i386/pci/init.c b/arch/i386/pci/init.c deleted file mode 100644 index 3de9f9ba2da..00000000000 --- a/arch/i386/pci/init.c +++ /dev/null @@ -1,37 +0,0 @@ -#include -#include -#include "pci.h" - -/* arch_initcall has too random ordering, so call the initializers - in the right sequence from here. */ -static __init int pci_access_init(void) -{ - int type __maybe_unused = 0; - -#ifdef CONFIG_PCI_DIRECT - type = pci_direct_probe(); -#endif -#ifdef CONFIG_PCI_MMCONFIG - pci_mmcfg_init(type); -#endif - if (raw_pci_ops) - return 0; -#ifdef CONFIG_PCI_BIOS - pci_pcbios_init(); -#endif - /* - * don't check for raw_pci_ops here because we want pcbios as last - * fallback, yet it's needed to run first to set pcibios_last_bus - * in case legacy PCI probing is used. otherwise detecting peer busses - * fails. - */ -#ifdef CONFIG_PCI_DIRECT - pci_direct_init(type); -#endif - if (!raw_pci_ops) - printk(KERN_ERR - "PCI: Fatal: No config space access function found\n"); - - return 0; -} -arch_initcall(pci_access_init); diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c deleted file mode 100644 index 8434f2323b8..00000000000 --- a/arch/i386/pci/irq.c +++ /dev/null @@ -1,1173 +0,0 @@ -/* - * Low-Level PCI Support for PC -- Routing of Interrupts - * - * (c) 1999--2000 Martin Mares - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "pci.h" - -#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24)) -#define PIRQ_VERSION 0x0100 - -static int broken_hp_bios_irq9; -static int acer_tm360_irqrouting; - -static struct irq_routing_table *pirq_table; - -static int pirq_enable_irq(struct pci_dev *dev); - -/* - * Never use: 0, 1, 2 (timer, keyboard, and cascade) - * Avoid using: 13, 14 and 15 (FP error and IDE). - * Penalize: 3, 4, 6, 7, 12 (known ISA uses: serial, floppy, parallel and mouse) - */ -unsigned int pcibios_irq_mask = 0xfff8; - -static int pirq_penalty[16] = { - 1000000, 1000000, 1000000, 1000, 1000, 0, 1000, 1000, - 0, 0, 0, 0, 1000, 100000, 100000, 100000 -}; - -struct irq_router { - char *name; - u16 vendor, device; - int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq); - int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new); -}; - -struct irq_router_handler { - u16 vendor; - int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device); -}; - -int (*pcibios_enable_irq)(struct pci_dev *dev) = NULL; -void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL; - -/* - * Check passed address for the PCI IRQ Routing Table signature - * and perform checksum verification. - */ - -static inline struct irq_routing_table * pirq_check_routing_table(u8 *addr) -{ - struct irq_routing_table *rt; - int i; - u8 sum; - - rt = (struct irq_routing_table *) addr; - if (rt->signature != PIRQ_SIGNATURE || - rt->version != PIRQ_VERSION || - rt->size % 16 || - rt->size < sizeof(struct irq_routing_table)) - return NULL; - sum = 0; - for (i=0; i < rt->size; i++) - sum += addr[i]; - if (!sum) { - DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", rt); - return rt; - } - return NULL; -} - - - -/* - * Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table. - */ - -static struct irq_routing_table * __init pirq_find_routing_table(void) -{ - u8 *addr; - struct irq_routing_table *rt; - - if (pirq_table_addr) { - rt = pirq_check_routing_table((u8 *) __va(pirq_table_addr)); - if (rt) - return rt; - printk(KERN_WARNING "PCI: PIRQ table NOT found at pirqaddr\n"); - } - for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) { - rt = pirq_check_routing_table(addr); - if (rt) - return rt; - } - return NULL; -} - -/* - * If we have a IRQ routing table, use it to search for peer host - * bridges. It's a gross hack, but since there are no other known - * ways how to get a list of buses, we have to go this way. - */ - -static void __init pirq_peer_trick(void) -{ - struct irq_routing_table *rt = pirq_table; - u8 busmap[256]; - int i; - struct irq_info *e; - - memset(busmap, 0, sizeof(busmap)); - for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) { - e = &rt->slots[i]; -#ifdef DEBUG - { - int j; - DBG(KERN_DEBUG "%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot); - for(j=0; j<4; j++) - DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap); - DBG("\n"); - } -#endif - busmap[e->bus] = 1; - } - for(i = 1; i < 256; i++) { - if (!busmap[i] || pci_find_bus(0, i)) - continue; - if (pci_scan_bus_with_sysdata(i)) - printk(KERN_INFO "PCI: Discovered primary peer " - "bus %02x [IRQ]\n", i); - } - pcibios_last_bus = -1; -} - -/* - * Code for querying and setting of IRQ routes on various interrupt routers. - */ - -void eisa_set_level_irq(unsigned int irq) -{ - unsigned char mask = 1 << (irq & 7); - unsigned int port = 0x4d0 + (irq >> 3); - unsigned char val; - static u16 eisa_irq_mask; - - if (irq >= 16 || (1 << irq) & eisa_irq_mask) - return; - - eisa_irq_mask |= (1 << irq); - printk(KERN_DEBUG "PCI: setting IRQ %u as level-triggered\n", irq); - val = inb(port); - if (!(val & mask)) { - DBG(KERN_DEBUG " -> edge"); - outb(val | mask, port); - } -} - -/* - * Common IRQ routing practice: nybbles in config space, - * offset by some magic constant. - */ -static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr) -{ - u8 x; - unsigned reg = offset + (nr >> 1); - - pci_read_config_byte(router, reg, &x); - return (nr & 1) ? (x >> 4) : (x & 0xf); -} - -static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val) -{ - u8 x; - unsigned reg = offset + (nr >> 1); - - pci_read_config_byte(router, reg, &x); - x = (nr & 1) ? ((x & 0x0f) | (val << 4)) : ((x & 0xf0) | val); - pci_write_config_byte(router, reg, x); -} - -/* - * ALI pirq entries are damn ugly, and completely undocumented. - * This has been figured out from pirq tables, and it's not a pretty - * picture. - */ -static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - static const unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 }; - - return irqmap[read_config_nybble(router, 0x48, pirq-1)]; -} - -static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - static const unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 }; - unsigned int val = irqmap[irq]; - - if (val) { - write_config_nybble(router, 0x48, pirq-1, val); - return 1; - } - return 0; -} - -/* - * The Intel PIIX4 pirq rules are fairly simple: "pirq" is - * just a pointer to the config space. - */ -static int pirq_piix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - u8 x; - - pci_read_config_byte(router, pirq, &x); - return (x < 16) ? x : 0; -} - -static int pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - pci_write_config_byte(router, pirq, irq); - return 1; -} - -/* - * The VIA pirq rules are nibble-based, like ALI, - * but without the ugly irq number munging. - * However, PIRQD is in the upper instead of lower 4 bits. - */ -static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - return read_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq); -} - -static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - write_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq, irq); - return 1; -} - -/* - * The VIA pirq rules are nibble-based, like ALI, - * but without the ugly irq number munging. - * However, for 82C586, nibble map is different . - */ -static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 }; - return read_config_nybble(router, 0x55, pirqmap[pirq-1]); -} - -static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 }; - write_config_nybble(router, 0x55, pirqmap[pirq-1], irq); - return 1; -} - -/* - * ITE 8330G pirq rules are nibble-based - * FIXME: pirqmap may be { 1, 0, 3, 2 }, - * 2+3 are both mapped to irq 9 on my system - */ -static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - static const unsigned char pirqmap[4] = { 1, 0, 2, 3 }; - return read_config_nybble(router,0x43, pirqmap[pirq-1]); -} - -static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - static const unsigned char pirqmap[4] = { 1, 0, 2, 3 }; - write_config_nybble(router, 0x43, pirqmap[pirq-1], irq); - return 1; -} - -/* - * OPTI: high four bits are nibble pointer.. - * I wonder what the low bits do? - */ -static int pirq_opti_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - return read_config_nybble(router, 0xb8, pirq >> 4); -} - -static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - write_config_nybble(router, 0xb8, pirq >> 4, irq); - return 1; -} - -/* - * Cyrix: nibble offset 0x5C - * 0x5C bits 7:4 is INTB bits 3:0 is INTA - * 0x5D bits 7:4 is INTD bits 3:0 is INTC - */ -static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - return read_config_nybble(router, 0x5C, (pirq-1)^1); -} - -static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - write_config_nybble(router, 0x5C, (pirq-1)^1, irq); - return 1; -} - -/* - * PIRQ routing for SiS 85C503 router used in several SiS chipsets. - * We have to deal with the following issues here: - * - vendors have different ideas about the meaning of link values - * - some onboard devices (integrated in the chipset) have special - * links and are thus routed differently (i.e. not via PCI INTA-INTD) - * - different revision of the router have a different layout for - * the routing registers, particularly for the onchip devices - * - * For all routing registers the common thing is we have one byte - * per routeable link which is defined as: - * bit 7 IRQ mapping enabled (0) or disabled (1) - * bits [6:4] reserved (sometimes used for onchip devices) - * bits [3:0] IRQ to map to - * allowed: 3-7, 9-12, 14-15 - * reserved: 0, 1, 2, 8, 13 - * - * The config-space registers located at 0x41/0x42/0x43/0x44 are - * always used to route the normal PCI INT A/B/C/D respectively. - * Apparently there are systems implementing PCI routing table using - * link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D. - * We try our best to handle both link mappings. - * - * Currently (2003-05-21) it appears most SiS chipsets follow the - * definition of routing registers from the SiS-5595 southbridge. - * According to the SiS 5595 datasheets the revision id's of the - * router (ISA-bridge) should be 0x01 or 0xb0. - * - * Furthermore we've also seen lspci dumps with revision 0x00 and 0xb1. - * Looks like these are used in a number of SiS 5xx/6xx/7xx chipsets. - * They seem to work with the current routing code. However there is - * some concern because of the two USB-OHCI HCs (original SiS 5595 - * had only one). YMMV. - * - * Onchip routing for router rev-id 0x01/0xb0 and probably 0x00/0xb1: - * - * 0x61: IDEIRQ: - * bits [6:5] must be written 01 - * bit 4 channel-select primary (0), secondary (1) - * - * 0x62: USBIRQ: - * bit 6 OHCI function disabled (0), enabled (1) - * - * 0x6a: ACPI/SCI IRQ: bits 4-6 reserved - * - * 0x7e: Data Acq. Module IRQ - bits 4-6 reserved - * - * We support USBIRQ (in addition to INTA-INTD) and keep the - * IDE, ACPI and DAQ routing untouched as set by the BIOS. - * - * Currently the only reported exception is the new SiS 65x chipset - * which includes the SiS 69x southbridge. Here we have the 85C503 - * router revision 0x04 and there are changes in the register layout - * mostly related to the different USB HCs with USB 2.0 support. - * - * Onchip routing for router rev-id 0x04 (try-and-error observation) - * - * 0x60/0x61/0x62/0x63: 1xEHCI and 3xOHCI (companion) USB-HCs - * bit 6-4 are probably unused, not like 5595 - */ - -#define PIRQ_SIS_IRQ_MASK 0x0f -#define PIRQ_SIS_IRQ_DISABLE 0x80 -#define PIRQ_SIS_USB_ENABLE 0x40 - -static int pirq_sis_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - u8 x; - int reg; - - reg = pirq; - if (reg >= 0x01 && reg <= 0x04) - reg += 0x40; - pci_read_config_byte(router, reg, &x); - return (x & PIRQ_SIS_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS_IRQ_MASK); -} - -static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - u8 x; - int reg; - - reg = pirq; - if (reg >= 0x01 && reg <= 0x04) - reg += 0x40; - pci_read_config_byte(router, reg, &x); - x &= ~(PIRQ_SIS_IRQ_MASK | PIRQ_SIS_IRQ_DISABLE); - x |= irq ? irq: PIRQ_SIS_IRQ_DISABLE; - pci_write_config_byte(router, reg, x); - return 1; -} - - -/* - * VLSI: nibble offset 0x74 - educated guess due to routing table and - * config space of VLSI 82C534 PCI-bridge/router (1004:0102) - * Tested on HP OmniBook 800 covering PIRQ 1, 2, 4, 8 for onboard - * devices, PIRQ 3 for non-pci(!) soundchip and (untested) PIRQ 6 - * for the busbridge to the docking station. - */ - -static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - if (pirq > 8) { - printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); - return 0; - } - return read_config_nybble(router, 0x74, pirq-1); -} - -static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - if (pirq > 8) { - printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); - return 0; - } - write_config_nybble(router, 0x74, pirq-1, irq); - return 1; -} - -/* - * ServerWorks: PCI interrupts mapped to system IRQ lines through Index - * and Redirect I/O registers (0x0c00 and 0x0c01). The Index register - * format is (PCIIRQ## | 0x10), e.g.: PCIIRQ10=0x1a. The Redirect - * register is a straight binary coding of desired PIC IRQ (low nibble). - * - * The 'link' value in the PIRQ table is already in the correct format - * for the Index register. There are some special index values: - * 0x00 for ACPI (SCI), 0x01 for USB, 0x02 for IDE0, 0x04 for IDE1, - * and 0x03 for SMBus. - */ -static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - outb_p(pirq, 0xc00); - return inb(0xc01) & 0xf; -} - -static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - outb_p(pirq, 0xc00); - outb_p(irq, 0xc01); - return 1; -} - -/* Support for AMD756 PCI IRQ Routing - * Jhon H. Caicedo - * Jun/21/2001 0.2.0 Release, fixed to use "nybble" functions... (jhcaiced) - * Jun/19/2001 Alpha Release 0.1.0 (jhcaiced) - * The AMD756 pirq rules are nibble-based - * offset 0x56 0-3 PIRQA 4-7 PIRQB - * offset 0x57 0-3 PIRQC 4-7 PIRQD - */ -static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - u8 irq; - irq = 0; - if (pirq <= 4) - { - irq = read_config_nybble(router, 0x56, pirq - 1); - } - printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", - dev->vendor, dev->device, pirq, irq); - return irq; -} - -static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", - dev->vendor, dev->device, pirq, irq); - if (pirq <= 4) - { - write_config_nybble(router, 0x56, pirq - 1, irq); - } - return 1; -} - -#ifdef CONFIG_PCI_BIOS - -static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - struct pci_dev *bridge; - int pin = pci_get_interrupt_pin(dev, &bridge); - return pcibios_set_irq_routing(bridge, pin, irq); -} - -#endif - -static __init int intel_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) -{ - static struct pci_device_id __initdata pirq_440gx[] = { - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0) }, - { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2) }, - { }, - }; - - /* 440GX has a proprietary PIRQ router -- don't use it */ - if (pci_dev_present(pirq_440gx)) - return 0; - - switch(device) - { - case PCI_DEVICE_ID_INTEL_82371FB_0: - case PCI_DEVICE_ID_INTEL_82371SB_0: - case PCI_DEVICE_ID_INTEL_82371AB_0: - case PCI_DEVICE_ID_INTEL_82371MX: - case PCI_DEVICE_ID_INTEL_82443MX_0: - case PCI_DEVICE_ID_INTEL_82801AA_0: - case PCI_DEVICE_ID_INTEL_82801AB_0: - case PCI_DEVICE_ID_INTEL_82801BA_0: - case PCI_DEVICE_ID_INTEL_82801BA_10: - case PCI_DEVICE_ID_INTEL_82801CA_0: - case PCI_DEVICE_ID_INTEL_82801CA_12: - case PCI_DEVICE_ID_INTEL_82801DB_0: - case PCI_DEVICE_ID_INTEL_82801E_0: - case PCI_DEVICE_ID_INTEL_82801EB_0: - case PCI_DEVICE_ID_INTEL_ESB_1: - case PCI_DEVICE_ID_INTEL_ICH6_0: - case PCI_DEVICE_ID_INTEL_ICH6_1: - case PCI_DEVICE_ID_INTEL_ICH7_0: - case PCI_DEVICE_ID_INTEL_ICH7_1: - case PCI_DEVICE_ID_INTEL_ICH7_30: - case PCI_DEVICE_ID_INTEL_ICH7_31: - case PCI_DEVICE_ID_INTEL_ESB2_0: - case PCI_DEVICE_ID_INTEL_ICH8_0: - case PCI_DEVICE_ID_INTEL_ICH8_1: - case PCI_DEVICE_ID_INTEL_ICH8_2: - case PCI_DEVICE_ID_INTEL_ICH8_3: - case PCI_DEVICE_ID_INTEL_ICH8_4: - case PCI_DEVICE_ID_INTEL_ICH9_0: - case PCI_DEVICE_ID_INTEL_ICH9_1: - case PCI_DEVICE_ID_INTEL_ICH9_2: - case PCI_DEVICE_ID_INTEL_ICH9_3: - case PCI_DEVICE_ID_INTEL_ICH9_4: - case PCI_DEVICE_ID_INTEL_ICH9_5: - case PCI_DEVICE_ID_INTEL_TOLAPAI_0: - r->name = "PIIX/ICH"; - r->get = pirq_piix_get; - r->set = pirq_piix_set; - return 1; - } - return 0; -} - -static __init int via_router_probe(struct irq_router *r, - struct pci_dev *router, u16 device) -{ - /* FIXME: We should move some of the quirk fixup stuff here */ - - /* - * work arounds for some buggy BIOSes - */ - if (device == PCI_DEVICE_ID_VIA_82C586_0) { - switch(router->device) { - case PCI_DEVICE_ID_VIA_82C686: - /* - * Asus k7m bios wrongly reports 82C686A - * as 586-compatible - */ - device = PCI_DEVICE_ID_VIA_82C686; - break; - case PCI_DEVICE_ID_VIA_8235: - /** - * Asus a7v-x bios wrongly reports 8235 - * as 586-compatible - */ - device = PCI_DEVICE_ID_VIA_8235; - break; - } - } - - switch(device) { - case PCI_DEVICE_ID_VIA_82C586_0: - r->name = "VIA"; - r->get = pirq_via586_get; - r->set = pirq_via586_set; - return 1; - case PCI_DEVICE_ID_VIA_82C596: - case PCI_DEVICE_ID_VIA_82C686: - case PCI_DEVICE_ID_VIA_8231: - case PCI_DEVICE_ID_VIA_8233A: - case PCI_DEVICE_ID_VIA_8235: - case PCI_DEVICE_ID_VIA_8237: - /* FIXME: add new ones for 8233/5 */ - r->name = "VIA"; - r->get = pirq_via_get; - r->set = pirq_via_set; - return 1; - } - return 0; -} - -static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) -{ - switch(device) - { - case PCI_DEVICE_ID_VLSI_82C534: - r->name = "VLSI 82C534"; - r->get = pirq_vlsi_get; - r->set = pirq_vlsi_set; - return 1; - } - return 0; -} - - -static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) -{ - switch(device) - { - case PCI_DEVICE_ID_SERVERWORKS_OSB4: - case PCI_DEVICE_ID_SERVERWORKS_CSB5: - r->name = "ServerWorks"; - r->get = pirq_serverworks_get; - r->set = pirq_serverworks_set; - return 1; - } - return 0; -} - -static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) -{ - if (device != PCI_DEVICE_ID_SI_503) - return 0; - - r->name = "SIS"; - r->get = pirq_sis_get; - r->set = pirq_sis_set; - return 1; -} - -static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) -{ - switch(device) - { - case PCI_DEVICE_ID_CYRIX_5520: - r->name = "NatSemi"; - r->get = pirq_cyrix_get; - r->set = pirq_cyrix_set; - return 1; - } - return 0; -} - -static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) -{ - switch(device) - { - case PCI_DEVICE_ID_OPTI_82C700: - r->name = "OPTI"; - r->get = pirq_opti_get; - r->set = pirq_opti_set; - return 1; - } - return 0; -} - -static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) -{ - switch(device) - { - case PCI_DEVICE_ID_ITE_IT8330G_0: - r->name = "ITE"; - r->get = pirq_ite_get; - r->set = pirq_ite_set; - return 1; - } - return 0; -} - -static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) -{ - switch(device) - { - case PCI_DEVICE_ID_AL_M1533: - case PCI_DEVICE_ID_AL_M1563: - printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n"); - r->name = "ALI"; - r->get = pirq_ali_get; - r->set = pirq_ali_set; - return 1; - } - return 0; -} - -static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) -{ - switch(device) - { - case PCI_DEVICE_ID_AMD_VIPER_740B: - r->name = "AMD756"; - break; - case PCI_DEVICE_ID_AMD_VIPER_7413: - r->name = "AMD766"; - break; - case PCI_DEVICE_ID_AMD_VIPER_7443: - r->name = "AMD768"; - break; - default: - return 0; - } - r->get = pirq_amd756_get; - r->set = pirq_amd756_set; - return 1; -} - -static __initdata struct irq_router_handler pirq_routers[] = { - { PCI_VENDOR_ID_INTEL, intel_router_probe }, - { PCI_VENDOR_ID_AL, ali_router_probe }, - { PCI_VENDOR_ID_ITE, ite_router_probe }, - { PCI_VENDOR_ID_VIA, via_router_probe }, - { PCI_VENDOR_ID_OPTI, opti_router_probe }, - { PCI_VENDOR_ID_SI, sis_router_probe }, - { PCI_VENDOR_ID_CYRIX, cyrix_router_probe }, - { PCI_VENDOR_ID_VLSI, vlsi_router_probe }, - { PCI_VENDOR_ID_SERVERWORKS, serverworks_router_probe }, - { PCI_VENDOR_ID_AMD, amd_router_probe }, - /* Someone with docs needs to add the ATI Radeon IGP */ - { 0, NULL } -}; -static struct irq_router pirq_router; -static struct pci_dev *pirq_router_dev; - - -/* - * FIXME: should we have an option to say "generic for - * chipset" ? - */ - -static void __init pirq_find_router(struct irq_router *r) -{ - struct irq_routing_table *rt = pirq_table; - struct irq_router_handler *h; - -#ifdef CONFIG_PCI_BIOS - if (!rt->signature) { - printk(KERN_INFO "PCI: Using BIOS for IRQ routing\n"); - r->set = pirq_bios_set; - r->name = "BIOS"; - return; - } -#endif - - /* Default unless a driver reloads it */ - r->name = "default"; - r->get = NULL; - r->set = NULL; - - DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", - rt->rtr_vendor, rt->rtr_device); - - pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn); - if (!pirq_router_dev) { - DBG(KERN_DEBUG "PCI: Interrupt router not found at " - "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn); - return; - } - - for( h = pirq_routers; h->vendor; h++) { - /* First look for a router match */ - if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device)) - break; - /* Fall back to a device match */ - if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device)) - break; - } - printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n", - pirq_router.name, - pirq_router_dev->vendor, - pirq_router_dev->device, - pci_name(pirq_router_dev)); - - /* The device remains referenced for the kernel lifetime */ -} - -static struct irq_info *pirq_get_info(struct pci_dev *dev) -{ - struct irq_routing_table *rt = pirq_table; - int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); - struct irq_info *info; - - for (info = rt->slots; entries--; info++) - if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn)) - return info; - return NULL; -} - -static int pcibios_lookup_irq(struct pci_dev *dev, int assign) -{ - u8 pin; - struct irq_info *info; - int i, pirq, newirq; - int irq = 0; - u32 mask; - struct irq_router *r = &pirq_router; - struct pci_dev *dev2 = NULL; - char *msg = NULL; - - /* Find IRQ pin */ - pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); - if (!pin) { - DBG(KERN_DEBUG " -> no interrupt pin\n"); - return 0; - } - pin = pin - 1; - - /* Find IRQ routing entry */ - - if (!pirq_table) - return 0; - - DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin); - info = pirq_get_info(dev); - if (!info) { - DBG(" -> not found in routing table\n" KERN_DEBUG); - return 0; - } - pirq = info->irq[pin].link; - mask = info->irq[pin].bitmap; - if (!pirq) { - DBG(" -> not routed\n" KERN_DEBUG); - return 0; - } - DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs); - mask &= pcibios_irq_mask; - - /* Work around broken HP Pavilion Notebooks which assign USB to - IRQ 9 even though it is actually wired to IRQ 11 */ - - if (broken_hp_bios_irq9 && pirq == 0x59 && dev->irq == 9) { - dev->irq = 11; - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, 11); - r->set(pirq_router_dev, dev, pirq, 11); - } - - /* same for Acer Travelmate 360, but with CB and irq 11 -> 10 */ - if (acer_tm360_irqrouting && dev->irq == 11 && dev->vendor == PCI_VENDOR_ID_O2) { - pirq = 0x68; - mask = 0x400; - dev->irq = r->get(pirq_router_dev, dev, pirq); - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); - } - - /* - * Find the best IRQ to assign: use the one - * reported by the device if possible. - */ - newirq = dev->irq; - if (newirq && !((1 << newirq) & mask)) { - if ( pci_probe & PCI_USE_PIRQ_MASK) newirq = 0; - else printk("\n" KERN_WARNING - "PCI: IRQ %i for device %s doesn't match PIRQ mask " - "- try pci=usepirqmask\n" KERN_DEBUG, newirq, - pci_name(dev)); - } - if (!newirq && assign) { - for (i = 0; i < 16; i++) { - if (!(mask & (1 << i))) - continue; - if (pirq_penalty[i] < pirq_penalty[newirq] && can_request_irq(i, IRQF_SHARED)) - newirq = i; - } - } - DBG(" -> newirq=%d", newirq); - - /* Check if it is hardcoded */ - if ((pirq & 0xf0) == 0xf0) { - irq = pirq & 0xf; - DBG(" -> hardcoded IRQ %d\n", irq); - msg = "Hardcoded"; - } else if ( r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ - ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask)) ) { - DBG(" -> got IRQ %d\n", irq); - msg = "Found"; - eisa_set_level_irq(irq); - } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { - DBG(" -> assigning IRQ %d", newirq); - if (r->set(pirq_router_dev, dev, pirq, newirq)) { - eisa_set_level_irq(newirq); - DBG(" ... OK\n"); - msg = "Assigned"; - irq = newirq; - } - } - - if (!irq) { - DBG(" ... failed\n"); - if (newirq && mask == (1 << newirq)) { - msg = "Guessed"; - irq = newirq; - } else - return 0; - } - printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, pci_name(dev)); - - /* Update IRQ for all devices with the same pirq value */ - while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { - pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin); - if (!pin) - continue; - pin--; - info = pirq_get_info(dev2); - if (!info) - continue; - if (info->irq[pin].link == pirq) { - /* We refuse to override the dev->irq information. Give a warning! */ - if ( dev2->irq && dev2->irq != irq && \ - (!(pci_probe & PCI_USE_PIRQ_MASK) || \ - ((1 << dev2->irq) & mask)) ) { -#ifndef CONFIG_PCI_MSI - printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", - pci_name(dev2), dev2->irq, irq); -#endif - continue; - } - dev2->irq = irq; - pirq_penalty[irq]++; - if (dev != dev2) - printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, pci_name(dev2)); - } - } - return 1; -} - -static void __init pcibios_fixup_irqs(void) -{ - struct pci_dev *dev = NULL; - u8 pin; - - DBG(KERN_DEBUG "PCI: IRQ fixup\n"); - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - /* - * If the BIOS has set an out of range IRQ number, just ignore it. - * Also keep track of which IRQ's are already in use. - */ - if (dev->irq >= 16) { - DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", pci_name(dev), dev->irq); - dev->irq = 0; - } - /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */ - if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000) - pirq_penalty[dev->irq] = 0; - pirq_penalty[dev->irq]++; - } - - dev = NULL; - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); -#ifdef CONFIG_X86_IO_APIC - /* - * Recalculate IRQ numbers if we use the I/O APIC. - */ - if (io_apic_assign_pci_irqs) - { - int irq; - - if (pin) { - pin--; /* interrupt pins are numbered starting from 1 */ - irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin); - /* - * Busses behind bridges are typically not listed in the MP-table. - * In this case we have to look up the IRQ based on the parent bus, - * parent slot, and pin number. The SMP code detects such bridged - * busses itself so we should get into this branch reliably. - */ - if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ - struct pci_dev * bridge = dev->bus->self; - - pin = (pin + PCI_SLOT(dev->devfn)) % 4; - irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, - PCI_SLOT(bridge->devfn), pin); - if (irq >= 0) - printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", - pci_name(bridge), 'A' + pin, irq); - } - if (irq >= 0) { - printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", - pci_name(dev), 'A' + pin, irq); - dev->irq = irq; - } - } - } -#endif - /* - * Still no IRQ? Try to lookup one... - */ - if (pin && !dev->irq) - pcibios_lookup_irq(dev, 0); - } -} - -/* - * Work around broken HP Pavilion Notebooks which assign USB to - * IRQ 9 even though it is actually wired to IRQ 11 - */ -static int __init fix_broken_hp_bios_irq9(struct dmi_system_id *d) -{ - if (!broken_hp_bios_irq9) { - broken_hp_bios_irq9 = 1; - printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident); - } - return 0; -} - -/* - * Work around broken Acer TravelMate 360 Notebooks which assign - * Cardbus to IRQ 11 even though it is actually wired to IRQ 10 - */ -static int __init fix_acer_tm360_irqrouting(struct dmi_system_id *d) -{ - if (!acer_tm360_irqrouting) { - acer_tm360_irqrouting = 1; - printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident); - } - return 0; -} - -static struct dmi_system_id __initdata pciirq_dmi_table[] = { - { - .callback = fix_broken_hp_bios_irq9, - .ident = "HP Pavilion N5400 Series Laptop", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_BIOS_VERSION, "GE.M1.03"), - DMI_MATCH(DMI_PRODUCT_VERSION, "HP Pavilion Notebook Model GE"), - DMI_MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736"), - }, - }, - { - .callback = fix_acer_tm360_irqrouting, - .ident = "Acer TravelMate 36x Laptop", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Acer"), - DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), - }, - }, - { } -}; - -static int __init pcibios_irq_init(void) -{ - DBG(KERN_DEBUG "PCI: IRQ init\n"); - - if (pcibios_enable_irq || raw_pci_ops == NULL) - return 0; - - dmi_check_system(pciirq_dmi_table); - - pirq_table = pirq_find_routing_table(); - -#ifdef CONFIG_PCI_BIOS - if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN)) - pirq_table = pcibios_get_irq_routing_table(); -#endif - if (pirq_table) { - pirq_peer_trick(); - pirq_find_router(&pirq_router); - if (pirq_table->exclusive_irqs) { - int i; - for (i=0; i<16; i++) - if (!(pirq_table->exclusive_irqs & (1 << i))) - pirq_penalty[i] += 100; - } - /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */ - if (io_apic_assign_pci_irqs) - pirq_table = NULL; - } - - pcibios_enable_irq = pirq_enable_irq; - - pcibios_fixup_irqs(); - return 0; -} - -subsys_initcall(pcibios_irq_init); - - -static void pirq_penalize_isa_irq(int irq, int active) -{ - /* - * If any ISAPnP device reports an IRQ in its list of possible - * IRQ's, we try to avoid assigning it to PCI devices. - */ - if (irq < 16) { - if (active) - pirq_penalty[irq] += 1000; - else - pirq_penalty[irq] += 100; - } -} - -void pcibios_penalize_isa_irq(int irq, int active) -{ -#ifdef CONFIG_ACPI - if (!acpi_noirq) - acpi_penalize_isa_irq(irq, active); - else -#endif - pirq_penalize_isa_irq(irq, active); -} - -static int pirq_enable_irq(struct pci_dev *dev) -{ - u8 pin; - struct pci_dev *temp_dev; - - pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); - if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) { - char *msg = ""; - - pin--; /* interrupt pins are numbered starting from 1 */ - - if (io_apic_assign_pci_irqs) { - int irq; - - irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin); - /* - * Busses behind bridges are typically not listed in the MP-table. - * In this case we have to look up the IRQ based on the parent bus, - * parent slot, and pin number. The SMP code detects such bridged - * busses itself so we should get into this branch reliably. - */ - temp_dev = dev; - while (irq < 0 && dev->bus->parent) { /* go back to the bridge */ - struct pci_dev * bridge = dev->bus->self; - - pin = (pin + PCI_SLOT(dev->devfn)) % 4; - irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, - PCI_SLOT(bridge->devfn), pin); - if (irq >= 0) - printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", - pci_name(bridge), 'A' + pin, irq); - dev = bridge; - } - dev = temp_dev; - if (irq >= 0) { - printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", - pci_name(dev), 'A' + pin, irq); - dev->irq = irq; - return 0; - } else - msg = " Probably buggy MP table."; - } else if (pci_probe & PCI_BIOS_IRQ_SCAN) - msg = ""; - else - msg = " Please try using pci=biosirq."; - - /* With IDE legacy devices the IRQ lookup failure is not a problem.. */ - if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5)) - return 0; - - printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", - 'A' + pin, pci_name(dev), msg); - } - return 0; -} diff --git a/arch/i386/pci/legacy.c b/arch/i386/pci/legacy.c deleted file mode 100644 index 5565d7016b7..00000000000 --- a/arch/i386/pci/legacy.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * legacy.c - traditional, old school PCI bus probing - */ -#include -#include -#include "pci.h" - -/* - * Discover remaining PCI buses in case there are peer host bridges. - * We use the number of last PCI bus provided by the PCI BIOS. - */ -static void __devinit pcibios_fixup_peer_bridges(void) -{ - int n, devfn; - - if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) - return; - DBG("PCI: Peer bridge fixup\n"); - - for (n=0; n <= pcibios_last_bus; n++) { - u32 l; - if (pci_find_bus(0, n)) - continue; - for (devfn = 0; devfn < 256; devfn += 8) { - if (!raw_pci_ops->read(0, n, devfn, PCI_VENDOR_ID, 2, &l) && - l != 0x0000 && l != 0xffff) { - DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l); - printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); - pci_scan_bus_with_sysdata(n); - break; - } - } - } -} - -static int __init pci_legacy_init(void) -{ - if (!raw_pci_ops) { - printk("PCI: System does not support PCI\n"); - return 0; - } - - if (pcibios_scanned++) - return 0; - - printk("PCI: Probing PCI hardware\n"); - pci_root_bus = pcibios_scan_root(0); - if (pci_root_bus) - pci_bus_add_devices(pci_root_bus); - - pcibios_fixup_peer_bridges(); - - return 0; -} - -subsys_initcall(pci_legacy_init); diff --git a/arch/i386/pci/mmconfig-shared.c b/arch/i386/pci/mmconfig-shared.c deleted file mode 100644 index 4df637e34f8..00000000000 --- a/arch/i386/pci/mmconfig-shared.c +++ /dev/null @@ -1,315 +0,0 @@ -/* - * mmconfig-shared.c - Low-level direct PCI config space access via - * MMCONFIG - common code between i386 and x86-64. - * - * This code does: - * - known chipset handling - * - ACPI decoding and validation - * - * Per-architecture code takes care of the mappings and accesses - * themselves. - */ - -#include -#include -#include -#include -#include - -#include "pci.h" - -/* aperture is up to 256MB but BIOS may reserve less */ -#define MMCONFIG_APER_MIN (2 * 1024*1024) -#define MMCONFIG_APER_MAX (256 * 1024*1024) - -DECLARE_BITMAP(pci_mmcfg_fallback_slots, 32*PCI_MMCFG_MAX_CHECK_BUS); - -/* Indicate if the mmcfg resources have been placed into the resource table. */ -static int __initdata pci_mmcfg_resources_inserted; - -/* K8 systems have some devices (typically in the builtin northbridge) - that are only accessible using type1 - Normally this can be expressed in the MCFG by not listing them - and assigning suitable _SEGs, but this isn't implemented in some BIOS. - Instead try to discover all devices on bus 0 that are unreachable using MM - and fallback for them. */ -static void __init unreachable_devices(void) -{ - int i, bus; - /* Use the max bus number from ACPI here? */ - for (bus = 0; bus < PCI_MMCFG_MAX_CHECK_BUS; bus++) { - for (i = 0; i < 32; i++) { - unsigned int devfn = PCI_DEVFN(i, 0); - u32 val1, val2; - - pci_conf1_read(0, bus, devfn, 0, 4, &val1); - if (val1 == 0xffffffff) - continue; - - if (pci_mmcfg_arch_reachable(0, bus, devfn)) { - raw_pci_ops->read(0, bus, devfn, 0, 4, &val2); - if (val1 == val2) - continue; - } - set_bit(i + 32 * bus, pci_mmcfg_fallback_slots); - printk(KERN_NOTICE "PCI: No mmconfig possible on device" - " %02x:%02x\n", bus, i); - } - } -} - -static const char __init *pci_mmcfg_e7520(void) -{ - u32 win; - pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win); - - win = win & 0xf000; - if(win == 0x0000 || win == 0xf000) - pci_mmcfg_config_num = 0; - else { - pci_mmcfg_config_num = 1; - pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL); - if (!pci_mmcfg_config) - return NULL; - pci_mmcfg_config[0].address = win << 16; - pci_mmcfg_config[0].pci_segment = 0; - pci_mmcfg_config[0].start_bus_number = 0; - pci_mmcfg_config[0].end_bus_number = 255; - } - - return "Intel Corporation E7520 Memory Controller Hub"; -} - -static const char __init *pci_mmcfg_intel_945(void) -{ - u32 pciexbar, mask = 0, len = 0; - - pci_mmcfg_config_num = 1; - - pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0x48, 4, &pciexbar); - - /* Enable bit */ - if (!(pciexbar & 1)) - pci_mmcfg_config_num = 0; - - /* Size bits */ - switch ((pciexbar >> 1) & 3) { - case 0: - mask = 0xf0000000U; - len = 0x10000000U; - break; - case 1: - mask = 0xf8000000U; - len = 0x08000000U; - break; - case 2: - mask = 0xfc000000U; - len = 0x04000000U; - break; - default: - pci_mmcfg_config_num = 0; - } - - /* Errata #2, things break when not aligned on a 256Mb boundary */ - /* Can only happen in 64M/128M mode */ - - if ((pciexbar & mask) & 0x0fffffffU) - pci_mmcfg_config_num = 0; - - /* Don't hit the APIC registers and their friends */ - if ((pciexbar & mask) >= 0xf0000000U) - pci_mmcfg_config_num = 0; - - if (pci_mmcfg_config_num) { - pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL); - if (!pci_mmcfg_config) - return NULL; - pci_mmcfg_config[0].address = pciexbar & mask; - pci_mmcfg_config[0].pci_segment = 0; - pci_mmcfg_config[0].start_bus_number = 0; - pci_mmcfg_config[0].end_bus_number = (len >> 20) - 1; - } - - return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub"; -} - -struct pci_mmcfg_hostbridge_probe { - u32 vendor; - u32 device; - const char *(*probe)(void); -}; - -static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = { - { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, pci_mmcfg_e7520 }, - { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82945G_HB, pci_mmcfg_intel_945 }, -}; - -static int __init pci_mmcfg_check_hostbridge(void) -{ - u32 l; - u16 vendor, device; - int i; - const char *name; - - pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0, 4, &l); - vendor = l & 0xffff; - device = (l >> 16) & 0xffff; - - pci_mmcfg_config_num = 0; - pci_mmcfg_config = NULL; - name = NULL; - - for (i = 0; !name && i < ARRAY_SIZE(pci_mmcfg_probes); i++) { - if (pci_mmcfg_probes[i].vendor == vendor && - pci_mmcfg_probes[i].device == device) - name = pci_mmcfg_probes[i].probe(); - } - - if (name) { - printk(KERN_INFO "PCI: Found %s %s MMCONFIG support.\n", - name, pci_mmcfg_config_num ? "with" : "without"); - } - - return name != NULL; -} - -static void __init pci_mmcfg_insert_resources(unsigned long resource_flags) -{ -#define PCI_MMCFG_RESOURCE_NAME_LEN 19 - int i; - struct resource *res; - char *names; - unsigned num_buses; - - res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res), - pci_mmcfg_config_num, GFP_KERNEL); - if (!res) { - printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n"); - return; - } - - names = (void *)&res[pci_mmcfg_config_num]; - for (i = 0; i < pci_mmcfg_config_num; i++, res++) { - struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i]; - num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; - res->name = names; - snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u", - cfg->pci_segment); - res->start = cfg->address; - res->end = res->start + (num_buses << 20) - 1; - res->flags = IORESOURCE_MEM | resource_flags; - insert_resource(&iomem_resource, res); - names += PCI_MMCFG_RESOURCE_NAME_LEN; - } - - /* Mark that the resources have been inserted. */ - pci_mmcfg_resources_inserted = 1; -} - -static void __init pci_mmcfg_reject_broken(int type) -{ - typeof(pci_mmcfg_config[0]) *cfg; - - if ((pci_mmcfg_config_num == 0) || - (pci_mmcfg_config == NULL) || - (pci_mmcfg_config[0].address == 0)) - return; - - cfg = &pci_mmcfg_config[0]; - - /* - * Handle more broken MCFG tables on Asus etc. - * They only contain a single entry for bus 0-0. - */ - if (pci_mmcfg_config_num == 1 && - cfg->pci_segment == 0 && - (cfg->start_bus_number | cfg->end_bus_number) == 0) { - printk(KERN_ERR "PCI: start and end of bus number is 0. " - "Rejected as broken MCFG.\n"); - goto reject; - } - - /* - * Only do this check when type 1 works. If it doesn't work - * assume we run on a Mac and always use MCFG - */ - if (type == 1 && !e820_all_mapped(cfg->address, - cfg->address + MMCONFIG_APER_MIN, - E820_RESERVED)) { - printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" - " E820-reserved\n", cfg->address); - goto reject; - } - return; - -reject: - printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); - kfree(pci_mmcfg_config); - pci_mmcfg_config = NULL; - pci_mmcfg_config_num = 0; -} - -void __init pci_mmcfg_init(int type) -{ - int known_bridge = 0; - - if ((pci_probe & PCI_PROBE_MMCONF) == 0) - return; - - if (type == 1 && pci_mmcfg_check_hostbridge()) - known_bridge = 1; - - if (!known_bridge) { - acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); - pci_mmcfg_reject_broken(type); - } - - if ((pci_mmcfg_config_num == 0) || - (pci_mmcfg_config == NULL) || - (pci_mmcfg_config[0].address == 0)) - return; - - if (pci_mmcfg_arch_init()) { - if (type == 1) - unreachable_devices(); - if (known_bridge) - pci_mmcfg_insert_resources(IORESOURCE_BUSY); - pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; - } else { - /* - * Signal not to attempt to insert mmcfg resources because - * the architecture mmcfg setup could not initialize. - */ - pci_mmcfg_resources_inserted = 1; - } -} - -static int __init pci_mmcfg_late_insert_resources(void) -{ - /* - * If resources are already inserted or we are not using MMCONFIG, - * don't insert the resources. - */ - if ((pci_mmcfg_resources_inserted == 1) || - (pci_probe & PCI_PROBE_MMCONF) == 0 || - (pci_mmcfg_config_num == 0) || - (pci_mmcfg_config == NULL) || - (pci_mmcfg_config[0].address == 0)) - return 1; - - /* - * Attempt to insert the mmcfg resources but not with the busy flag - * marked so it won't cause request errors when __request_region is - * called. - */ - pci_mmcfg_insert_resources(0); - - return 0; -} - -/* - * Perform MMCONFIG resource insertion after PCI initialization to allow for - * misprogrammed MCFG tables that state larger sizes but actually conflict - * with other system resources. - */ -late_initcall(pci_mmcfg_late_insert_resources); diff --git a/arch/i386/pci/mmconfig_32.c b/arch/i386/pci/mmconfig_32.c deleted file mode 100644 index 1bf5816d34c..00000000000 --- a/arch/i386/pci/mmconfig_32.c +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (C) 2004 Matthew Wilcox - * Copyright (C) 2004 Intel Corp. - * - * This code is released under the GNU General Public License version 2. - */ - -/* - * mmconfig.c - Low-level direct PCI config space access via MMCONFIG - */ - -#include -#include -#include -#include -#include "pci.h" - -/* Assume systems with more busses have correct MCFG */ -#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) - -/* The base address of the last MMCONFIG device accessed */ -static u32 mmcfg_last_accessed_device; -static int mmcfg_last_accessed_cpu; - -/* - * Functions for accessing PCI configuration space with MMCONFIG accesses - */ -static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) -{ - struct acpi_mcfg_allocation *cfg; - int cfg_num; - - if (seg == 0 && bus < PCI_MMCFG_MAX_CHECK_BUS && - test_bit(PCI_SLOT(devfn) + 32*bus, pci_mmcfg_fallback_slots)) - return 0; - - for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { - cfg = &pci_mmcfg_config[cfg_num]; - if (cfg->pci_segment == seg && - (cfg->start_bus_number <= bus) && - (cfg->end_bus_number >= bus)) - return cfg->address; - } - - /* Fall back to type 0 */ - return 0; -} - -/* - * This is always called under pci_config_lock - */ -static void pci_exp_set_dev_base(unsigned int base, int bus, int devfn) -{ - u32 dev_base = base | (bus << 20) | (devfn << 12); - int cpu = smp_processor_id(); - if (dev_base != mmcfg_last_accessed_device || - cpu != mmcfg_last_accessed_cpu) { - mmcfg_last_accessed_device = dev_base; - mmcfg_last_accessed_cpu = cpu; - set_fixmap_nocache(FIX_PCIE_MCFG, dev_base); - } -} - -static int pci_mmcfg_read(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 *value) -{ - unsigned long flags; - u32 base; - - if ((bus > 255) || (devfn > 255) || (reg > 4095)) { - *value = -1; - return -EINVAL; - } - - base = get_base_addr(seg, bus, devfn); - if (!base) - return pci_conf1_read(seg,bus,devfn,reg,len,value); - - spin_lock_irqsave(&pci_config_lock, flags); - - pci_exp_set_dev_base(base, bus, devfn); - - switch (len) { - case 1: - *value = mmio_config_readb(mmcfg_virt_addr + reg); - break; - case 2: - *value = mmio_config_readw(mmcfg_virt_addr + reg); - break; - case 4: - *value = mmio_config_readl(mmcfg_virt_addr + reg); - break; - } - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -static int pci_mmcfg_write(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 value) -{ - unsigned long flags; - u32 base; - - if ((bus > 255) || (devfn > 255) || (reg > 4095)) - return -EINVAL; - - base = get_base_addr(seg, bus, devfn); - if (!base) - return pci_conf1_write(seg,bus,devfn,reg,len,value); - - spin_lock_irqsave(&pci_config_lock, flags); - - pci_exp_set_dev_base(base, bus, devfn); - - switch (len) { - case 1: - mmio_config_writeb(mmcfg_virt_addr + reg, value); - break; - case 2: - mmio_config_writew(mmcfg_virt_addr + reg, value); - break; - case 4: - mmio_config_writel(mmcfg_virt_addr + reg, value); - break; - } - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -static struct pci_raw_ops pci_mmcfg = { - .read = pci_mmcfg_read, - .write = pci_mmcfg_write, -}; - -int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus, - unsigned int devfn) -{ - return get_base_addr(seg, bus, devfn) != 0; -} - -int __init pci_mmcfg_arch_init(void) -{ - printk(KERN_INFO "PCI: Using MMCONFIG\n"); - raw_pci_ops = &pci_mmcfg; - return 1; -} diff --git a/arch/i386/pci/numa.c b/arch/i386/pci/numa.c deleted file mode 100644 index f5f165f69e0..00000000000 --- a/arch/i386/pci/numa.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * numa.c - Low-level PCI access for NUMA-Q machines - */ - -#include -#include -#include -#include "pci.h" - -#define BUS2QUAD(global) (mp_bus_id_to_node[global]) -#define BUS2LOCAL(global) (mp_bus_id_to_local[global]) -#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) - -#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \ - (0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3)) - -static int pci_conf1_mq_read(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 *value) -{ - unsigned long flags; - - if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) - return -EINVAL; - - spin_lock_irqsave(&pci_config_lock, flags); - - outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus)); - - switch (len) { - case 1: - *value = inb_quad(0xCFC + (reg & 3), BUS2QUAD(bus)); - break; - case 2: - *value = inw_quad(0xCFC + (reg & 2), BUS2QUAD(bus)); - break; - case 4: - *value = inl_quad(0xCFC, BUS2QUAD(bus)); - break; - } - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -static int pci_conf1_mq_write(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 value) -{ - unsigned long flags; - - if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) - return -EINVAL; - - spin_lock_irqsave(&pci_config_lock, flags); - - outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus)); - - switch (len) { - case 1: - outb_quad((u8)value, 0xCFC + (reg & 3), BUS2QUAD(bus)); - break; - case 2: - outw_quad((u16)value, 0xCFC + (reg & 2), BUS2QUAD(bus)); - break; - case 4: - outl_quad((u32)value, 0xCFC, BUS2QUAD(bus)); - break; - } - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -#undef PCI_CONF1_MQ_ADDRESS - -static struct pci_raw_ops pci_direct_conf1_mq = { - .read = pci_conf1_mq_read, - .write = pci_conf1_mq_write -}; - - -static void __devinit pci_fixup_i450nx(struct pci_dev *d) -{ - /* - * i450NX -- Find and scan all secondary buses on all PXB's. - */ - int pxb, reg; - u8 busno, suba, subb; - int quad = BUS2QUAD(d->bus->number); - - printk("PCI: Searching for i450NX host bridges on %s\n", pci_name(d)); - reg = 0xd0; - for(pxb=0; pxb<2; pxb++) { - pci_read_config_byte(d, reg++, &busno); - pci_read_config_byte(d, reg++, &suba); - pci_read_config_byte(d, reg++, &subb); - DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); - if (busno) { - /* Bus A */ - pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno)); - } - if (suba < subb) { - /* Bus B */ - pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, suba+1)); - } - } - pcibios_last_bus = -1; -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx); - -static int __init pci_numa_init(void) -{ - int quad; - - raw_pci_ops = &pci_direct_conf1_mq; - - if (pcibios_scanned++) - return 0; - - pci_root_bus = pcibios_scan_root(0); - if (pci_root_bus) - pci_bus_add_devices(pci_root_bus); - if (num_online_nodes() > 1) - for_each_online_node(quad) { - if (quad == 0) - continue; - printk("Scanning PCI bus %d for quad %d\n", - QUADLOCAL2BUS(quad,0), quad); - pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, 0)); - } - return 0; -} - -subsys_initcall(pci_numa_init); diff --git a/arch/i386/pci/pcbios.c b/arch/i386/pci/pcbios.c deleted file mode 100644 index 10ac8c316c4..00000000000 --- a/arch/i386/pci/pcbios.c +++ /dev/null @@ -1,492 +0,0 @@ -/* - * BIOS32 and PCI BIOS handling. - */ - -#include -#include -#include -#include -#include "pci.h" -#include "pci-functions.h" - - -/* BIOS32 signature: "_32_" */ -#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) - -/* PCI signature: "PCI " */ -#define PCI_SIGNATURE (('P' << 0) + ('C' << 8) + ('I' << 16) + (' ' << 24)) - -/* PCI service signature: "$PCI" */ -#define PCI_SERVICE (('$' << 0) + ('P' << 8) + ('C' << 16) + ('I' << 24)) - -/* PCI BIOS hardware mechanism flags */ -#define PCIBIOS_HW_TYPE1 0x01 -#define PCIBIOS_HW_TYPE2 0x02 -#define PCIBIOS_HW_TYPE1_SPEC 0x10 -#define PCIBIOS_HW_TYPE2_SPEC 0x20 - -/* - * This is the standard structure used to identify the entry point - * to the BIOS32 Service Directory, as documented in - * Standard BIOS 32-bit Service Directory Proposal - * Revision 0.4 May 24, 1993 - * Phoenix Technologies Ltd. - * Norwood, MA - * and the PCI BIOS specification. - */ - -union bios32 { - struct { - unsigned long signature; /* _32_ */ - unsigned long entry; /* 32 bit physical address */ - unsigned char revision; /* Revision level, 0 */ - unsigned char length; /* Length in paragraphs should be 01 */ - unsigned char checksum; /* All bytes must add up to zero */ - unsigned char reserved[5]; /* Must be zero */ - } fields; - char chars[16]; -}; - -/* - * Physical address of the service directory. I don't know if we're - * allowed to have more than one of these or not, so just in case - * we'll make pcibios_present() take a memory start parameter and store - * the array there. - */ - -static struct { - unsigned long address; - unsigned short segment; -} bios32_indirect = { 0, __KERNEL_CS }; - -/* - * Returns the entry point for the given service, NULL on error - */ - -static unsigned long bios32_service(unsigned long service) -{ - unsigned char return_code; /* %al */ - unsigned long address; /* %ebx */ - unsigned long length; /* %ecx */ - unsigned long entry; /* %edx */ - unsigned long flags; - - local_irq_save(flags); - __asm__("lcall *(%%edi); cld" - : "=a" (return_code), - "=b" (address), - "=c" (length), - "=d" (entry) - : "0" (service), - "1" (0), - "D" (&bios32_indirect)); - local_irq_restore(flags); - - switch (return_code) { - case 0: - return address + entry; - case 0x80: /* Not present */ - printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service); - return 0; - default: /* Shouldn't happen */ - printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n", - service, return_code); - return 0; - } -} - -static struct { - unsigned long address; - unsigned short segment; -} pci_indirect = { 0, __KERNEL_CS }; - -static int pci_bios_present; - -static int __devinit check_pcibios(void) -{ - u32 signature, eax, ebx, ecx; - u8 status, major_ver, minor_ver, hw_mech; - unsigned long flags, pcibios_entry; - - if ((pcibios_entry = bios32_service(PCI_SERVICE))) { - pci_indirect.address = pcibios_entry + PAGE_OFFSET; - - local_irq_save(flags); - __asm__( - "lcall *(%%edi); cld\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=d" (signature), - "=a" (eax), - "=b" (ebx), - "=c" (ecx) - : "1" (PCIBIOS_PCI_BIOS_PRESENT), - "D" (&pci_indirect) - : "memory"); - local_irq_restore(flags); - - status = (eax >> 8) & 0xff; - hw_mech = eax & 0xff; - major_ver = (ebx >> 8) & 0xff; - minor_ver = ebx & 0xff; - if (pcibios_last_bus < 0) - pcibios_last_bus = ecx & 0xff; - DBG("PCI: BIOS probe returned s=%02x hw=%02x ver=%02x.%02x l=%02x\n", - status, hw_mech, major_ver, minor_ver, pcibios_last_bus); - if (status || signature != PCI_SIGNATURE) { - printk (KERN_ERR "PCI: BIOS BUG #%x[%08x] found\n", - status, signature); - return 0; - } - printk(KERN_INFO "PCI: PCI BIOS revision %x.%02x entry at 0x%lx, last bus=%d\n", - major_ver, minor_ver, pcibios_entry, pcibios_last_bus); -#ifdef CONFIG_PCI_DIRECT - if (!(hw_mech & PCIBIOS_HW_TYPE1)) - pci_probe &= ~PCI_PROBE_CONF1; - if (!(hw_mech & PCIBIOS_HW_TYPE2)) - pci_probe &= ~PCI_PROBE_CONF2; -#endif - return 1; - } - return 0; -} - -static int __devinit pci_bios_find_device (unsigned short vendor, unsigned short device_id, - unsigned short index, unsigned char *bus, unsigned char *device_fn) -{ - unsigned short bx; - unsigned short ret; - - __asm__("lcall *(%%edi); cld\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=b" (bx), - "=a" (ret) - : "1" (PCIBIOS_FIND_PCI_DEVICE), - "c" (device_id), - "d" (vendor), - "S" ((int) index), - "D" (&pci_indirect)); - *bus = (bx >> 8) & 0xff; - *device_fn = bx & 0xff; - return (int) (ret & 0xff00) >> 8; -} - -static int pci_bios_read(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 *value) -{ - unsigned long result = 0; - unsigned long flags; - unsigned long bx = (bus << 8) | devfn; - - if (!value || (bus > 255) || (devfn > 255) || (reg > 255)) - return -EINVAL; - - spin_lock_irqsave(&pci_config_lock, flags); - - switch (len) { - case 1: - __asm__("lcall *(%%esi); cld\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=c" (*value), - "=a" (result) - : "1" (PCIBIOS_READ_CONFIG_BYTE), - "b" (bx), - "D" ((long)reg), - "S" (&pci_indirect)); - break; - case 2: - __asm__("lcall *(%%esi); cld\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=c" (*value), - "=a" (result) - : "1" (PCIBIOS_READ_CONFIG_WORD), - "b" (bx), - "D" ((long)reg), - "S" (&pci_indirect)); - break; - case 4: - __asm__("lcall *(%%esi); cld\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=c" (*value), - "=a" (result) - : "1" (PCIBIOS_READ_CONFIG_DWORD), - "b" (bx), - "D" ((long)reg), - "S" (&pci_indirect)); - break; - } - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return (int)((result & 0xff00) >> 8); -} - -static int pci_bios_write(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 value) -{ - unsigned long result = 0; - unsigned long flags; - unsigned long bx = (bus << 8) | devfn; - - if ((bus > 255) || (devfn > 255) || (reg > 255)) - return -EINVAL; - - spin_lock_irqsave(&pci_config_lock, flags); - - switch (len) { - case 1: - __asm__("lcall *(%%esi); cld\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=a" (result) - : "0" (PCIBIOS_WRITE_CONFIG_BYTE), - "c" (value), - "b" (bx), - "D" ((long)reg), - "S" (&pci_indirect)); - break; - case 2: - __asm__("lcall *(%%esi); cld\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=a" (result) - : "0" (PCIBIOS_WRITE_CONFIG_WORD), - "c" (value), - "b" (bx), - "D" ((long)reg), - "S" (&pci_indirect)); - break; - case 4: - __asm__("lcall *(%%esi); cld\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=a" (result) - : "0" (PCIBIOS_WRITE_CONFIG_DWORD), - "c" (value), - "b" (bx), - "D" ((long)reg), - "S" (&pci_indirect)); - break; - } - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return (int)((result & 0xff00) >> 8); -} - - -/* - * Function table for BIOS32 access - */ - -static struct pci_raw_ops pci_bios_access = { - .read = pci_bios_read, - .write = pci_bios_write -}; - -/* - * Try to find PCI BIOS. - */ - -static struct pci_raw_ops * __devinit pci_find_bios(void) -{ - union bios32 *check; - unsigned char sum; - int i, length; - - /* - * Follow the standard procedure for locating the BIOS32 Service - * directory by scanning the permissible address range from - * 0xe0000 through 0xfffff for a valid BIOS32 structure. - */ - - for (check = (union bios32 *) __va(0xe0000); - check <= (union bios32 *) __va(0xffff0); - ++check) { - long sig; - if (probe_kernel_address(&check->fields.signature, sig)) - continue; - - if (check->fields.signature != BIOS32_SIGNATURE) - continue; - length = check->fields.length * 16; - if (!length) - continue; - sum = 0; - for (i = 0; i < length ; ++i) - sum += check->chars[i]; - if (sum != 0) - continue; - if (check->fields.revision != 0) { - printk("PCI: unsupported BIOS32 revision %d at 0x%p\n", - check->fields.revision, check); - continue; - } - DBG("PCI: BIOS32 Service Directory structure at 0x%p\n", check); - if (check->fields.entry >= 0x100000) { - printk("PCI: BIOS32 entry (0x%p) in high memory, " - "cannot use.\n", check); - return NULL; - } else { - unsigned long bios32_entry = check->fields.entry; - DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", - bios32_entry); - bios32_indirect.address = bios32_entry + PAGE_OFFSET; - if (check_pcibios()) - return &pci_bios_access; - } - break; /* Hopefully more than one BIOS32 cannot happen... */ - } - - return NULL; -} - -/* - * Sort the device list according to PCI BIOS. Nasty hack, but since some - * fool forgot to define the `correct' device order in the PCI BIOS specs - * and we want to be (possibly bug-to-bug ;-]) compatible with older kernels - * which used BIOS ordering, we are bound to do this... - */ - -void __devinit pcibios_sort(void) -{ - LIST_HEAD(sorted_devices); - struct list_head *ln; - struct pci_dev *dev, *d; - int idx, found; - unsigned char bus, devfn; - - DBG("PCI: Sorting device list...\n"); - while (!list_empty(&pci_devices)) { - ln = pci_devices.next; - dev = pci_dev_g(ln); - idx = found = 0; - while (pci_bios_find_device(dev->vendor, dev->device, idx, &bus, &devfn) == PCIBIOS_SUCCESSFUL) { - idx++; - list_for_each(ln, &pci_devices) { - d = pci_dev_g(ln); - if (d->bus->number == bus && d->devfn == devfn) { - list_move_tail(&d->global_list, &sorted_devices); - if (d == dev) - found = 1; - break; - } - } - if (ln == &pci_devices) { - printk(KERN_WARNING "PCI: BIOS reporting unknown device %02x:%02x\n", bus, devfn); - /* - * We must not continue scanning as several buggy BIOSes - * return garbage after the last device. Grr. - */ - break; - } - } - if (!found) { - printk(KERN_WARNING "PCI: Device %s not found by BIOS\n", - pci_name(dev)); - list_move_tail(&dev->global_list, &sorted_devices); - } - } - list_splice(&sorted_devices, &pci_devices); -} - -/* - * BIOS Functions for IRQ Routing - */ - -struct irq_routing_options { - u16 size; - struct irq_info *table; - u16 segment; -} __attribute__((packed)); - -struct irq_routing_table * pcibios_get_irq_routing_table(void) -{ - struct irq_routing_options opt; - struct irq_routing_table *rt = NULL; - int ret, map; - unsigned long page; - - if (!pci_bios_present) - return NULL; - page = __get_free_page(GFP_KERNEL); - if (!page) - return NULL; - opt.table = (struct irq_info *) page; - opt.size = PAGE_SIZE; - opt.segment = __KERNEL_DS; - - DBG("PCI: Fetching IRQ routing table... "); - __asm__("push %%es\n\t" - "push %%ds\n\t" - "pop %%es\n\t" - "lcall *(%%esi); cld\n\t" - "pop %%es\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=a" (ret), - "=b" (map), - "=m" (opt) - : "0" (PCIBIOS_GET_ROUTING_OPTIONS), - "1" (0), - "D" ((long) &opt), - "S" (&pci_indirect), - "m" (opt) - : "memory"); - DBG("OK ret=%d, size=%d, map=%x\n", ret, opt.size, map); - if (ret & 0xff00) - printk(KERN_ERR "PCI: Error %02x when fetching IRQ routing table.\n", (ret >> 8) & 0xff); - else if (opt.size) { - rt = kmalloc(sizeof(struct irq_routing_table) + opt.size, GFP_KERNEL); - if (rt) { - memset(rt, 0, sizeof(struct irq_routing_table)); - rt->size = opt.size + sizeof(struct irq_routing_table); - rt->exclusive_irqs = map; - memcpy(rt->slots, (void *) page, opt.size); - printk(KERN_INFO "PCI: Using BIOS Interrupt Routing Table\n"); - } - } - free_page(page); - return rt; -} -EXPORT_SYMBOL(pcibios_get_irq_routing_table); - -int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) -{ - int ret; - - __asm__("lcall *(%%esi); cld\n\t" - "jc 1f\n\t" - "xor %%ah, %%ah\n" - "1:" - : "=a" (ret) - : "0" (PCIBIOS_SET_PCI_HW_INT), - "b" ((dev->bus->number << 8) | dev->devfn), - "c" ((irq << 8) | (pin + 10)), - "S" (&pci_indirect)); - return !(ret & 0xff00); -} -EXPORT_SYMBOL(pcibios_set_irq_routing); - -void __init pci_pcbios_init(void) -{ - if ((pci_probe & PCI_PROBE_BIOS) - && ((raw_pci_ops = pci_find_bios()))) { - pci_probe |= PCI_BIOS_SORT; - pci_bios_present = 1; - } -} - diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h deleted file mode 100644 index 8c66f275756..00000000000 --- a/arch/i386/pci/pci.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Low-Level PCI Access for i386 machines. - * - * (c) 1999 Martin Mares - */ - -#undef DEBUG - -#ifdef DEBUG -#define DBG(x...) printk(x) -#else -#define DBG(x...) -#endif - -#define PCI_PROBE_BIOS 0x0001 -#define PCI_PROBE_CONF1 0x0002 -#define PCI_PROBE_CONF2 0x0004 -#define PCI_PROBE_MMCONF 0x0008 -#define PCI_PROBE_MASK 0x000f -#define PCI_PROBE_NOEARLY 0x0010 - -#define PCI_NO_SORT 0x0100 -#define PCI_BIOS_SORT 0x0200 -#define PCI_NO_CHECKS 0x0400 -#define PCI_USE_PIRQ_MASK 0x0800 -#define PCI_ASSIGN_ROMS 0x1000 -#define PCI_BIOS_IRQ_SCAN 0x2000 -#define PCI_ASSIGN_ALL_BUSSES 0x4000 - -extern unsigned int pci_probe; -extern unsigned long pirq_table_addr; - -enum pci_bf_sort_state { - pci_bf_sort_default, - pci_force_nobf, - pci_force_bf, - pci_dmi_bf, -}; - -/* pci-i386.c */ - -extern unsigned int pcibios_max_latency; - -void pcibios_resource_survey(void); -int pcibios_enable_resources(struct pci_dev *, int); - -/* pci-pc.c */ - -extern int pcibios_last_bus; -extern struct pci_bus *pci_root_bus; -extern struct pci_ops pci_root_ops; - -/* pci-irq.c */ - -struct irq_info { - u8 bus, devfn; /* Bus, device and function */ - struct { - u8 link; /* IRQ line ID, chipset dependent, 0=not routed */ - u16 bitmap; /* Available IRQs */ - } __attribute__((packed)) irq[4]; - u8 slot; /* Slot number, 0=onboard */ - u8 rfu; -} __attribute__((packed)); - -struct irq_routing_table { - u32 signature; /* PIRQ_SIGNATURE should be here */ - u16 version; /* PIRQ_VERSION */ - u16 size; /* Table size in bytes */ - u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ - u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */ - u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */ - u32 miniport_data; /* Crap */ - u8 rfu[11]; - u8 checksum; /* Modulo 256 checksum must give zero */ - struct irq_info slots[0]; -} __attribute__((packed)); - -extern unsigned int pcibios_irq_mask; - -extern int pcibios_scanned; -extern spinlock_t pci_config_lock; - -extern int (*pcibios_enable_irq)(struct pci_dev *dev); -extern void (*pcibios_disable_irq)(struct pci_dev *dev); - -extern int pci_conf1_write(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 value); -extern int pci_conf1_read(unsigned int seg, unsigned int bus, - unsigned int devfn, int reg, int len, u32 *value); - -extern int pci_direct_probe(void); -extern void pci_direct_init(int type); -extern void pci_pcbios_init(void); -extern void pci_mmcfg_init(int type); -extern void pcibios_sort(void); - -/* pci-mmconfig.c */ - -/* Verify the first 16 busses. We assume that systems with more busses - get MCFG right. */ -#define PCI_MMCFG_MAX_CHECK_BUS 16 -extern DECLARE_BITMAP(pci_mmcfg_fallback_slots, 32*PCI_MMCFG_MAX_CHECK_BUS); - -extern int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus, - unsigned int devfn); -extern int __init pci_mmcfg_arch_init(void); - -/* - * AMD Fam10h CPUs are buggy, and cannot access MMIO config space - * on their northbrige except through the * %eax register. As such, you MUST - * NOT use normal IOMEM accesses, you need to only use the magic mmio-config - * accessor functions. - * In fact just use pci_config_*, nothing else please. - */ -static inline unsigned char mmio_config_readb(void __iomem *pos) -{ - u8 val; - asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos)); - return val; -} - -static inline unsigned short mmio_config_readw(void __iomem *pos) -{ - u16 val; - asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos)); - return val; -} - -static inline unsigned int mmio_config_readl(void __iomem *pos) -{ - u32 val; - asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos)); - return val; -} - -static inline void mmio_config_writeb(void __iomem *pos, u8 val) -{ - asm volatile("movb %%al,(%1)" :: "a" (val), "r" (pos) : "memory"); -} - -static inline void mmio_config_writew(void __iomem *pos, u16 val) -{ - asm volatile("movw %%ax,(%1)" :: "a" (val), "r" (pos) : "memory"); -} - -static inline void mmio_config_writel(void __iomem *pos, u32 val) -{ - asm volatile("movl %%eax,(%1)" :: "a" (val), "r" (pos) : "memory"); -} diff --git a/arch/i386/pci/visws.c b/arch/i386/pci/visws.c deleted file mode 100644 index 8ecb1c72259..00000000000 --- a/arch/i386/pci/visws.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Low-Level PCI Support for SGI Visual Workstation - * - * (c) 1999--2000 Martin Mares - */ - -#include -#include -#include - -#include "cobalt.h" -#include "lithium.h" - -#include "pci.h" - - -extern struct pci_raw_ops pci_direct_conf1; - -static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; } -static void pci_visws_disable_irq(struct pci_dev *dev) { } - -int (*pcibios_enable_irq)(struct pci_dev *dev) = &pci_visws_enable_irq; -void (*pcibios_disable_irq)(struct pci_dev *dev) = &pci_visws_disable_irq; - -void __init pcibios_penalize_isa_irq(int irq, int active) {} - - -unsigned int pci_bus0, pci_bus1; - -static inline u8 bridge_swizzle(u8 pin, u8 slot) -{ - return (((pin - 1) + slot) % 4) + 1; -} - -static u8 __init visws_swizzle(struct pci_dev *dev, u8 *pinp) -{ - u8 pin = *pinp; - - while (dev->bus->self) { /* Move up the chain of bridges. */ - pin = bridge_swizzle(pin, PCI_SLOT(dev->devfn)); - dev = dev->bus->self; - } - *pinp = pin; - - return PCI_SLOT(dev->devfn); -} - -static int __init visws_map_irq(struct pci_dev *dev, u8 slot, u8 pin) -{ - int irq, bus = dev->bus->number; - - pin--; - - /* Nothing useful at PIIX4 pin 1 */ - if (bus == pci_bus0 && slot == 4 && pin == 0) - return -1; - - /* PIIX4 USB is on Bus 0, Slot 4, Line 3 */ - if (bus == pci_bus0 && slot == 4 && pin == 3) { - irq = CO_IRQ(CO_APIC_PIIX4_USB); - goto out; - } - - /* First pin spread down 1 APIC entry per slot */ - if (pin == 0) { - irq = CO_IRQ((bus == pci_bus0 ? CO_APIC_PCIB_BASE0 : - CO_APIC_PCIA_BASE0) + slot); - goto out; - } - - /* lines 1,2,3 from any slot is shared in this twirly pattern */ - if (bus == pci_bus1) { - /* lines 1-3 from devices 0 1 rotate over 2 apic entries */ - irq = CO_IRQ(CO_APIC_PCIA_BASE123 + ((slot + (pin - 1)) % 2)); - } else { /* bus == pci_bus0 */ - /* lines 1-3 from devices 0-3 rotate over 3 apic entries */ - if (slot == 0) - slot = 3; /* same pattern */ - irq = CO_IRQ(CO_APIC_PCIA_BASE123 + ((3 - slot) + (pin - 1) % 3)); - } -out: - printk(KERN_DEBUG "PCI: Bus %d Slot %d Line %d -> IRQ %d\n", bus, slot, pin, irq); - return irq; -} - -void __init pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - -static int __init pcibios_init(void) -{ - /* The VISWS supports configuration access type 1 only */ - pci_probe = (pci_probe | PCI_PROBE_CONF1) & - ~(PCI_PROBE_BIOS | PCI_PROBE_CONF2); - - pci_bus0 = li_pcib_read16(LI_PCI_BUSNUM) & 0xff; - pci_bus1 = li_pcia_read16(LI_PCI_BUSNUM) & 0xff; - - printk(KERN_INFO "PCI: Lithium bridge A bus: %u, " - "bridge B (PIIX4) bus: %u\n", pci_bus1, pci_bus0); - - raw_pci_ops = &pci_direct_conf1; - pci_scan_bus_with_sysdata(pci_bus0); - pci_scan_bus_with_sysdata(pci_bus1); - pci_fixup_irqs(visws_swizzle, visws_map_irq); - pcibios_resource_survey(); - return 0; -} - -subsys_initcall(pcibios_init); diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile new file mode 100644 index 00000000000..b3e54c45d40 --- /dev/null +++ b/arch/x86/pci/Makefile @@ -0,0 +1,5 @@ +ifeq ($(CONFIG_X86_32),y) +include ${srctree}/arch/x86/pci/Makefile_32 +else +include ${srctree}/arch/x86_64/pci/Makefile_64 +endif diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32 new file mode 100644 index 00000000000..cdd6828b5ab --- /dev/null +++ b/arch/x86/pci/Makefile_32 @@ -0,0 +1,14 @@ +obj-y := i386.o init.o + +obj-$(CONFIG_PCI_BIOS) += pcbios.o +obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_32.o direct.o mmconfig-shared.o +obj-$(CONFIG_PCI_DIRECT) += direct.o + +pci-y := fixup.o +pci-$(CONFIG_ACPI) += acpi.o +pci-y += legacy.o irq.o + +pci-$(CONFIG_X86_VISWS) := visws.o fixup.o +pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o + +obj-y += $(pci-y) common.o early.o diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c new file mode 100644 index 00000000000..bc8a44bddaa --- /dev/null +++ b/arch/x86/pci/acpi.c @@ -0,0 +1,90 @@ +#include +#include +#include +#include +#include +#include "pci.h" + +struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum) +{ + struct pci_bus *bus; + struct pci_sysdata *sd; + int pxm; + + /* Allocate per-root-bus (not per bus) arch-specific data. + * TODO: leak; this memory is never freed. + * It's arguable whether it's worth the trouble to care. + */ + sd = kzalloc(sizeof(*sd), GFP_KERNEL); + if (!sd) { + printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum); + return NULL; + } + + if (domain != 0) { + printk(KERN_WARNING "PCI: Multiple domains not supported\n"); + kfree(sd); + return NULL; + } + + sd->node = -1; + + pxm = acpi_get_pxm(device->handle); +#ifdef CONFIG_ACPI_NUMA + if (pxm >= 0) + sd->node = pxm_to_node(pxm); +#endif + + bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); + if (!bus) + kfree(sd); + +#ifdef CONFIG_ACPI_NUMA + if (bus != NULL) { + if (pxm >= 0) { + printk("bus %d -> pxm %d -> node %d\n", + busnum, pxm, sd->node); + } + } +#endif + + return bus; +} + +extern int pci_routeirq; +static int __init pci_acpi_init(void) +{ + struct pci_dev *dev = NULL; + + if (pcibios_scanned) + return 0; + + if (acpi_noirq) + return 0; + + printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); + acpi_irq_penalty_init(); + pcibios_scanned++; + pcibios_enable_irq = acpi_pci_irq_enable; + pcibios_disable_irq = acpi_pci_irq_disable; + + if (pci_routeirq) { + /* + * PCI IRQ routing is set up by pci_enable_device(), but we + * also do it here in case there are still broken drivers that + * don't use pci_enable_device(). + */ + printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n"); + for_each_pci_dev(dev) + acpi_pci_irq_enable(dev); + } else + printk(KERN_INFO "PCI: If a device doesn't work, try \"pci=routeirq\". If it helps, post a report\n"); + +#ifdef CONFIG_X86_IO_APIC + if (acpi_ioapic) + print_IO_APIC(); +#endif + + return 0; +} +subsys_initcall(pci_acpi_init); diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c new file mode 100644 index 00000000000..ebc6f3c6634 --- /dev/null +++ b/arch/x86/pci/common.c @@ -0,0 +1,480 @@ +/* + * Low-Level PCI Support for PC + * + * (c) 1999--2000 Martin Mares + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "pci.h" + +unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | + PCI_PROBE_MMCONF; + +static int pci_bf_sort; +int pci_routeirq; +int pcibios_last_bus = -1; +unsigned long pirq_table_addr; +struct pci_bus *pci_root_bus; +struct pci_raw_ops *raw_pci_ops; + +static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) +{ + return raw_pci_ops->read(0, bus->number, devfn, where, size, value); +} + +static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value) +{ + return raw_pci_ops->write(0, bus->number, devfn, where, size, value); +} + +struct pci_ops pci_root_ops = { + .read = pci_read, + .write = pci_write, +}; + +/* + * legacy, numa, and acpi all want to call pcibios_scan_root + * from their initcalls. This flag prevents that. + */ +int pcibios_scanned; + +/* + * This interrupt-safe spinlock protects all accesses to PCI + * configuration space. + */ +DEFINE_SPINLOCK(pci_config_lock); + +/* + * Several buggy motherboards address only 16 devices and mirror + * them to next 16 IDs. We try to detect this `feature' on all + * primary buses (those containing host bridges as they are + * expected to be unique) and remove the ghost devices. + */ + +static void __devinit pcibios_fixup_ghosts(struct pci_bus *b) +{ + struct list_head *ln, *mn; + struct pci_dev *d, *e; + int mirror = PCI_DEVFN(16,0); + int seen_host_bridge = 0; + int i; + + DBG("PCI: Scanning for ghost devices on bus %d\n", b->number); + list_for_each(ln, &b->devices) { + d = pci_dev_b(ln); + if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST) + seen_host_bridge++; + for (mn=ln->next; mn != &b->devices; mn=mn->next) { + e = pci_dev_b(mn); + if (e->devfn != d->devfn + mirror || + e->vendor != d->vendor || + e->device != d->device || + e->class != d->class) + continue; + for(i=0; iresource[i].start != d->resource[i].start || + e->resource[i].end != d->resource[i].end || + e->resource[i].flags != d->resource[i].flags) + continue; + break; + } + if (mn == &b->devices) + return; + } + if (!seen_host_bridge) + return; + printk(KERN_WARNING "PCI: Ignoring ghost devices on bus %02x\n", b->number); + + ln = &b->devices; + while (ln->next != &b->devices) { + d = pci_dev_b(ln->next); + if (d->devfn >= mirror) { + list_del(&d->global_list); + list_del(&d->bus_list); + kfree(d); + } else + ln = ln->next; + } +} + +/* + * Called after each bus is probed, but before its children + * are examined. + */ + +void __devinit pcibios_fixup_bus(struct pci_bus *b) +{ + pcibios_fixup_ghosts(b); + pci_read_bridge_bases(b); +} + +/* + * Only use DMI information to set this if nothing was passed + * on the kernel command line (which was parsed earlier). + */ + +static int __devinit set_bf_sort(struct dmi_system_id *d) +{ + if (pci_bf_sort == pci_bf_sort_default) { + pci_bf_sort = pci_dmi_bf; + printk(KERN_INFO "PCI: %s detected, enabling pci=bfsort.\n", d->ident); + } + return 0; +} + +/* + * Enable renumbering of PCI bus# ranges to reach all PCI busses (Cardbus) + */ +#ifdef __i386__ +static int __devinit assign_all_busses(struct dmi_system_id *d) +{ + pci_probe |= PCI_ASSIGN_ALL_BUSSES; + printk(KERN_INFO "%s detected: enabling PCI bus# renumbering" + " (pci=assign-busses)\n", d->ident); + return 0; +} +#endif + +static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = { +#ifdef __i386__ +/* + * Laptops which need pci=assign-busses to see Cardbus cards + */ + { + .callback = assign_all_busses, + .ident = "Samsung X20 Laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Samsung Electronics"), + DMI_MATCH(DMI_PRODUCT_NAME, "SX20S"), + }, + }, +#endif /* __i386__ */ + { + .callback = set_bf_sort, + .ident = "Dell PowerEdge 1950", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell"), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1950"), + }, + }, + { + .callback = set_bf_sort, + .ident = "Dell PowerEdge 1955", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell"), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1955"), + }, + }, + { + .callback = set_bf_sort, + .ident = "Dell PowerEdge 2900", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell"), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2900"), + }, + }, + { + .callback = set_bf_sort, + .ident = "Dell PowerEdge 2950", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell"), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2950"), + }, + }, + { + .callback = set_bf_sort, + .ident = "Dell PowerEdge R900", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell"), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R900"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL20p G3", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL20p G3"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL20p G4", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL20p G4"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL30p G1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL30p G1"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL25p G1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL25p G1"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL35p G1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL35p G1"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL45p G1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL45p G1"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL45p G2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL45p G2"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL460c G1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL460c G1"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL465c G1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL465c G1"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL480c G1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL480c G1"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant BL685c G1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL685c G1"), + }, + }, + {} +}; + +struct pci_bus * __devinit pcibios_scan_root(int busnum) +{ + struct pci_bus *bus = NULL; + struct pci_sysdata *sd; + + dmi_check_system(pciprobe_dmi_table); + + while ((bus = pci_find_next_bus(bus)) != NULL) { + if (bus->number == busnum) { + /* Already scanned */ + return bus; + } + } + + /* Allocate per-root-bus (not per bus) arch-specific data. + * TODO: leak; this memory is never freed. + * It's arguable whether it's worth the trouble to care. + */ + sd = kzalloc(sizeof(*sd), GFP_KERNEL); + if (!sd) { + printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum); + return NULL; + } + + printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); + + return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); +} + +extern u8 pci_cache_line_size; + +static int __init pcibios_init(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + if (!raw_pci_ops) { + printk(KERN_WARNING "PCI: System does not support PCI\n"); + return 0; + } + + /* + * Assume PCI cacheline size of 32 bytes for all x86s except K7/K8 + * and P4. It's also good for 386/486s (which actually have 16) + * as quite a few PCI devices do not support smaller values. + */ + pci_cache_line_size = 32 >> 2; + if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD) + pci_cache_line_size = 64 >> 2; /* K7 & K8 */ + else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL) + pci_cache_line_size = 128 >> 2; /* P4 */ + + pcibios_resource_survey(); + + if (pci_bf_sort >= pci_force_bf) + pci_sort_breadthfirst(); +#ifdef CONFIG_PCI_BIOS + if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT)) + pcibios_sort(); +#endif + return 0; +} + +subsys_initcall(pcibios_init); + +char * __devinit pcibios_setup(char *str) +{ + if (!strcmp(str, "off")) { + pci_probe = 0; + return NULL; + } else if (!strcmp(str, "bfsort")) { + pci_bf_sort = pci_force_bf; + return NULL; + } else if (!strcmp(str, "nobfsort")) { + pci_bf_sort = pci_force_nobf; + return NULL; + } +#ifdef CONFIG_PCI_BIOS + else if (!strcmp(str, "bios")) { + pci_probe = PCI_PROBE_BIOS; + return NULL; + } else if (!strcmp(str, "nobios")) { + pci_probe &= ~PCI_PROBE_BIOS; + return NULL; + } else if (!strcmp(str, "nosort")) { + pci_probe |= PCI_NO_SORT; + return NULL; + } else if (!strcmp(str, "biosirq")) { + pci_probe |= PCI_BIOS_IRQ_SCAN; + return NULL; + } else if (!strncmp(str, "pirqaddr=", 9)) { + pirq_table_addr = simple_strtoul(str+9, NULL, 0); + return NULL; + } +#endif +#ifdef CONFIG_PCI_DIRECT + else if (!strcmp(str, "conf1")) { + pci_probe = PCI_PROBE_CONF1 | PCI_NO_CHECKS; + return NULL; + } + else if (!strcmp(str, "conf2")) { + pci_probe = PCI_PROBE_CONF2 | PCI_NO_CHECKS; + return NULL; + } +#endif +#ifdef CONFIG_PCI_MMCONFIG + else if (!strcmp(str, "nommconf")) { + pci_probe &= ~PCI_PROBE_MMCONF; + return NULL; + } +#endif + else if (!strcmp(str, "noacpi")) { + acpi_noirq_set(); + return NULL; + } + else if (!strcmp(str, "noearly")) { + pci_probe |= PCI_PROBE_NOEARLY; + return NULL; + } +#ifndef CONFIG_X86_VISWS + else if (!strcmp(str, "usepirqmask")) { + pci_probe |= PCI_USE_PIRQ_MASK; + return NULL; + } else if (!strncmp(str, "irqmask=", 8)) { + pcibios_irq_mask = simple_strtol(str+8, NULL, 0); + return NULL; + } else if (!strncmp(str, "lastbus=", 8)) { + pcibios_last_bus = simple_strtol(str+8, NULL, 0); + return NULL; + } +#endif + else if (!strcmp(str, "rom")) { + pci_probe |= PCI_ASSIGN_ROMS; + return NULL; + } else if (!strcmp(str, "assign-busses")) { + pci_probe |= PCI_ASSIGN_ALL_BUSSES; + return NULL; + } else if (!strcmp(str, "routeirq")) { + pci_routeirq = 1; + return NULL; + } + return str; +} + +unsigned int pcibios_assign_all_busses(void) +{ + return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0; +} + +int pcibios_enable_device(struct pci_dev *dev, int mask) +{ + int err; + + if ((err = pcibios_enable_resources(dev, mask)) < 0) + return err; + + if (!dev->msi_enabled) + return pcibios_enable_irq(dev); + return 0; +} + +void pcibios_disable_device (struct pci_dev *dev) +{ + if (!dev->msi_enabled && pcibios_disable_irq) + pcibios_disable_irq(dev); +} + +struct pci_bus *pci_scan_bus_with_sysdata(int busno) +{ + struct pci_bus *bus = NULL; + struct pci_sysdata *sd; + + /* + * Allocate per-root-bus (not per bus) arch-specific data. + * TODO: leak; this memory is never freed. + * It's arguable whether it's worth the trouble to care. + */ + sd = kzalloc(sizeof(*sd), GFP_KERNEL); + if (!sd) { + printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busno); + return NULL; + } + sd->node = -1; + bus = pci_scan_bus(busno, &pci_root_ops, sd); + if (!bus) + kfree(sd); + + return bus; +} diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c new file mode 100644 index 00000000000..431c9a51b15 --- /dev/null +++ b/arch/x86/pci/direct.c @@ -0,0 +1,302 @@ +/* + * direct.c - Low-level direct PCI config space access + */ + +#include +#include +#include +#include "pci.h" + +/* + * Functions for accessing PCI configuration space with type 1 accesses + */ + +#define PCI_CONF1_ADDRESS(bus, devfn, reg) \ + (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3)) + +int pci_conf1_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value) +{ + unsigned long flags; + + if ((bus > 255) || (devfn > 255) || (reg > 255)) { + *value = -1; + return -EINVAL; + } + + spin_lock_irqsave(&pci_config_lock, flags); + + outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); + + switch (len) { + case 1: + *value = inb(0xCFC + (reg & 3)); + break; + case 2: + *value = inw(0xCFC + (reg & 2)); + break; + case 4: + *value = inl(0xCFC); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +int pci_conf1_write(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 value) +{ + unsigned long flags; + + if ((bus > 255) || (devfn > 255) || (reg > 255)) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + outl(PCI_CONF1_ADDRESS(bus, devfn, reg), 0xCF8); + + switch (len) { + case 1: + outb((u8)value, 0xCFC + (reg & 3)); + break; + case 2: + outw((u16)value, 0xCFC + (reg & 2)); + break; + case 4: + outl((u32)value, 0xCFC); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +#undef PCI_CONF1_ADDRESS + +struct pci_raw_ops pci_direct_conf1 = { + .read = pci_conf1_read, + .write = pci_conf1_write, +}; + + +/* + * Functions for accessing PCI configuration space with type 2 accesses + */ + +#define PCI_CONF2_ADDRESS(dev, reg) (u16)(0xC000 | (dev << 8) | reg) + +static int pci_conf2_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value) +{ + unsigned long flags; + int dev, fn; + + if ((bus > 255) || (devfn > 255) || (reg > 255)) { + *value = -1; + return -EINVAL; + } + + dev = PCI_SLOT(devfn); + fn = PCI_FUNC(devfn); + + if (dev & 0x10) + return PCIBIOS_DEVICE_NOT_FOUND; + + spin_lock_irqsave(&pci_config_lock, flags); + + outb((u8)(0xF0 | (fn << 1)), 0xCF8); + outb((u8)bus, 0xCFA); + + switch (len) { + case 1: + *value = inb(PCI_CONF2_ADDRESS(dev, reg)); + break; + case 2: + *value = inw(PCI_CONF2_ADDRESS(dev, reg)); + break; + case 4: + *value = inl(PCI_CONF2_ADDRESS(dev, reg)); + break; + } + + outb(0, 0xCF8); + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +static int pci_conf2_write(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 value) +{ + unsigned long flags; + int dev, fn; + + if ((bus > 255) || (devfn > 255) || (reg > 255)) + return -EINVAL; + + dev = PCI_SLOT(devfn); + fn = PCI_FUNC(devfn); + + if (dev & 0x10) + return PCIBIOS_DEVICE_NOT_FOUND; + + spin_lock_irqsave(&pci_config_lock, flags); + + outb((u8)(0xF0 | (fn << 1)), 0xCF8); + outb((u8)bus, 0xCFA); + + switch (len) { + case 1: + outb((u8)value, PCI_CONF2_ADDRESS(dev, reg)); + break; + case 2: + outw((u16)value, PCI_CONF2_ADDRESS(dev, reg)); + break; + case 4: + outl((u32)value, PCI_CONF2_ADDRESS(dev, reg)); + break; + } + + outb(0, 0xCF8); + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +#undef PCI_CONF2_ADDRESS + +static struct pci_raw_ops pci_direct_conf2 = { + .read = pci_conf2_read, + .write = pci_conf2_write, +}; + + +/* + * Before we decide to use direct hardware access mechanisms, we try to do some + * trivial checks to ensure it at least _seems_ to be working -- we just test + * whether bus 00 contains a host bridge (this is similar to checking + * techniques used in XFree86, but ours should be more reliable since we + * attempt to make use of direct access hints provided by the PCI BIOS). + * + * This should be close to trivial, but it isn't, because there are buggy + * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID. + */ +static int __init pci_sanity_check(struct pci_raw_ops *o) +{ + u32 x = 0; + int devfn; + + if (pci_probe & PCI_NO_CHECKS) + return 1; + /* Assume Type 1 works for newer systems. + This handles machines that don't have anything on PCI Bus 0. */ + if (dmi_get_year(DMI_BIOS_DATE) >= 2001) + return 1; + + for (devfn = 0; devfn < 0x100; devfn++) { + if (o->read(0, 0, devfn, PCI_CLASS_DEVICE, 2, &x)) + continue; + if (x == PCI_CLASS_BRIDGE_HOST || x == PCI_CLASS_DISPLAY_VGA) + return 1; + + if (o->read(0, 0, devfn, PCI_VENDOR_ID, 2, &x)) + continue; + if (x == PCI_VENDOR_ID_INTEL || x == PCI_VENDOR_ID_COMPAQ) + return 1; + } + + DBG(KERN_WARNING "PCI: Sanity check failed\n"); + return 0; +} + +static int __init pci_check_type1(void) +{ + unsigned long flags; + unsigned int tmp; + int works = 0; + + local_irq_save(flags); + + outb(0x01, 0xCFB); + tmp = inl(0xCF8); + outl(0x80000000, 0xCF8); + if (inl(0xCF8) == 0x80000000 && pci_sanity_check(&pci_direct_conf1)) { + works = 1; + } + outl(tmp, 0xCF8); + local_irq_restore(flags); + + return works; +} + +static int __init pci_check_type2(void) +{ + unsigned long flags; + int works = 0; + + local_irq_save(flags); + + outb(0x00, 0xCFB); + outb(0x00, 0xCF8); + outb(0x00, 0xCFA); + if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00 && + pci_sanity_check(&pci_direct_conf2)) { + works = 1; + } + + local_irq_restore(flags); + + return works; +} + +void __init pci_direct_init(int type) +{ + if (type == 0) + return; + printk(KERN_INFO "PCI: Using configuration type %d\n", type); + if (type == 1) + raw_pci_ops = &pci_direct_conf1; + else + raw_pci_ops = &pci_direct_conf2; +} + +int __init pci_direct_probe(void) +{ + struct resource *region, *region2; + + if ((pci_probe & PCI_PROBE_CONF1) == 0) + goto type2; + region = request_region(0xCF8, 8, "PCI conf1"); + if (!region) + goto type2; + + if (pci_check_type1()) + return 1; + release_resource(region); + + type2: + if ((pci_probe & PCI_PROBE_CONF2) == 0) + return 0; + region = request_region(0xCF8, 4, "PCI conf2"); + if (!region) + return 0; + region2 = request_region(0xC000, 0x1000, "PCI conf2"); + if (!region2) + goto fail2; + + if (pci_check_type2()) { + printk(KERN_INFO "PCI: Using configuration type 2\n"); + raw_pci_ops = &pci_direct_conf2; + return 2; + } + + release_resource(region2); + fail2: + release_resource(region); + return 0; +} diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c new file mode 100644 index 00000000000..42df4b6606d --- /dev/null +++ b/arch/x86/pci/early.c @@ -0,0 +1,59 @@ +#include +#include +#include +#include +#include "pci.h" + +/* Direct PCI access. This is used for PCI accesses in early boot before + the PCI subsystem works. */ + +#define PDprintk(x...) + +u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset) +{ + u32 v; + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + v = inl(0xcfc); + if (v != 0xffffffff) + PDprintk("%x reading 4 from %x: %x\n", slot, offset, v); + return v; +} + +u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset) +{ + u8 v; + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + v = inb(0xcfc + (offset&3)); + PDprintk("%x reading 1 from %x: %x\n", slot, offset, v); + return v; +} + +u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset) +{ + u16 v; + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + v = inw(0xcfc + (offset&2)); + PDprintk("%x reading 2 from %x: %x\n", slot, offset, v); + return v; +} + +void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, + u32 val) +{ + PDprintk("%x writing to %x: %x\n", slot, offset, val); + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + outl(val, 0xcfc); +} + +void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val) +{ + PDprintk("%x writing to %x: %x\n", slot, offset, val); + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + outb(val, 0xcfc); +} + +int early_pci_allowed(void) +{ + return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) == + PCI_PROBE_CONF1; +} diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c new file mode 100644 index 00000000000..c82cbf4c722 --- /dev/null +++ b/arch/x86/pci/fixup.c @@ -0,0 +1,446 @@ +/* + * Exceptions for specific devices. Usually work-arounds for fatal design flaws. + */ + +#include +#include +#include +#include +#include "pci.h" + + +static void __devinit pci_fixup_i450nx(struct pci_dev *d) +{ + /* + * i450NX -- Find and scan all secondary buses on all PXB's. + */ + int pxb, reg; + u8 busno, suba, subb; + + printk(KERN_WARNING "PCI: Searching for i450NX host bridges on %s\n", pci_name(d)); + reg = 0xd0; + for(pxb=0; pxb<2; pxb++) { + pci_read_config_byte(d, reg++, &busno); + pci_read_config_byte(d, reg++, &suba); + pci_read_config_byte(d, reg++, &subb); + DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); + if (busno) + pci_scan_bus_with_sysdata(busno); /* Bus A */ + if (suba < subb) + pci_scan_bus_with_sysdata(suba+1); /* Bus B */ + } + pcibios_last_bus = -1; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx); + +static void __devinit pci_fixup_i450gx(struct pci_dev *d) +{ + /* + * i450GX and i450KX -- Find and scan all secondary buses. + * (called separately for each PCI bridge found) + */ + u8 busno; + pci_read_config_byte(d, 0x4a, &busno); + printk(KERN_INFO "PCI: i440KX/GX host bridge %s: secondary bus %02x\n", pci_name(d), busno); + pci_scan_bus_with_sysdata(busno); + pcibios_last_bus = -1; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx); + +static void __devinit pci_fixup_umc_ide(struct pci_dev *d) +{ + /* + * UM8886BF IDE controller sets region type bits incorrectly, + * therefore they look like memory despite of them being I/O. + */ + int i; + + printk(KERN_WARNING "PCI: Fixing base address flags for device %s\n", pci_name(d)); + for(i=0; i<4; i++) + d->resource[i].flags |= PCI_BASE_ADDRESS_SPACE_IO; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide); + +static void __devinit pci_fixup_ncr53c810(struct pci_dev *d) +{ + /* + * NCR 53C810 returns class code 0 (at least on some systems). + * Fix class to be PCI_CLASS_STORAGE_SCSI + */ + if (!d->class) { + printk(KERN_WARNING "PCI: fixing NCR 53C810 class code for %s\n", pci_name(d)); + d->class = PCI_CLASS_STORAGE_SCSI << 8; + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810); + +static void __devinit pci_fixup_latency(struct pci_dev *d) +{ + /* + * SiS 5597 and 5598 chipsets require latency timer set to + * at most 32 to avoid lockups. + */ + DBG("PCI: Setting max latency to 32\n"); + pcibios_max_latency = 32; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, pci_fixup_latency); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5598, pci_fixup_latency); + +static void __devinit pci_fixup_piix4_acpi(struct pci_dev *d) +{ + /* + * PIIX4 ACPI device: hardwired IRQ9 + */ + d->irq = 9; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_3, pci_fixup_piix4_acpi); + +/* + * Addresses issues with problems in the memory write queue timer in + * certain VIA Northbridges. This bugfix is per VIA's specifications, + * except for the KL133/KM133: clearing bit 5 on those Northbridges seems + * to trigger a bug in its integrated ProSavage video card, which + * causes screen corruption. We only clear bits 6 and 7 for that chipset, + * until VIA can provide us with definitive information on why screen + * corruption occurs, and what exactly those bits do. + * + * VIA 8363,8622,8361 Northbridges: + * - bits 5, 6, 7 at offset 0x55 need to be turned off + * VIA 8367 (KT266x) Northbridges: + * - bits 5, 6, 7 at offset 0x95 need to be turned off + * VIA 8363 rev 0x81/0x84 (KL133/KM133) Northbridges: + * - bits 6, 7 at offset 0x55 need to be turned off + */ + +#define VIA_8363_KL133_REVISION_ID 0x81 +#define VIA_8363_KM133_REVISION_ID 0x84 + +static void pci_fixup_via_northbridge_bug(struct pci_dev *d) +{ + u8 v; + int where = 0x55; + int mask = 0x1f; /* clear bits 5, 6, 7 by default */ + + if (d->device == PCI_DEVICE_ID_VIA_8367_0) { + /* fix pci bus latency issues resulted by NB bios error + it appears on bug free^Wreduced kt266x's bios forces + NB latency to zero */ + pci_write_config_byte(d, PCI_LATENCY_TIMER, 0); + + where = 0x95; /* the memory write queue timer register is + different for the KT266x's: 0x95 not 0x55 */ + } else if (d->device == PCI_DEVICE_ID_VIA_8363_0 && + (d->revision == VIA_8363_KL133_REVISION_ID || + d->revision == VIA_8363_KM133_REVISION_ID)) { + mask = 0x3f; /* clear only bits 6 and 7; clearing bit 5 + causes screen corruption on the KL133/KM133 */ + } + + pci_read_config_byte(d, where, &v); + if (v & ~mask) { + printk(KERN_WARNING "Disabling VIA memory write queue (PCI ID %04x, rev %02x): [%02x] %02x & %02x -> %02x\n", \ + d->device, d->revision, where, v, mask, v & mask); + v &= mask; + pci_write_config_byte(d, where, v); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, pci_fixup_via_northbridge_bug); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8622, pci_fixup_via_northbridge_bug); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, pci_fixup_via_northbridge_bug); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_via_northbridge_bug); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8363_0, pci_fixup_via_northbridge_bug); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8622, pci_fixup_via_northbridge_bug); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8361, pci_fixup_via_northbridge_bug); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_via_northbridge_bug); + +/* + * For some reasons Intel decided that certain parts of their + * 815, 845 and some other chipsets must look like PCI-to-PCI bridges + * while they are obviously not. The 82801 family (AA, AB, BAM/CAM, + * BA/CA/DB and E) PCI bridges are actually HUB-to-PCI ones, according + * to Intel terminology. These devices do forward all addresses from + * system to PCI bus no matter what are their window settings, so they are + * "transparent" (or subtractive decoding) from programmers point of view. + */ +static void __devinit pci_fixup_transparent_bridge(struct pci_dev *dev) +{ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && + (dev->device & 0xff00) == 0x2400) + dev->transparent = 1; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge); + +/* + * Fixup for C1 Halt Disconnect problem on nForce2 systems. + * + * From information provided by "Allen Martin" : + * + * A hang is caused when the CPU generates a very fast CONNECT/HALT cycle + * sequence. Workaround is to set the SYSTEM_IDLE_TIMEOUT to 80 ns. + * This allows the state-machine and timer to return to a proper state within + * 80 ns of the CONNECT and probe appearing together. Since the CPU will not + * issue another HALT within 80 ns of the initial HALT, the failure condition + * is avoided. + */ +static void pci_fixup_nforce2(struct pci_dev *dev) +{ + u32 val; + + /* + * Chip Old value New value + * C17 0x1F0FFF01 0x1F01FF01 + * C18D 0x9F0FFF01 0x9F01FF01 + * + * Northbridge chip version may be determined by + * reading the PCI revision ID (0xC1 or greater is C18D). + */ + pci_read_config_dword(dev, 0x6c, &val); + + /* + * Apply fixup if needed, but don't touch disconnect state + */ + if ((val & 0x00FF0000) != 0x00010000) { + printk(KERN_WARNING "PCI: nForce2 C1 Halt Disconnect fixup\n"); + pci_write_config_dword(dev, 0x6c, (val & 0xFF00FFFF) | 0x00010000); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, pci_fixup_nforce2); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE2, pci_fixup_nforce2); + +/* Max PCI Express root ports */ +#define MAX_PCIEROOT 6 +static int quirk_aspm_offset[MAX_PCIEROOT << 3]; + +#define GET_INDEX(a, b) ((((a) - PCI_DEVICE_ID_INTEL_MCH_PA) << 3) + ((b) & 7)) + +static int quirk_pcie_aspm_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) +{ + return raw_pci_ops->read(0, bus->number, devfn, where, size, value); +} + +/* + * Replace the original pci bus ops for write with a new one that will filter + * the request to insure ASPM cannot be enabled. + */ +static int quirk_pcie_aspm_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value) +{ + u8 offset; + + offset = quirk_aspm_offset[GET_INDEX(bus->self->device, devfn)]; + + if ((offset) && (where == offset)) + value = value & 0xfffffffc; + + return raw_pci_ops->write(0, bus->number, devfn, where, size, value); +} + +static struct pci_ops quirk_pcie_aspm_ops = { + .read = quirk_pcie_aspm_read, + .write = quirk_pcie_aspm_write, +}; + +/* + * Prevents PCI Express ASPM (Active State Power Management) being enabled. + * + * Save the register offset, where the ASPM control bits are located, + * for each PCI Express device that is in the device list of + * the root port in an array for fast indexing. Replace the bus ops + * with the modified one. + */ +static void pcie_rootport_aspm_quirk(struct pci_dev *pdev) +{ + int cap_base, i; + struct pci_bus *pbus; + struct pci_dev *dev; + + if ((pbus = pdev->subordinate) == NULL) + return; + + /* + * Check if the DID of pdev matches one of the six root ports. This + * check is needed in the case this function is called directly by the + * hot-plug driver. + */ + if ((pdev->device < PCI_DEVICE_ID_INTEL_MCH_PA) || + (pdev->device > PCI_DEVICE_ID_INTEL_MCH_PC1)) + return; + + if (list_empty(&pbus->devices)) { + /* + * If no device is attached to the root port at power-up or + * after hot-remove, the pbus->devices is empty and this code + * will set the offsets to zero and the bus ops to parent's bus + * ops, which is unmodified. + */ + for (i= GET_INDEX(pdev->device, 0); i <= GET_INDEX(pdev->device, 7); ++i) + quirk_aspm_offset[i] = 0; + + pbus->ops = pbus->parent->ops; + } else { + /* + * If devices are attached to the root port at power-up or + * after hot-add, the code loops through the device list of + * each root port to save the register offsets and replace the + * bus ops. + */ + list_for_each_entry(dev, &pbus->devices, bus_list) { + /* There are 0 to 8 devices attached to this bus */ + cap_base = pci_find_capability(dev, PCI_CAP_ID_EXP); + quirk_aspm_offset[GET_INDEX(pdev->device, dev->devfn)]= cap_base + 0x10; + } + pbus->ops = &quirk_pcie_aspm_ops; + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PA, pcie_rootport_aspm_quirk ); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PA1, pcie_rootport_aspm_quirk ); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PB, pcie_rootport_aspm_quirk ); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PB1, pcie_rootport_aspm_quirk ); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC, pcie_rootport_aspm_quirk ); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC1, pcie_rootport_aspm_quirk ); + +/* + * Fixup to mark boot BIOS video selected by BIOS before it changes + * + * From information provided by "Jon Smirl" + * + * The standard boot ROM sequence for an x86 machine uses the BIOS + * to select an initial video card for boot display. This boot video + * card will have it's BIOS copied to C0000 in system RAM. + * IORESOURCE_ROM_SHADOW is used to associate the boot video + * card with this copy. On laptops this copy has to be used since + * the main ROM may be compressed or combined with another image. + * See pci_map_rom() for use of this flag. IORESOURCE_ROM_SHADOW + * is marked here since the boot video device will be the only enabled + * video device at this point. + */ + +static void __devinit pci_fixup_video(struct pci_dev *pdev) +{ + struct pci_dev *bridge; + struct pci_bus *bus; + u16 config; + + if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA) + return; + + /* Is VGA routed to us? */ + bus = pdev->bus; + while (bus) { + bridge = bus->self; + + /* + * From information provided by + * "David Miller" + * The bridge control register is valid for PCI header + * type BRIDGE, or CARDBUS. Host to PCI controllers use + * PCI header type NORMAL. + */ + if (bridge + &&((bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE) + ||(bridge->hdr_type == PCI_HEADER_TYPE_CARDBUS))) { + pci_read_config_word(bridge, PCI_BRIDGE_CONTROL, + &config); + if (!(config & PCI_BRIDGE_CTL_VGA)) + return; + } + bus = bus->parent; + } + pci_read_config_word(pdev, PCI_COMMAND, &config); + if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { + pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW; + printk(KERN_DEBUG "Boot video device is %s\n", pci_name(pdev)); + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video); + +/* + * Some Toshiba laptops need extra code to enable their TI TSB43AB22/A. + * + * We pretend to bring them out of full D3 state, and restore the proper + * IRQ, PCI cache line size, and BARs, otherwise the device won't function + * properly. In some cases, the device will generate an interrupt on + * the wrong IRQ line, causing any devices sharing the line it's + * *supposed* to use to be disabled by the kernel's IRQ debug code. + */ +static u16 toshiba_line_size; + +static struct dmi_system_id __devinitdata toshiba_ohci1394_dmi_table[] = { + { + .ident = "Toshiba PS5 based laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_VERSION, "PS5"), + }, + }, + { + .ident = "Toshiba PSM4 based laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_VERSION, "PSM4"), + }, + }, + { + .ident = "Toshiba A40 based laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_VERSION, "PSA40U"), + }, + }, + { } +}; + +static void __devinit pci_pre_fixup_toshiba_ohci1394(struct pci_dev *dev) +{ + if (!dmi_check_system(toshiba_ohci1394_dmi_table)) + return; /* only applies to certain Toshibas (so far) */ + + dev->current_state = PCI_D3cold; + pci_read_config_word(dev, PCI_CACHE_LINE_SIZE, &toshiba_line_size); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, 0x8032, + pci_pre_fixup_toshiba_ohci1394); + +static void __devinit pci_post_fixup_toshiba_ohci1394(struct pci_dev *dev) +{ + if (!dmi_check_system(toshiba_ohci1394_dmi_table)) + return; /* only applies to certain Toshibas (so far) */ + + /* Restore config space on Toshiba laptops */ + pci_write_config_word(dev, PCI_CACHE_LINE_SIZE, toshiba_line_size); + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, (u8 *)&dev->irq); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, + pci_resource_start(dev, 0)); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_1, + pci_resource_start(dev, 1)); +} +DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_TI, 0x8032, + pci_post_fixup_toshiba_ohci1394); + + +/* + * Prevent the BIOS trapping accesses to the Cyrix CS5530A video device + * configuration space. + */ +static void pci_early_fixup_cyrix_5530(struct pci_dev *dev) +{ + u8 r; + /* clear 'F4 Video Configuration Trap' bit */ + pci_read_config_byte(dev, 0x42, &r); + r &= 0xfd; + pci_write_config_byte(dev, 0x42, r); +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, + pci_early_fixup_cyrix_5530); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, + pci_early_fixup_cyrix_5530); + +/* + * Siemens Nixdorf AG FSC Multiprocessor Interrupt Controller: + * prevent update of the BAR0, which doesn't look like a normal BAR. + */ +static void __devinit pci_siemens_interrupt_controller(struct pci_dev *dev) +{ + dev->resource[0].flags |= IORESOURCE_PCI_FIXED; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015, + pci_siemens_interrupt_controller); diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c new file mode 100644 index 00000000000..bcd2f94b732 --- /dev/null +++ b/arch/x86/pci/i386.c @@ -0,0 +1,315 @@ +/* + * Low-Level PCI Access for i386 machines + * + * Copyright 1993, 1994 Drew Eckhardt + * Visionary Computing + * (Unix and Linux consulting and custom programming) + * Drew@Colorado.EDU + * +1 (303) 786-7975 + * + * Drew's work was sponsored by: + * iX Multiuser Multitasking Magazine + * Hannover, Germany + * hm@ix.de + * + * Copyright 1997--2000 Martin Mares + * + * For more information, please consult the following manuals (look at + * http://www.pcisig.com/ for how to get them): + * + * PCI BIOS Specification + * PCI Local Bus Specification + * PCI to PCI Bridge Specification + * PCI System Design Guide + * + */ + +#include +#include +#include +#include +#include +#include + +#include "pci.h" + +/* + * We need to avoid collisions with `mirrored' VGA ports + * and other strange ISA hardware, so we always want the + * addresses to be allocated in the 0x000-0x0ff region + * modulo 0x400. + * + * Why? Because some silly external IO cards only decode + * the low 10 bits of the IO address. The 0x00-0xff region + * is reserved for motherboard devices that decode all 16 + * bits, so it's ok to allocate at, say, 0x2800-0x28ff, + * but we want to try to avoid allocating at 0x2900-0x2bff + * which might have be mirrored at 0x0100-0x03ff.. + */ +void +pcibios_align_resource(void *data, struct resource *res, + resource_size_t size, resource_size_t align) +{ + if (res->flags & IORESOURCE_IO) { + resource_size_t start = res->start; + + if (start & 0x300) { + start = (start + 0x3ff) & ~0x3ff; + res->start = start; + } + } +} + + +/* + * Handle resources of PCI devices. If the world were perfect, we could + * just allocate all the resource regions and do nothing more. It isn't. + * On the other hand, we cannot just re-allocate all devices, as it would + * require us to know lots of host bridge internals. So we attempt to + * keep as much of the original configuration as possible, but tweak it + * when it's found to be wrong. + * + * Known BIOS problems we have to work around: + * - I/O or memory regions not configured + * - regions configured, but not enabled in the command register + * - bogus I/O addresses above 64K used + * - expansion ROMs left enabled (this may sound harmless, but given + * the fact the PCI specs explicitly allow address decoders to be + * shared between expansion ROMs and other resource regions, it's + * at least dangerous) + * + * Our solution: + * (1) Allocate resources for all buses behind PCI-to-PCI bridges. + * This gives us fixed barriers on where we can allocate. + * (2) Allocate resources for all enabled devices. If there is + * a collision, just mark the resource as unallocated. Also + * disable expansion ROMs during this step. + * (3) Try to allocate resources for disabled devices. If the + * resources were assigned correctly, everything goes well, + * if they weren't, they won't disturb allocation of other + * resources. + * (4) Assign new addresses to resources which were either + * not configured at all or misconfigured. If explicitly + * requested by the user, configure expansion ROM address + * as well. + */ + +static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) +{ + struct pci_bus *bus; + struct pci_dev *dev; + int idx; + struct resource *r, *pr; + + /* Depth-First Search on bus tree */ + list_for_each_entry(bus, bus_list, node) { + if ((dev = bus->self)) { + for (idx = PCI_BRIDGE_RESOURCES; + idx < PCI_NUM_RESOURCES; idx++) { + r = &dev->resource[idx]; + if (!r->flags) + continue; + pr = pci_find_parent_resource(dev, r); + if (!r->start || !pr || + request_resource(pr, r) < 0) { + printk(KERN_ERR "PCI: Cannot allocate " + "resource region %d " + "of bridge %s\n", + idx, pci_name(dev)); + /* + * Something is wrong with the region. + * Invalidate the resource to prevent + * child resource allocations in this + * range. + */ + r->flags = 0; + } + } + } + pcibios_allocate_bus_resources(&bus->children); + } +} + +static void __init pcibios_allocate_resources(int pass) +{ + struct pci_dev *dev = NULL; + int idx, disabled; + u16 command; + struct resource *r, *pr; + + for_each_pci_dev(dev) { + pci_read_config_word(dev, PCI_COMMAND, &command); + for (idx = 0; idx < PCI_ROM_RESOURCE; idx++) { + r = &dev->resource[idx]; + if (r->parent) /* Already allocated */ + continue; + if (!r->start) /* Address not assigned at all */ + continue; + if (r->flags & IORESOURCE_IO) + disabled = !(command & PCI_COMMAND_IO); + else + disabled = !(command & PCI_COMMAND_MEMORY); + if (pass == disabled) { + DBG("PCI: Resource %08lx-%08lx " + "(f=%lx, d=%d, p=%d)\n", + r->start, r->end, r->flags, disabled, pass); + pr = pci_find_parent_resource(dev, r); + if (!pr || request_resource(pr, r) < 0) { + printk(KERN_ERR "PCI: Cannot allocate " + "resource region %d " + "of device %s\n", + idx, pci_name(dev)); + /* We'll assign a new address later */ + r->end -= r->start; + r->start = 0; + } + } + } + if (!pass) { + r = &dev->resource[PCI_ROM_RESOURCE]; + if (r->flags & IORESOURCE_ROM_ENABLE) { + /* Turn the ROM off, leave the resource region, + * but keep it unregistered. */ + u32 reg; + DBG("PCI: Switching off ROM of %s\n", + pci_name(dev)); + r->flags &= ~IORESOURCE_ROM_ENABLE; + pci_read_config_dword(dev, + dev->rom_base_reg, ®); + pci_write_config_dword(dev, dev->rom_base_reg, + reg & ~PCI_ROM_ADDRESS_ENABLE); + } + } + } +} + +static int __init pcibios_assign_resources(void) +{ + struct pci_dev *dev = NULL; + struct resource *r, *pr; + + if (!(pci_probe & PCI_ASSIGN_ROMS)) { + /* + * Try to use BIOS settings for ROMs, otherwise let + * pci_assign_unassigned_resources() allocate the new + * addresses. + */ + for_each_pci_dev(dev) { + r = &dev->resource[PCI_ROM_RESOURCE]; + if (!r->flags || !r->start) + continue; + pr = pci_find_parent_resource(dev, r); + if (!pr || request_resource(pr, r) < 0) { + r->end -= r->start; + r->start = 0; + } + } + } + + pci_assign_unassigned_resources(); + + return 0; +} + +void __init pcibios_resource_survey(void) +{ + DBG("PCI: Allocating resources\n"); + pcibios_allocate_bus_resources(&pci_root_buses); + pcibios_allocate_resources(0); + pcibios_allocate_resources(1); +} + +/** + * called in fs_initcall (one below subsys_initcall), + * give a chance for motherboard reserve resources + */ +fs_initcall(pcibios_assign_resources); + +int pcibios_enable_resources(struct pci_dev *dev, int mask) +{ + u16 cmd, old_cmd; + int idx; + struct resource *r; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + old_cmd = cmd; + for (idx = 0; idx < PCI_NUM_RESOURCES; idx++) { + /* Only set up the requested stuff */ + if (!(mask & (1 << idx))) + continue; + + r = &dev->resource[idx]; + if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM))) + continue; + if ((idx == PCI_ROM_RESOURCE) && + (!(r->flags & IORESOURCE_ROM_ENABLE))) + continue; + if (!r->start && r->end) { + printk(KERN_ERR "PCI: Device %s not available " + "because of resource %d collisions\n", + pci_name(dev), idx); + return -EINVAL; + } + if (r->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (r->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + if (cmd != old_cmd) { + printk("PCI: Enabling device %s (%04x -> %04x)\n", + pci_name(dev), old_cmd, cmd); + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + return 0; +} + +/* + * If we set up a device for bus mastering, we need to check the latency + * timer as certain crappy BIOSes forget to set it properly. + */ +unsigned int pcibios_max_latency = 255; + +void pcibios_set_master(struct pci_dev *dev) +{ + u8 lat; + pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); + if (lat < 16) + lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency; + else if (lat > pcibios_max_latency) + lat = pcibios_max_latency; + else + return; + printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", + pci_name(dev), lat); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); +} + +int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine) +{ + unsigned long prot; + + /* I/O space cannot be accessed via normal processor loads and + * stores on this platform. + */ + if (mmap_state == pci_mmap_io) + return -EINVAL; + + /* Leave vm_pgoff as-is, the PCI space address is the physical + * address on this platform. + */ + prot = pgprot_val(vma->vm_page_prot); + if (boot_cpu_data.x86 > 3) + prot |= _PAGE_PCD | _PAGE_PWT; + vma->vm_page_prot = __pgprot(prot); + + /* Write-combine setting is ignored, it is changed via the mtrr + * interfaces on this platform. + */ + if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c new file mode 100644 index 00000000000..3de9f9ba2da --- /dev/null +++ b/arch/x86/pci/init.c @@ -0,0 +1,37 @@ +#include +#include +#include "pci.h" + +/* arch_initcall has too random ordering, so call the initializers + in the right sequence from here. */ +static __init int pci_access_init(void) +{ + int type __maybe_unused = 0; + +#ifdef CONFIG_PCI_DIRECT + type = pci_direct_probe(); +#endif +#ifdef CONFIG_PCI_MMCONFIG + pci_mmcfg_init(type); +#endif + if (raw_pci_ops) + return 0; +#ifdef CONFIG_PCI_BIOS + pci_pcbios_init(); +#endif + /* + * don't check for raw_pci_ops here because we want pcbios as last + * fallback, yet it's needed to run first to set pcibios_last_bus + * in case legacy PCI probing is used. otherwise detecting peer busses + * fails. + */ +#ifdef CONFIG_PCI_DIRECT + pci_direct_init(type); +#endif + if (!raw_pci_ops) + printk(KERN_ERR + "PCI: Fatal: No config space access function found\n"); + + return 0; +} +arch_initcall(pci_access_init); diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c new file mode 100644 index 00000000000..8434f2323b8 --- /dev/null +++ b/arch/x86/pci/irq.c @@ -0,0 +1,1173 @@ +/* + * Low-Level PCI Support for PC -- Routing of Interrupts + * + * (c) 1999--2000 Martin Mares + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pci.h" + +#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24)) +#define PIRQ_VERSION 0x0100 + +static int broken_hp_bios_irq9; +static int acer_tm360_irqrouting; + +static struct irq_routing_table *pirq_table; + +static int pirq_enable_irq(struct pci_dev *dev); + +/* + * Never use: 0, 1, 2 (timer, keyboard, and cascade) + * Avoid using: 13, 14 and 15 (FP error and IDE). + * Penalize: 3, 4, 6, 7, 12 (known ISA uses: serial, floppy, parallel and mouse) + */ +unsigned int pcibios_irq_mask = 0xfff8; + +static int pirq_penalty[16] = { + 1000000, 1000000, 1000000, 1000, 1000, 0, 1000, 1000, + 0, 0, 0, 0, 1000, 100000, 100000, 100000 +}; + +struct irq_router { + char *name; + u16 vendor, device; + int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq); + int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new); +}; + +struct irq_router_handler { + u16 vendor; + int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device); +}; + +int (*pcibios_enable_irq)(struct pci_dev *dev) = NULL; +void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL; + +/* + * Check passed address for the PCI IRQ Routing Table signature + * and perform checksum verification. + */ + +static inline struct irq_routing_table * pirq_check_routing_table(u8 *addr) +{ + struct irq_routing_table *rt; + int i; + u8 sum; + + rt = (struct irq_routing_table *) addr; + if (rt->signature != PIRQ_SIGNATURE || + rt->version != PIRQ_VERSION || + rt->size % 16 || + rt->size < sizeof(struct irq_routing_table)) + return NULL; + sum = 0; + for (i=0; i < rt->size; i++) + sum += addr[i]; + if (!sum) { + DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", rt); + return rt; + } + return NULL; +} + + + +/* + * Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table. + */ + +static struct irq_routing_table * __init pirq_find_routing_table(void) +{ + u8 *addr; + struct irq_routing_table *rt; + + if (pirq_table_addr) { + rt = pirq_check_routing_table((u8 *) __va(pirq_table_addr)); + if (rt) + return rt; + printk(KERN_WARNING "PCI: PIRQ table NOT found at pirqaddr\n"); + } + for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) { + rt = pirq_check_routing_table(addr); + if (rt) + return rt; + } + return NULL; +} + +/* + * If we have a IRQ routing table, use it to search for peer host + * bridges. It's a gross hack, but since there are no other known + * ways how to get a list of buses, we have to go this way. + */ + +static void __init pirq_peer_trick(void) +{ + struct irq_routing_table *rt = pirq_table; + u8 busmap[256]; + int i; + struct irq_info *e; + + memset(busmap, 0, sizeof(busmap)); + for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) { + e = &rt->slots[i]; +#ifdef DEBUG + { + int j; + DBG(KERN_DEBUG "%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot); + for(j=0; j<4; j++) + DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap); + DBG("\n"); + } +#endif + busmap[e->bus] = 1; + } + for(i = 1; i < 256; i++) { + if (!busmap[i] || pci_find_bus(0, i)) + continue; + if (pci_scan_bus_with_sysdata(i)) + printk(KERN_INFO "PCI: Discovered primary peer " + "bus %02x [IRQ]\n", i); + } + pcibios_last_bus = -1; +} + +/* + * Code for querying and setting of IRQ routes on various interrupt routers. + */ + +void eisa_set_level_irq(unsigned int irq) +{ + unsigned char mask = 1 << (irq & 7); + unsigned int port = 0x4d0 + (irq >> 3); + unsigned char val; + static u16 eisa_irq_mask; + + if (irq >= 16 || (1 << irq) & eisa_irq_mask) + return; + + eisa_irq_mask |= (1 << irq); + printk(KERN_DEBUG "PCI: setting IRQ %u as level-triggered\n", irq); + val = inb(port); + if (!(val & mask)) { + DBG(KERN_DEBUG " -> edge"); + outb(val | mask, port); + } +} + +/* + * Common IRQ routing practice: nybbles in config space, + * offset by some magic constant. + */ +static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr) +{ + u8 x; + unsigned reg = offset + (nr >> 1); + + pci_read_config_byte(router, reg, &x); + return (nr & 1) ? (x >> 4) : (x & 0xf); +} + +static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val) +{ + u8 x; + unsigned reg = offset + (nr >> 1); + + pci_read_config_byte(router, reg, &x); + x = (nr & 1) ? ((x & 0x0f) | (val << 4)) : ((x & 0xf0) | val); + pci_write_config_byte(router, reg, x); +} + +/* + * ALI pirq entries are damn ugly, and completely undocumented. + * This has been figured out from pirq tables, and it's not a pretty + * picture. + */ +static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + static const unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 }; + + return irqmap[read_config_nybble(router, 0x48, pirq-1)]; +} + +static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + static const unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 }; + unsigned int val = irqmap[irq]; + + if (val) { + write_config_nybble(router, 0x48, pirq-1, val); + return 1; + } + return 0; +} + +/* + * The Intel PIIX4 pirq rules are fairly simple: "pirq" is + * just a pointer to the config space. + */ +static int pirq_piix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + u8 x; + + pci_read_config_byte(router, pirq, &x); + return (x < 16) ? x : 0; +} + +static int pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + pci_write_config_byte(router, pirq, irq); + return 1; +} + +/* + * The VIA pirq rules are nibble-based, like ALI, + * but without the ugly irq number munging. + * However, PIRQD is in the upper instead of lower 4 bits. + */ +static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + return read_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq); +} + +static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + write_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq, irq); + return 1; +} + +/* + * The VIA pirq rules are nibble-based, like ALI, + * but without the ugly irq number munging. + * However, for 82C586, nibble map is different . + */ +static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 }; + return read_config_nybble(router, 0x55, pirqmap[pirq-1]); +} + +static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 }; + write_config_nybble(router, 0x55, pirqmap[pirq-1], irq); + return 1; +} + +/* + * ITE 8330G pirq rules are nibble-based + * FIXME: pirqmap may be { 1, 0, 3, 2 }, + * 2+3 are both mapped to irq 9 on my system + */ +static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + static const unsigned char pirqmap[4] = { 1, 0, 2, 3 }; + return read_config_nybble(router,0x43, pirqmap[pirq-1]); +} + +static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + static const unsigned char pirqmap[4] = { 1, 0, 2, 3 }; + write_config_nybble(router, 0x43, pirqmap[pirq-1], irq); + return 1; +} + +/* + * OPTI: high four bits are nibble pointer.. + * I wonder what the low bits do? + */ +static int pirq_opti_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + return read_config_nybble(router, 0xb8, pirq >> 4); +} + +static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + write_config_nybble(router, 0xb8, pirq >> 4, irq); + return 1; +} + +/* + * Cyrix: nibble offset 0x5C + * 0x5C bits 7:4 is INTB bits 3:0 is INTA + * 0x5D bits 7:4 is INTD bits 3:0 is INTC + */ +static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + return read_config_nybble(router, 0x5C, (pirq-1)^1); +} + +static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + write_config_nybble(router, 0x5C, (pirq-1)^1, irq); + return 1; +} + +/* + * PIRQ routing for SiS 85C503 router used in several SiS chipsets. + * We have to deal with the following issues here: + * - vendors have different ideas about the meaning of link values + * - some onboard devices (integrated in the chipset) have special + * links and are thus routed differently (i.e. not via PCI INTA-INTD) + * - different revision of the router have a different layout for + * the routing registers, particularly for the onchip devices + * + * For all routing registers the common thing is we have one byte + * per routeable link which is defined as: + * bit 7 IRQ mapping enabled (0) or disabled (1) + * bits [6:4] reserved (sometimes used for onchip devices) + * bits [3:0] IRQ to map to + * allowed: 3-7, 9-12, 14-15 + * reserved: 0, 1, 2, 8, 13 + * + * The config-space registers located at 0x41/0x42/0x43/0x44 are + * always used to route the normal PCI INT A/B/C/D respectively. + * Apparently there are systems implementing PCI routing table using + * link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D. + * We try our best to handle both link mappings. + * + * Currently (2003-05-21) it appears most SiS chipsets follow the + * definition of routing registers from the SiS-5595 southbridge. + * According to the SiS 5595 datasheets the revision id's of the + * router (ISA-bridge) should be 0x01 or 0xb0. + * + * Furthermore we've also seen lspci dumps with revision 0x00 and 0xb1. + * Looks like these are used in a number of SiS 5xx/6xx/7xx chipsets. + * They seem to work with the current routing code. However there is + * some concern because of the two USB-OHCI HCs (original SiS 5595 + * had only one). YMMV. + * + * Onchip routing for router rev-id 0x01/0xb0 and probably 0x00/0xb1: + * + * 0x61: IDEIRQ: + * bits [6:5] must be written 01 + * bit 4 channel-select primary (0), secondary (1) + * + * 0x62: USBIRQ: + * bit 6 OHCI function disabled (0), enabled (1) + * + * 0x6a: ACPI/SCI IRQ: bits 4-6 reserved + * + * 0x7e: Data Acq. Module IRQ - bits 4-6 reserved + * + * We support USBIRQ (in addition to INTA-INTD) and keep the + * IDE, ACPI and DAQ routing untouched as set by the BIOS. + * + * Currently the only reported exception is the new SiS 65x chipset + * which includes the SiS 69x southbridge. Here we have the 85C503 + * router revision 0x04 and there are changes in the register layout + * mostly related to the different USB HCs with USB 2.0 support. + * + * Onchip routing for router rev-id 0x04 (try-and-error observation) + * + * 0x60/0x61/0x62/0x63: 1xEHCI and 3xOHCI (companion) USB-HCs + * bit 6-4 are probably unused, not like 5595 + */ + +#define PIRQ_SIS_IRQ_MASK 0x0f +#define PIRQ_SIS_IRQ_DISABLE 0x80 +#define PIRQ_SIS_USB_ENABLE 0x40 + +static int pirq_sis_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + u8 x; + int reg; + + reg = pirq; + if (reg >= 0x01 && reg <= 0x04) + reg += 0x40; + pci_read_config_byte(router, reg, &x); + return (x & PIRQ_SIS_IRQ_DISABLE) ? 0 : (x & PIRQ_SIS_IRQ_MASK); +} + +static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + u8 x; + int reg; + + reg = pirq; + if (reg >= 0x01 && reg <= 0x04) + reg += 0x40; + pci_read_config_byte(router, reg, &x); + x &= ~(PIRQ_SIS_IRQ_MASK | PIRQ_SIS_IRQ_DISABLE); + x |= irq ? irq: PIRQ_SIS_IRQ_DISABLE; + pci_write_config_byte(router, reg, x); + return 1; +} + + +/* + * VLSI: nibble offset 0x74 - educated guess due to routing table and + * config space of VLSI 82C534 PCI-bridge/router (1004:0102) + * Tested on HP OmniBook 800 covering PIRQ 1, 2, 4, 8 for onboard + * devices, PIRQ 3 for non-pci(!) soundchip and (untested) PIRQ 6 + * for the busbridge to the docking station. + */ + +static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + if (pirq > 8) { + printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); + return 0; + } + return read_config_nybble(router, 0x74, pirq-1); +} + +static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + if (pirq > 8) { + printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); + return 0; + } + write_config_nybble(router, 0x74, pirq-1, irq); + return 1; +} + +/* + * ServerWorks: PCI interrupts mapped to system IRQ lines through Index + * and Redirect I/O registers (0x0c00 and 0x0c01). The Index register + * format is (PCIIRQ## | 0x10), e.g.: PCIIRQ10=0x1a. The Redirect + * register is a straight binary coding of desired PIC IRQ (low nibble). + * + * The 'link' value in the PIRQ table is already in the correct format + * for the Index register. There are some special index values: + * 0x00 for ACPI (SCI), 0x01 for USB, 0x02 for IDE0, 0x04 for IDE1, + * and 0x03 for SMBus. + */ +static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + outb_p(pirq, 0xc00); + return inb(0xc01) & 0xf; +} + +static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + outb_p(pirq, 0xc00); + outb_p(irq, 0xc01); + return 1; +} + +/* Support for AMD756 PCI IRQ Routing + * Jhon H. Caicedo + * Jun/21/2001 0.2.0 Release, fixed to use "nybble" functions... (jhcaiced) + * Jun/19/2001 Alpha Release 0.1.0 (jhcaiced) + * The AMD756 pirq rules are nibble-based + * offset 0x56 0-3 PIRQA 4-7 PIRQB + * offset 0x57 0-3 PIRQC 4-7 PIRQD + */ +static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + u8 irq; + irq = 0; + if (pirq <= 4) + { + irq = read_config_nybble(router, 0x56, pirq - 1); + } + printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", + dev->vendor, dev->device, pirq, irq); + return irq; +} + +static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", + dev->vendor, dev->device, pirq, irq); + if (pirq <= 4) + { + write_config_nybble(router, 0x56, pirq - 1, irq); + } + return 1; +} + +#ifdef CONFIG_PCI_BIOS + +static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + struct pci_dev *bridge; + int pin = pci_get_interrupt_pin(dev, &bridge); + return pcibios_set_irq_routing(bridge, pin, irq); +} + +#endif + +static __init int intel_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + static struct pci_device_id __initdata pirq_440gx[] = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2) }, + { }, + }; + + /* 440GX has a proprietary PIRQ router -- don't use it */ + if (pci_dev_present(pirq_440gx)) + return 0; + + switch(device) + { + case PCI_DEVICE_ID_INTEL_82371FB_0: + case PCI_DEVICE_ID_INTEL_82371SB_0: + case PCI_DEVICE_ID_INTEL_82371AB_0: + case PCI_DEVICE_ID_INTEL_82371MX: + case PCI_DEVICE_ID_INTEL_82443MX_0: + case PCI_DEVICE_ID_INTEL_82801AA_0: + case PCI_DEVICE_ID_INTEL_82801AB_0: + case PCI_DEVICE_ID_INTEL_82801BA_0: + case PCI_DEVICE_ID_INTEL_82801BA_10: + case PCI_DEVICE_ID_INTEL_82801CA_0: + case PCI_DEVICE_ID_INTEL_82801CA_12: + case PCI_DEVICE_ID_INTEL_82801DB_0: + case PCI_DEVICE_ID_INTEL_82801E_0: + case PCI_DEVICE_ID_INTEL_82801EB_0: + case PCI_DEVICE_ID_INTEL_ESB_1: + case PCI_DEVICE_ID_INTEL_ICH6_0: + case PCI_DEVICE_ID_INTEL_ICH6_1: + case PCI_DEVICE_ID_INTEL_ICH7_0: + case PCI_DEVICE_ID_INTEL_ICH7_1: + case PCI_DEVICE_ID_INTEL_ICH7_30: + case PCI_DEVICE_ID_INTEL_ICH7_31: + case PCI_DEVICE_ID_INTEL_ESB2_0: + case PCI_DEVICE_ID_INTEL_ICH8_0: + case PCI_DEVICE_ID_INTEL_ICH8_1: + case PCI_DEVICE_ID_INTEL_ICH8_2: + case PCI_DEVICE_ID_INTEL_ICH8_3: + case PCI_DEVICE_ID_INTEL_ICH8_4: + case PCI_DEVICE_ID_INTEL_ICH9_0: + case PCI_DEVICE_ID_INTEL_ICH9_1: + case PCI_DEVICE_ID_INTEL_ICH9_2: + case PCI_DEVICE_ID_INTEL_ICH9_3: + case PCI_DEVICE_ID_INTEL_ICH9_4: + case PCI_DEVICE_ID_INTEL_ICH9_5: + case PCI_DEVICE_ID_INTEL_TOLAPAI_0: + r->name = "PIIX/ICH"; + r->get = pirq_piix_get; + r->set = pirq_piix_set; + return 1; + } + return 0; +} + +static __init int via_router_probe(struct irq_router *r, + struct pci_dev *router, u16 device) +{ + /* FIXME: We should move some of the quirk fixup stuff here */ + + /* + * work arounds for some buggy BIOSes + */ + if (device == PCI_DEVICE_ID_VIA_82C586_0) { + switch(router->device) { + case PCI_DEVICE_ID_VIA_82C686: + /* + * Asus k7m bios wrongly reports 82C686A + * as 586-compatible + */ + device = PCI_DEVICE_ID_VIA_82C686; + break; + case PCI_DEVICE_ID_VIA_8235: + /** + * Asus a7v-x bios wrongly reports 8235 + * as 586-compatible + */ + device = PCI_DEVICE_ID_VIA_8235; + break; + } + } + + switch(device) { + case PCI_DEVICE_ID_VIA_82C586_0: + r->name = "VIA"; + r->get = pirq_via586_get; + r->set = pirq_via586_set; + return 1; + case PCI_DEVICE_ID_VIA_82C596: + case PCI_DEVICE_ID_VIA_82C686: + case PCI_DEVICE_ID_VIA_8231: + case PCI_DEVICE_ID_VIA_8233A: + case PCI_DEVICE_ID_VIA_8235: + case PCI_DEVICE_ID_VIA_8237: + /* FIXME: add new ones for 8233/5 */ + r->name = "VIA"; + r->get = pirq_via_get; + r->set = pirq_via_set; + return 1; + } + return 0; +} + +static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch(device) + { + case PCI_DEVICE_ID_VLSI_82C534: + r->name = "VLSI 82C534"; + r->get = pirq_vlsi_get; + r->set = pirq_vlsi_set; + return 1; + } + return 0; +} + + +static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch(device) + { + case PCI_DEVICE_ID_SERVERWORKS_OSB4: + case PCI_DEVICE_ID_SERVERWORKS_CSB5: + r->name = "ServerWorks"; + r->get = pirq_serverworks_get; + r->set = pirq_serverworks_set; + return 1; + } + return 0; +} + +static __init int sis_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + if (device != PCI_DEVICE_ID_SI_503) + return 0; + + r->name = "SIS"; + r->get = pirq_sis_get; + r->set = pirq_sis_set; + return 1; +} + +static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch(device) + { + case PCI_DEVICE_ID_CYRIX_5520: + r->name = "NatSemi"; + r->get = pirq_cyrix_get; + r->set = pirq_cyrix_set; + return 1; + } + return 0; +} + +static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch(device) + { + case PCI_DEVICE_ID_OPTI_82C700: + r->name = "OPTI"; + r->get = pirq_opti_get; + r->set = pirq_opti_set; + return 1; + } + return 0; +} + +static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch(device) + { + case PCI_DEVICE_ID_ITE_IT8330G_0: + r->name = "ITE"; + r->get = pirq_ite_get; + r->set = pirq_ite_set; + return 1; + } + return 0; +} + +static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch(device) + { + case PCI_DEVICE_ID_AL_M1533: + case PCI_DEVICE_ID_AL_M1563: + printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n"); + r->name = "ALI"; + r->get = pirq_ali_get; + r->set = pirq_ali_set; + return 1; + } + return 0; +} + +static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch(device) + { + case PCI_DEVICE_ID_AMD_VIPER_740B: + r->name = "AMD756"; + break; + case PCI_DEVICE_ID_AMD_VIPER_7413: + r->name = "AMD766"; + break; + case PCI_DEVICE_ID_AMD_VIPER_7443: + r->name = "AMD768"; + break; + default: + return 0; + } + r->get = pirq_amd756_get; + r->set = pirq_amd756_set; + return 1; +} + +static __initdata struct irq_router_handler pirq_routers[] = { + { PCI_VENDOR_ID_INTEL, intel_router_probe }, + { PCI_VENDOR_ID_AL, ali_router_probe }, + { PCI_VENDOR_ID_ITE, ite_router_probe }, + { PCI_VENDOR_ID_VIA, via_router_probe }, + { PCI_VENDOR_ID_OPTI, opti_router_probe }, + { PCI_VENDOR_ID_SI, sis_router_probe }, + { PCI_VENDOR_ID_CYRIX, cyrix_router_probe }, + { PCI_VENDOR_ID_VLSI, vlsi_router_probe }, + { PCI_VENDOR_ID_SERVERWORKS, serverworks_router_probe }, + { PCI_VENDOR_ID_AMD, amd_router_probe }, + /* Someone with docs needs to add the ATI Radeon IGP */ + { 0, NULL } +}; +static struct irq_router pirq_router; +static struct pci_dev *pirq_router_dev; + + +/* + * FIXME: should we have an option to say "generic for + * chipset" ? + */ + +static void __init pirq_find_router(struct irq_router *r) +{ + struct irq_routing_table *rt = pirq_table; + struct irq_router_handler *h; + +#ifdef CONFIG_PCI_BIOS + if (!rt->signature) { + printk(KERN_INFO "PCI: Using BIOS for IRQ routing\n"); + r->set = pirq_bios_set; + r->name = "BIOS"; + return; + } +#endif + + /* Default unless a driver reloads it */ + r->name = "default"; + r->get = NULL; + r->set = NULL; + + DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", + rt->rtr_vendor, rt->rtr_device); + + pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn); + if (!pirq_router_dev) { + DBG(KERN_DEBUG "PCI: Interrupt router not found at " + "%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn); + return; + } + + for( h = pirq_routers; h->vendor; h++) { + /* First look for a router match */ + if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device)) + break; + /* Fall back to a device match */ + if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device)) + break; + } + printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n", + pirq_router.name, + pirq_router_dev->vendor, + pirq_router_dev->device, + pci_name(pirq_router_dev)); + + /* The device remains referenced for the kernel lifetime */ +} + +static struct irq_info *pirq_get_info(struct pci_dev *dev) +{ + struct irq_routing_table *rt = pirq_table; + int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); + struct irq_info *info; + + for (info = rt->slots; entries--; info++) + if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn)) + return info; + return NULL; +} + +static int pcibios_lookup_irq(struct pci_dev *dev, int assign) +{ + u8 pin; + struct irq_info *info; + int i, pirq, newirq; + int irq = 0; + u32 mask; + struct irq_router *r = &pirq_router; + struct pci_dev *dev2 = NULL; + char *msg = NULL; + + /* Find IRQ pin */ + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + if (!pin) { + DBG(KERN_DEBUG " -> no interrupt pin\n"); + return 0; + } + pin = pin - 1; + + /* Find IRQ routing entry */ + + if (!pirq_table) + return 0; + + DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin); + info = pirq_get_info(dev); + if (!info) { + DBG(" -> not found in routing table\n" KERN_DEBUG); + return 0; + } + pirq = info->irq[pin].link; + mask = info->irq[pin].bitmap; + if (!pirq) { + DBG(" -> not routed\n" KERN_DEBUG); + return 0; + } + DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs); + mask &= pcibios_irq_mask; + + /* Work around broken HP Pavilion Notebooks which assign USB to + IRQ 9 even though it is actually wired to IRQ 11 */ + + if (broken_hp_bios_irq9 && pirq == 0x59 && dev->irq == 9) { + dev->irq = 11; + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, 11); + r->set(pirq_router_dev, dev, pirq, 11); + } + + /* same for Acer Travelmate 360, but with CB and irq 11 -> 10 */ + if (acer_tm360_irqrouting && dev->irq == 11 && dev->vendor == PCI_VENDOR_ID_O2) { + pirq = 0x68; + mask = 0x400; + dev->irq = r->get(pirq_router_dev, dev, pirq); + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); + } + + /* + * Find the best IRQ to assign: use the one + * reported by the device if possible. + */ + newirq = dev->irq; + if (newirq && !((1 << newirq) & mask)) { + if ( pci_probe & PCI_USE_PIRQ_MASK) newirq = 0; + else printk("\n" KERN_WARNING + "PCI: IRQ %i for device %s doesn't match PIRQ mask " + "- try pci=usepirqmask\n" KERN_DEBUG, newirq, + pci_name(dev)); + } + if (!newirq && assign) { + for (i = 0; i < 16; i++) { + if (!(mask & (1 << i))) + continue; + if (pirq_penalty[i] < pirq_penalty[newirq] && can_request_irq(i, IRQF_SHARED)) + newirq = i; + } + } + DBG(" -> newirq=%d", newirq); + + /* Check if it is hardcoded */ + if ((pirq & 0xf0) == 0xf0) { + irq = pirq & 0xf; + DBG(" -> hardcoded IRQ %d\n", irq); + msg = "Hardcoded"; + } else if ( r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ + ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask)) ) { + DBG(" -> got IRQ %d\n", irq); + msg = "Found"; + eisa_set_level_irq(irq); + } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { + DBG(" -> assigning IRQ %d", newirq); + if (r->set(pirq_router_dev, dev, pirq, newirq)) { + eisa_set_level_irq(newirq); + DBG(" ... OK\n"); + msg = "Assigned"; + irq = newirq; + } + } + + if (!irq) { + DBG(" ... failed\n"); + if (newirq && mask == (1 << newirq)) { + msg = "Guessed"; + irq = newirq; + } else + return 0; + } + printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, pci_name(dev)); + + /* Update IRQ for all devices with the same pirq value */ + while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { + pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin); + if (!pin) + continue; + pin--; + info = pirq_get_info(dev2); + if (!info) + continue; + if (info->irq[pin].link == pirq) { + /* We refuse to override the dev->irq information. Give a warning! */ + if ( dev2->irq && dev2->irq != irq && \ + (!(pci_probe & PCI_USE_PIRQ_MASK) || \ + ((1 << dev2->irq) & mask)) ) { +#ifndef CONFIG_PCI_MSI + printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", + pci_name(dev2), dev2->irq, irq); +#endif + continue; + } + dev2->irq = irq; + pirq_penalty[irq]++; + if (dev != dev2) + printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, pci_name(dev2)); + } + } + return 1; +} + +static void __init pcibios_fixup_irqs(void) +{ + struct pci_dev *dev = NULL; + u8 pin; + + DBG(KERN_DEBUG "PCI: IRQ fixup\n"); + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + /* + * If the BIOS has set an out of range IRQ number, just ignore it. + * Also keep track of which IRQ's are already in use. + */ + if (dev->irq >= 16) { + DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", pci_name(dev), dev->irq); + dev->irq = 0; + } + /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */ + if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000) + pirq_penalty[dev->irq] = 0; + pirq_penalty[dev->irq]++; + } + + dev = NULL; + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); +#ifdef CONFIG_X86_IO_APIC + /* + * Recalculate IRQ numbers if we use the I/O APIC. + */ + if (io_apic_assign_pci_irqs) + { + int irq; + + if (pin) { + pin--; /* interrupt pins are numbered starting from 1 */ + irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin); + /* + * Busses behind bridges are typically not listed in the MP-table. + * In this case we have to look up the IRQ based on the parent bus, + * parent slot, and pin number. The SMP code detects such bridged + * busses itself so we should get into this branch reliably. + */ + if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ + struct pci_dev * bridge = dev->bus->self; + + pin = (pin + PCI_SLOT(dev->devfn)) % 4; + irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, + PCI_SLOT(bridge->devfn), pin); + if (irq >= 0) + printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", + pci_name(bridge), 'A' + pin, irq); + } + if (irq >= 0) { + printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", + pci_name(dev), 'A' + pin, irq); + dev->irq = irq; + } + } + } +#endif + /* + * Still no IRQ? Try to lookup one... + */ + if (pin && !dev->irq) + pcibios_lookup_irq(dev, 0); + } +} + +/* + * Work around broken HP Pavilion Notebooks which assign USB to + * IRQ 9 even though it is actually wired to IRQ 11 + */ +static int __init fix_broken_hp_bios_irq9(struct dmi_system_id *d) +{ + if (!broken_hp_bios_irq9) { + broken_hp_bios_irq9 = 1; + printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident); + } + return 0; +} + +/* + * Work around broken Acer TravelMate 360 Notebooks which assign + * Cardbus to IRQ 11 even though it is actually wired to IRQ 10 + */ +static int __init fix_acer_tm360_irqrouting(struct dmi_system_id *d) +{ + if (!acer_tm360_irqrouting) { + acer_tm360_irqrouting = 1; + printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident); + } + return 0; +} + +static struct dmi_system_id __initdata pciirq_dmi_table[] = { + { + .callback = fix_broken_hp_bios_irq9, + .ident = "HP Pavilion N5400 Series Laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_BIOS_VERSION, "GE.M1.03"), + DMI_MATCH(DMI_PRODUCT_VERSION, "HP Pavilion Notebook Model GE"), + DMI_MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736"), + }, + }, + { + .callback = fix_acer_tm360_irqrouting, + .ident = "Acer TravelMate 36x Laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), + }, + }, + { } +}; + +static int __init pcibios_irq_init(void) +{ + DBG(KERN_DEBUG "PCI: IRQ init\n"); + + if (pcibios_enable_irq || raw_pci_ops == NULL) + return 0; + + dmi_check_system(pciirq_dmi_table); + + pirq_table = pirq_find_routing_table(); + +#ifdef CONFIG_PCI_BIOS + if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN)) + pirq_table = pcibios_get_irq_routing_table(); +#endif + if (pirq_table) { + pirq_peer_trick(); + pirq_find_router(&pirq_router); + if (pirq_table->exclusive_irqs) { + int i; + for (i=0; i<16; i++) + if (!(pirq_table->exclusive_irqs & (1 << i))) + pirq_penalty[i] += 100; + } + /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */ + if (io_apic_assign_pci_irqs) + pirq_table = NULL; + } + + pcibios_enable_irq = pirq_enable_irq; + + pcibios_fixup_irqs(); + return 0; +} + +subsys_initcall(pcibios_irq_init); + + +static void pirq_penalize_isa_irq(int irq, int active) +{ + /* + * If any ISAPnP device reports an IRQ in its list of possible + * IRQ's, we try to avoid assigning it to PCI devices. + */ + if (irq < 16) { + if (active) + pirq_penalty[irq] += 1000; + else + pirq_penalty[irq] += 100; + } +} + +void pcibios_penalize_isa_irq(int irq, int active) +{ +#ifdef CONFIG_ACPI + if (!acpi_noirq) + acpi_penalize_isa_irq(irq, active); + else +#endif + pirq_penalize_isa_irq(irq, active); +} + +static int pirq_enable_irq(struct pci_dev *dev) +{ + u8 pin; + struct pci_dev *temp_dev; + + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) { + char *msg = ""; + + pin--; /* interrupt pins are numbered starting from 1 */ + + if (io_apic_assign_pci_irqs) { + int irq; + + irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin); + /* + * Busses behind bridges are typically not listed in the MP-table. + * In this case we have to look up the IRQ based on the parent bus, + * parent slot, and pin number. The SMP code detects such bridged + * busses itself so we should get into this branch reliably. + */ + temp_dev = dev; + while (irq < 0 && dev->bus->parent) { /* go back to the bridge */ + struct pci_dev * bridge = dev->bus->self; + + pin = (pin + PCI_SLOT(dev->devfn)) % 4; + irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, + PCI_SLOT(bridge->devfn), pin); + if (irq >= 0) + printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", + pci_name(bridge), 'A' + pin, irq); + dev = bridge; + } + dev = temp_dev; + if (irq >= 0) { + printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", + pci_name(dev), 'A' + pin, irq); + dev->irq = irq; + return 0; + } else + msg = " Probably buggy MP table."; + } else if (pci_probe & PCI_BIOS_IRQ_SCAN) + msg = ""; + else + msg = " Please try using pci=biosirq."; + + /* With IDE legacy devices the IRQ lookup failure is not a problem.. */ + if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5)) + return 0; + + printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", + 'A' + pin, pci_name(dev), msg); + } + return 0; +} diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c new file mode 100644 index 00000000000..5565d7016b7 --- /dev/null +++ b/arch/x86/pci/legacy.c @@ -0,0 +1,56 @@ +/* + * legacy.c - traditional, old school PCI bus probing + */ +#include +#include +#include "pci.h" + +/* + * Discover remaining PCI buses in case there are peer host bridges. + * We use the number of last PCI bus provided by the PCI BIOS. + */ +static void __devinit pcibios_fixup_peer_bridges(void) +{ + int n, devfn; + + if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) + return; + DBG("PCI: Peer bridge fixup\n"); + + for (n=0; n <= pcibios_last_bus; n++) { + u32 l; + if (pci_find_bus(0, n)) + continue; + for (devfn = 0; devfn < 256; devfn += 8) { + if (!raw_pci_ops->read(0, n, devfn, PCI_VENDOR_ID, 2, &l) && + l != 0x0000 && l != 0xffff) { + DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l); + printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); + pci_scan_bus_with_sysdata(n); + break; + } + } + } +} + +static int __init pci_legacy_init(void) +{ + if (!raw_pci_ops) { + printk("PCI: System does not support PCI\n"); + return 0; + } + + if (pcibios_scanned++) + return 0; + + printk("PCI: Probing PCI hardware\n"); + pci_root_bus = pcibios_scan_root(0); + if (pci_root_bus) + pci_bus_add_devices(pci_root_bus); + + pcibios_fixup_peer_bridges(); + + return 0; +} + +subsys_initcall(pci_legacy_init); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c new file mode 100644 index 00000000000..4df637e34f8 --- /dev/null +++ b/arch/x86/pci/mmconfig-shared.c @@ -0,0 +1,315 @@ +/* + * mmconfig-shared.c - Low-level direct PCI config space access via + * MMCONFIG - common code between i386 and x86-64. + * + * This code does: + * - known chipset handling + * - ACPI decoding and validation + * + * Per-architecture code takes care of the mappings and accesses + * themselves. + */ + +#include +#include +#include +#include +#include + +#include "pci.h" + +/* aperture is up to 256MB but BIOS may reserve less */ +#define MMCONFIG_APER_MIN (2 * 1024*1024) +#define MMCONFIG_APER_MAX (256 * 1024*1024) + +DECLARE_BITMAP(pci_mmcfg_fallback_slots, 32*PCI_MMCFG_MAX_CHECK_BUS); + +/* Indicate if the mmcfg resources have been placed into the resource table. */ +static int __initdata pci_mmcfg_resources_inserted; + +/* K8 systems have some devices (typically in the builtin northbridge) + that are only accessible using type1 + Normally this can be expressed in the MCFG by not listing them + and assigning suitable _SEGs, but this isn't implemented in some BIOS. + Instead try to discover all devices on bus 0 that are unreachable using MM + and fallback for them. */ +static void __init unreachable_devices(void) +{ + int i, bus; + /* Use the max bus number from ACPI here? */ + for (bus = 0; bus < PCI_MMCFG_MAX_CHECK_BUS; bus++) { + for (i = 0; i < 32; i++) { + unsigned int devfn = PCI_DEVFN(i, 0); + u32 val1, val2; + + pci_conf1_read(0, bus, devfn, 0, 4, &val1); + if (val1 == 0xffffffff) + continue; + + if (pci_mmcfg_arch_reachable(0, bus, devfn)) { + raw_pci_ops->read(0, bus, devfn, 0, 4, &val2); + if (val1 == val2) + continue; + } + set_bit(i + 32 * bus, pci_mmcfg_fallback_slots); + printk(KERN_NOTICE "PCI: No mmconfig possible on device" + " %02x:%02x\n", bus, i); + } + } +} + +static const char __init *pci_mmcfg_e7520(void) +{ + u32 win; + pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win); + + win = win & 0xf000; + if(win == 0x0000 || win == 0xf000) + pci_mmcfg_config_num = 0; + else { + pci_mmcfg_config_num = 1; + pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL); + if (!pci_mmcfg_config) + return NULL; + pci_mmcfg_config[0].address = win << 16; + pci_mmcfg_config[0].pci_segment = 0; + pci_mmcfg_config[0].start_bus_number = 0; + pci_mmcfg_config[0].end_bus_number = 255; + } + + return "Intel Corporation E7520 Memory Controller Hub"; +} + +static const char __init *pci_mmcfg_intel_945(void) +{ + u32 pciexbar, mask = 0, len = 0; + + pci_mmcfg_config_num = 1; + + pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0x48, 4, &pciexbar); + + /* Enable bit */ + if (!(pciexbar & 1)) + pci_mmcfg_config_num = 0; + + /* Size bits */ + switch ((pciexbar >> 1) & 3) { + case 0: + mask = 0xf0000000U; + len = 0x10000000U; + break; + case 1: + mask = 0xf8000000U; + len = 0x08000000U; + break; + case 2: + mask = 0xfc000000U; + len = 0x04000000U; + break; + default: + pci_mmcfg_config_num = 0; + } + + /* Errata #2, things break when not aligned on a 256Mb boundary */ + /* Can only happen in 64M/128M mode */ + + if ((pciexbar & mask) & 0x0fffffffU) + pci_mmcfg_config_num = 0; + + /* Don't hit the APIC registers and their friends */ + if ((pciexbar & mask) >= 0xf0000000U) + pci_mmcfg_config_num = 0; + + if (pci_mmcfg_config_num) { + pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL); + if (!pci_mmcfg_config) + return NULL; + pci_mmcfg_config[0].address = pciexbar & mask; + pci_mmcfg_config[0].pci_segment = 0; + pci_mmcfg_config[0].start_bus_number = 0; + pci_mmcfg_config[0].end_bus_number = (len >> 20) - 1; + } + + return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub"; +} + +struct pci_mmcfg_hostbridge_probe { + u32 vendor; + u32 device; + const char *(*probe)(void); +}; + +static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = { + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, pci_mmcfg_e7520 }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82945G_HB, pci_mmcfg_intel_945 }, +}; + +static int __init pci_mmcfg_check_hostbridge(void) +{ + u32 l; + u16 vendor, device; + int i; + const char *name; + + pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0, 4, &l); + vendor = l & 0xffff; + device = (l >> 16) & 0xffff; + + pci_mmcfg_config_num = 0; + pci_mmcfg_config = NULL; + name = NULL; + + for (i = 0; !name && i < ARRAY_SIZE(pci_mmcfg_probes); i++) { + if (pci_mmcfg_probes[i].vendor == vendor && + pci_mmcfg_probes[i].device == device) + name = pci_mmcfg_probes[i].probe(); + } + + if (name) { + printk(KERN_INFO "PCI: Found %s %s MMCONFIG support.\n", + name, pci_mmcfg_config_num ? "with" : "without"); + } + + return name != NULL; +} + +static void __init pci_mmcfg_insert_resources(unsigned long resource_flags) +{ +#define PCI_MMCFG_RESOURCE_NAME_LEN 19 + int i; + struct resource *res; + char *names; + unsigned num_buses; + + res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res), + pci_mmcfg_config_num, GFP_KERNEL); + if (!res) { + printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n"); + return; + } + + names = (void *)&res[pci_mmcfg_config_num]; + for (i = 0; i < pci_mmcfg_config_num; i++, res++) { + struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i]; + num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; + res->name = names; + snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u", + cfg->pci_segment); + res->start = cfg->address; + res->end = res->start + (num_buses << 20) - 1; + res->flags = IORESOURCE_MEM | resource_flags; + insert_resource(&iomem_resource, res); + names += PCI_MMCFG_RESOURCE_NAME_LEN; + } + + /* Mark that the resources have been inserted. */ + pci_mmcfg_resources_inserted = 1; +} + +static void __init pci_mmcfg_reject_broken(int type) +{ + typeof(pci_mmcfg_config[0]) *cfg; + + if ((pci_mmcfg_config_num == 0) || + (pci_mmcfg_config == NULL) || + (pci_mmcfg_config[0].address == 0)) + return; + + cfg = &pci_mmcfg_config[0]; + + /* + * Handle more broken MCFG tables on Asus etc. + * They only contain a single entry for bus 0-0. + */ + if (pci_mmcfg_config_num == 1 && + cfg->pci_segment == 0 && + (cfg->start_bus_number | cfg->end_bus_number) == 0) { + printk(KERN_ERR "PCI: start and end of bus number is 0. " + "Rejected as broken MCFG.\n"); + goto reject; + } + + /* + * Only do this check when type 1 works. If it doesn't work + * assume we run on a Mac and always use MCFG + */ + if (type == 1 && !e820_all_mapped(cfg->address, + cfg->address + MMCONFIG_APER_MIN, + E820_RESERVED)) { + printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" + " E820-reserved\n", cfg->address); + goto reject; + } + return; + +reject: + printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); + kfree(pci_mmcfg_config); + pci_mmcfg_config = NULL; + pci_mmcfg_config_num = 0; +} + +void __init pci_mmcfg_init(int type) +{ + int known_bridge = 0; + + if ((pci_probe & PCI_PROBE_MMCONF) == 0) + return; + + if (type == 1 && pci_mmcfg_check_hostbridge()) + known_bridge = 1; + + if (!known_bridge) { + acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); + pci_mmcfg_reject_broken(type); + } + + if ((pci_mmcfg_config_num == 0) || + (pci_mmcfg_config == NULL) || + (pci_mmcfg_config[0].address == 0)) + return; + + if (pci_mmcfg_arch_init()) { + if (type == 1) + unreachable_devices(); + if (known_bridge) + pci_mmcfg_insert_resources(IORESOURCE_BUSY); + pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; + } else { + /* + * Signal not to attempt to insert mmcfg resources because + * the architecture mmcfg setup could not initialize. + */ + pci_mmcfg_resources_inserted = 1; + } +} + +static int __init pci_mmcfg_late_insert_resources(void) +{ + /* + * If resources are already inserted or we are not using MMCONFIG, + * don't insert the resources. + */ + if ((pci_mmcfg_resources_inserted == 1) || + (pci_probe & PCI_PROBE_MMCONF) == 0 || + (pci_mmcfg_config_num == 0) || + (pci_mmcfg_config == NULL) || + (pci_mmcfg_config[0].address == 0)) + return 1; + + /* + * Attempt to insert the mmcfg resources but not with the busy flag + * marked so it won't cause request errors when __request_region is + * called. + */ + pci_mmcfg_insert_resources(0); + + return 0; +} + +/* + * Perform MMCONFIG resource insertion after PCI initialization to allow for + * misprogrammed MCFG tables that state larger sizes but actually conflict + * with other system resources. + */ +late_initcall(pci_mmcfg_late_insert_resources); diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c new file mode 100644 index 00000000000..1bf5816d34c --- /dev/null +++ b/arch/x86/pci/mmconfig_32.c @@ -0,0 +1,148 @@ +/* + * Copyright (C) 2004 Matthew Wilcox + * Copyright (C) 2004 Intel Corp. + * + * This code is released under the GNU General Public License version 2. + */ + +/* + * mmconfig.c - Low-level direct PCI config space access via MMCONFIG + */ + +#include +#include +#include +#include +#include "pci.h" + +/* Assume systems with more busses have correct MCFG */ +#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) + +/* The base address of the last MMCONFIG device accessed */ +static u32 mmcfg_last_accessed_device; +static int mmcfg_last_accessed_cpu; + +/* + * Functions for accessing PCI configuration space with MMCONFIG accesses + */ +static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) +{ + struct acpi_mcfg_allocation *cfg; + int cfg_num; + + if (seg == 0 && bus < PCI_MMCFG_MAX_CHECK_BUS && + test_bit(PCI_SLOT(devfn) + 32*bus, pci_mmcfg_fallback_slots)) + return 0; + + for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { + cfg = &pci_mmcfg_config[cfg_num]; + if (cfg->pci_segment == seg && + (cfg->start_bus_number <= bus) && + (cfg->end_bus_number >= bus)) + return cfg->address; + } + + /* Fall back to type 0 */ + return 0; +} + +/* + * This is always called under pci_config_lock + */ +static void pci_exp_set_dev_base(unsigned int base, int bus, int devfn) +{ + u32 dev_base = base | (bus << 20) | (devfn << 12); + int cpu = smp_processor_id(); + if (dev_base != mmcfg_last_accessed_device || + cpu != mmcfg_last_accessed_cpu) { + mmcfg_last_accessed_device = dev_base; + mmcfg_last_accessed_cpu = cpu; + set_fixmap_nocache(FIX_PCIE_MCFG, dev_base); + } +} + +static int pci_mmcfg_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value) +{ + unsigned long flags; + u32 base; + + if ((bus > 255) || (devfn > 255) || (reg > 4095)) { + *value = -1; + return -EINVAL; + } + + base = get_base_addr(seg, bus, devfn); + if (!base) + return pci_conf1_read(seg,bus,devfn,reg,len,value); + + spin_lock_irqsave(&pci_config_lock, flags); + + pci_exp_set_dev_base(base, bus, devfn); + + switch (len) { + case 1: + *value = mmio_config_readb(mmcfg_virt_addr + reg); + break; + case 2: + *value = mmio_config_readw(mmcfg_virt_addr + reg); + break; + case 4: + *value = mmio_config_readl(mmcfg_virt_addr + reg); + break; + } + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +static int pci_mmcfg_write(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 value) +{ + unsigned long flags; + u32 base; + + if ((bus > 255) || (devfn > 255) || (reg > 4095)) + return -EINVAL; + + base = get_base_addr(seg, bus, devfn); + if (!base) + return pci_conf1_write(seg,bus,devfn,reg,len,value); + + spin_lock_irqsave(&pci_config_lock, flags); + + pci_exp_set_dev_base(base, bus, devfn); + + switch (len) { + case 1: + mmio_config_writeb(mmcfg_virt_addr + reg, value); + break; + case 2: + mmio_config_writew(mmcfg_virt_addr + reg, value); + break; + case 4: + mmio_config_writel(mmcfg_virt_addr + reg, value); + break; + } + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +static struct pci_raw_ops pci_mmcfg = { + .read = pci_mmcfg_read, + .write = pci_mmcfg_write, +}; + +int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus, + unsigned int devfn) +{ + return get_base_addr(seg, bus, devfn) != 0; +} + +int __init pci_mmcfg_arch_init(void) +{ + printk(KERN_INFO "PCI: Using MMCONFIG\n"); + raw_pci_ops = &pci_mmcfg; + return 1; +} diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c new file mode 100644 index 00000000000..f5f165f69e0 --- /dev/null +++ b/arch/x86/pci/numa.c @@ -0,0 +1,135 @@ +/* + * numa.c - Low-level PCI access for NUMA-Q machines + */ + +#include +#include +#include +#include "pci.h" + +#define BUS2QUAD(global) (mp_bus_id_to_node[global]) +#define BUS2LOCAL(global) (mp_bus_id_to_local[global]) +#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) + +#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \ + (0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3)) + +static int pci_conf1_mq_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value) +{ + unsigned long flags; + + if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus)); + + switch (len) { + case 1: + *value = inb_quad(0xCFC + (reg & 3), BUS2QUAD(bus)); + break; + case 2: + *value = inw_quad(0xCFC + (reg & 2), BUS2QUAD(bus)); + break; + case 4: + *value = inl_quad(0xCFC, BUS2QUAD(bus)); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +static int pci_conf1_mq_write(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 value) +{ + unsigned long flags; + + if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus)); + + switch (len) { + case 1: + outb_quad((u8)value, 0xCFC + (reg & 3), BUS2QUAD(bus)); + break; + case 2: + outw_quad((u16)value, 0xCFC + (reg & 2), BUS2QUAD(bus)); + break; + case 4: + outl_quad((u32)value, 0xCFC, BUS2QUAD(bus)); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return 0; +} + +#undef PCI_CONF1_MQ_ADDRESS + +static struct pci_raw_ops pci_direct_conf1_mq = { + .read = pci_conf1_mq_read, + .write = pci_conf1_mq_write +}; + + +static void __devinit pci_fixup_i450nx(struct pci_dev *d) +{ + /* + * i450NX -- Find and scan all secondary buses on all PXB's. + */ + int pxb, reg; + u8 busno, suba, subb; + int quad = BUS2QUAD(d->bus->number); + + printk("PCI: Searching for i450NX host bridges on %s\n", pci_name(d)); + reg = 0xd0; + for(pxb=0; pxb<2; pxb++) { + pci_read_config_byte(d, reg++, &busno); + pci_read_config_byte(d, reg++, &suba); + pci_read_config_byte(d, reg++, &subb); + DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); + if (busno) { + /* Bus A */ + pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno)); + } + if (suba < subb) { + /* Bus B */ + pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, suba+1)); + } + } + pcibios_last_bus = -1; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx); + +static int __init pci_numa_init(void) +{ + int quad; + + raw_pci_ops = &pci_direct_conf1_mq; + + if (pcibios_scanned++) + return 0; + + pci_root_bus = pcibios_scan_root(0); + if (pci_root_bus) + pci_bus_add_devices(pci_root_bus); + if (num_online_nodes() > 1) + for_each_online_node(quad) { + if (quad == 0) + continue; + printk("Scanning PCI bus %d for quad %d\n", + QUADLOCAL2BUS(quad,0), quad); + pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, 0)); + } + return 0; +} + +subsys_initcall(pci_numa_init); diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c new file mode 100644 index 00000000000..10ac8c316c4 --- /dev/null +++ b/arch/x86/pci/pcbios.c @@ -0,0 +1,492 @@ +/* + * BIOS32 and PCI BIOS handling. + */ + +#include +#include +#include +#include +#include "pci.h" +#include "pci-functions.h" + + +/* BIOS32 signature: "_32_" */ +#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) + +/* PCI signature: "PCI " */ +#define PCI_SIGNATURE (('P' << 0) + ('C' << 8) + ('I' << 16) + (' ' << 24)) + +/* PCI service signature: "$PCI" */ +#define PCI_SERVICE (('$' << 0) + ('P' << 8) + ('C' << 16) + ('I' << 24)) + +/* PCI BIOS hardware mechanism flags */ +#define PCIBIOS_HW_TYPE1 0x01 +#define PCIBIOS_HW_TYPE2 0x02 +#define PCIBIOS_HW_TYPE1_SPEC 0x10 +#define PCIBIOS_HW_TYPE2_SPEC 0x20 + +/* + * This is the standard structure used to identify the entry point + * to the BIOS32 Service Directory, as documented in + * Standard BIOS 32-bit Service Directory Proposal + * Revision 0.4 May 24, 1993 + * Phoenix Technologies Ltd. + * Norwood, MA + * and the PCI BIOS specification. + */ + +union bios32 { + struct { + unsigned long signature; /* _32_ */ + unsigned long entry; /* 32 bit physical address */ + unsigned char revision; /* Revision level, 0 */ + unsigned char length; /* Length in paragraphs should be 01 */ + unsigned char checksum; /* All bytes must add up to zero */ + unsigned char reserved[5]; /* Must be zero */ + } fields; + char chars[16]; +}; + +/* + * Physical address of the service directory. I don't know if we're + * allowed to have more than one of these or not, so just in case + * we'll make pcibios_present() take a memory start parameter and store + * the array there. + */ + +static struct { + unsigned long address; + unsigned short segment; +} bios32_indirect = { 0, __KERNEL_CS }; + +/* + * Returns the entry point for the given service, NULL on error + */ + +static unsigned long bios32_service(unsigned long service) +{ + unsigned char return_code; /* %al */ + unsigned long address; /* %ebx */ + unsigned long length; /* %ecx */ + unsigned long entry; /* %edx */ + unsigned long flags; + + local_irq_save(flags); + __asm__("lcall *(%%edi); cld" + : "=a" (return_code), + "=b" (address), + "=c" (length), + "=d" (entry) + : "0" (service), + "1" (0), + "D" (&bios32_indirect)); + local_irq_restore(flags); + + switch (return_code) { + case 0: + return address + entry; + case 0x80: /* Not present */ + printk(KERN_WARNING "bios32_service(0x%lx): not present\n", service); + return 0; + default: /* Shouldn't happen */ + printk(KERN_WARNING "bios32_service(0x%lx): returned 0x%x -- BIOS bug!\n", + service, return_code); + return 0; + } +} + +static struct { + unsigned long address; + unsigned short segment; +} pci_indirect = { 0, __KERNEL_CS }; + +static int pci_bios_present; + +static int __devinit check_pcibios(void) +{ + u32 signature, eax, ebx, ecx; + u8 status, major_ver, minor_ver, hw_mech; + unsigned long flags, pcibios_entry; + + if ((pcibios_entry = bios32_service(PCI_SERVICE))) { + pci_indirect.address = pcibios_entry + PAGE_OFFSET; + + local_irq_save(flags); + __asm__( + "lcall *(%%edi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=d" (signature), + "=a" (eax), + "=b" (ebx), + "=c" (ecx) + : "1" (PCIBIOS_PCI_BIOS_PRESENT), + "D" (&pci_indirect) + : "memory"); + local_irq_restore(flags); + + status = (eax >> 8) & 0xff; + hw_mech = eax & 0xff; + major_ver = (ebx >> 8) & 0xff; + minor_ver = ebx & 0xff; + if (pcibios_last_bus < 0) + pcibios_last_bus = ecx & 0xff; + DBG("PCI: BIOS probe returned s=%02x hw=%02x ver=%02x.%02x l=%02x\n", + status, hw_mech, major_ver, minor_ver, pcibios_last_bus); + if (status || signature != PCI_SIGNATURE) { + printk (KERN_ERR "PCI: BIOS BUG #%x[%08x] found\n", + status, signature); + return 0; + } + printk(KERN_INFO "PCI: PCI BIOS revision %x.%02x entry at 0x%lx, last bus=%d\n", + major_ver, minor_ver, pcibios_entry, pcibios_last_bus); +#ifdef CONFIG_PCI_DIRECT + if (!(hw_mech & PCIBIOS_HW_TYPE1)) + pci_probe &= ~PCI_PROBE_CONF1; + if (!(hw_mech & PCIBIOS_HW_TYPE2)) + pci_probe &= ~PCI_PROBE_CONF2; +#endif + return 1; + } + return 0; +} + +static int __devinit pci_bios_find_device (unsigned short vendor, unsigned short device_id, + unsigned short index, unsigned char *bus, unsigned char *device_fn) +{ + unsigned short bx; + unsigned short ret; + + __asm__("lcall *(%%edi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=b" (bx), + "=a" (ret) + : "1" (PCIBIOS_FIND_PCI_DEVICE), + "c" (device_id), + "d" (vendor), + "S" ((int) index), + "D" (&pci_indirect)); + *bus = (bx >> 8) & 0xff; + *device_fn = bx & 0xff; + return (int) (ret & 0xff00) >> 8; +} + +static int pci_bios_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value) +{ + unsigned long result = 0; + unsigned long flags; + unsigned long bx = (bus << 8) | devfn; + + if (!value || (bus > 255) || (devfn > 255) || (reg > 255)) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + switch (len) { + case 1: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=c" (*value), + "=a" (result) + : "1" (PCIBIOS_READ_CONFIG_BYTE), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + case 2: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=c" (*value), + "=a" (result) + : "1" (PCIBIOS_READ_CONFIG_WORD), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + case 4: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=c" (*value), + "=a" (result) + : "1" (PCIBIOS_READ_CONFIG_DWORD), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return (int)((result & 0xff00) >> 8); +} + +static int pci_bios_write(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 value) +{ + unsigned long result = 0; + unsigned long flags; + unsigned long bx = (bus << 8) | devfn; + + if ((bus > 255) || (devfn > 255) || (reg > 255)) + return -EINVAL; + + spin_lock_irqsave(&pci_config_lock, flags); + + switch (len) { + case 1: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (result) + : "0" (PCIBIOS_WRITE_CONFIG_BYTE), + "c" (value), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + case 2: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (result) + : "0" (PCIBIOS_WRITE_CONFIG_WORD), + "c" (value), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + case 4: + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (result) + : "0" (PCIBIOS_WRITE_CONFIG_DWORD), + "c" (value), + "b" (bx), + "D" ((long)reg), + "S" (&pci_indirect)); + break; + } + + spin_unlock_irqrestore(&pci_config_lock, flags); + + return (int)((result & 0xff00) >> 8); +} + + +/* + * Function table for BIOS32 access + */ + +static struct pci_raw_ops pci_bios_access = { + .read = pci_bios_read, + .write = pci_bios_write +}; + +/* + * Try to find PCI BIOS. + */ + +static struct pci_raw_ops * __devinit pci_find_bios(void) +{ + union bios32 *check; + unsigned char sum; + int i, length; + + /* + * Follow the standard procedure for locating the BIOS32 Service + * directory by scanning the permissible address range from + * 0xe0000 through 0xfffff for a valid BIOS32 structure. + */ + + for (check = (union bios32 *) __va(0xe0000); + check <= (union bios32 *) __va(0xffff0); + ++check) { + long sig; + if (probe_kernel_address(&check->fields.signature, sig)) + continue; + + if (check->fields.signature != BIOS32_SIGNATURE) + continue; + length = check->fields.length * 16; + if (!length) + continue; + sum = 0; + for (i = 0; i < length ; ++i) + sum += check->chars[i]; + if (sum != 0) + continue; + if (check->fields.revision != 0) { + printk("PCI: unsupported BIOS32 revision %d at 0x%p\n", + check->fields.revision, check); + continue; + } + DBG("PCI: BIOS32 Service Directory structure at 0x%p\n", check); + if (check->fields.entry >= 0x100000) { + printk("PCI: BIOS32 entry (0x%p) in high memory, " + "cannot use.\n", check); + return NULL; + } else { + unsigned long bios32_entry = check->fields.entry; + DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", + bios32_entry); + bios32_indirect.address = bios32_entry + PAGE_OFFSET; + if (check_pcibios()) + return &pci_bios_access; + } + break; /* Hopefully more than one BIOS32 cannot happen... */ + } + + return NULL; +} + +/* + * Sort the device list according to PCI BIOS. Nasty hack, but since some + * fool forgot to define the `correct' device order in the PCI BIOS specs + * and we want to be (possibly bug-to-bug ;-]) compatible with older kernels + * which used BIOS ordering, we are bound to do this... + */ + +void __devinit pcibios_sort(void) +{ + LIST_HEAD(sorted_devices); + struct list_head *ln; + struct pci_dev *dev, *d; + int idx, found; + unsigned char bus, devfn; + + DBG("PCI: Sorting device list...\n"); + while (!list_empty(&pci_devices)) { + ln = pci_devices.next; + dev = pci_dev_g(ln); + idx = found = 0; + while (pci_bios_find_device(dev->vendor, dev->device, idx, &bus, &devfn) == PCIBIOS_SUCCESSFUL) { + idx++; + list_for_each(ln, &pci_devices) { + d = pci_dev_g(ln); + if (d->bus->number == bus && d->devfn == devfn) { + list_move_tail(&d->global_list, &sorted_devices); + if (d == dev) + found = 1; + break; + } + } + if (ln == &pci_devices) { + printk(KERN_WARNING "PCI: BIOS reporting unknown device %02x:%02x\n", bus, devfn); + /* + * We must not continue scanning as several buggy BIOSes + * return garbage after the last device. Grr. + */ + break; + } + } + if (!found) { + printk(KERN_WARNING "PCI: Device %s not found by BIOS\n", + pci_name(dev)); + list_move_tail(&dev->global_list, &sorted_devices); + } + } + list_splice(&sorted_devices, &pci_devices); +} + +/* + * BIOS Functions for IRQ Routing + */ + +struct irq_routing_options { + u16 size; + struct irq_info *table; + u16 segment; +} __attribute__((packed)); + +struct irq_routing_table * pcibios_get_irq_routing_table(void) +{ + struct irq_routing_options opt; + struct irq_routing_table *rt = NULL; + int ret, map; + unsigned long page; + + if (!pci_bios_present) + return NULL; + page = __get_free_page(GFP_KERNEL); + if (!page) + return NULL; + opt.table = (struct irq_info *) page; + opt.size = PAGE_SIZE; + opt.segment = __KERNEL_DS; + + DBG("PCI: Fetching IRQ routing table... "); + __asm__("push %%es\n\t" + "push %%ds\n\t" + "pop %%es\n\t" + "lcall *(%%esi); cld\n\t" + "pop %%es\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (ret), + "=b" (map), + "=m" (opt) + : "0" (PCIBIOS_GET_ROUTING_OPTIONS), + "1" (0), + "D" ((long) &opt), + "S" (&pci_indirect), + "m" (opt) + : "memory"); + DBG("OK ret=%d, size=%d, map=%x\n", ret, opt.size, map); + if (ret & 0xff00) + printk(KERN_ERR "PCI: Error %02x when fetching IRQ routing table.\n", (ret >> 8) & 0xff); + else if (opt.size) { + rt = kmalloc(sizeof(struct irq_routing_table) + opt.size, GFP_KERNEL); + if (rt) { + memset(rt, 0, sizeof(struct irq_routing_table)); + rt->size = opt.size + sizeof(struct irq_routing_table); + rt->exclusive_irqs = map; + memcpy(rt->slots, (void *) page, opt.size); + printk(KERN_INFO "PCI: Using BIOS Interrupt Routing Table\n"); + } + } + free_page(page); + return rt; +} +EXPORT_SYMBOL(pcibios_get_irq_routing_table); + +int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) +{ + int ret; + + __asm__("lcall *(%%esi); cld\n\t" + "jc 1f\n\t" + "xor %%ah, %%ah\n" + "1:" + : "=a" (ret) + : "0" (PCIBIOS_SET_PCI_HW_INT), + "b" ((dev->bus->number << 8) | dev->devfn), + "c" ((irq << 8) | (pin + 10)), + "S" (&pci_indirect)); + return !(ret & 0xff00); +} +EXPORT_SYMBOL(pcibios_set_irq_routing); + +void __init pci_pcbios_init(void) +{ + if ((pci_probe & PCI_PROBE_BIOS) + && ((raw_pci_ops = pci_find_bios()))) { + pci_probe |= PCI_BIOS_SORT; + pci_bios_present = 1; + } +} + diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h new file mode 100644 index 00000000000..8c66f275756 --- /dev/null +++ b/arch/x86/pci/pci.h @@ -0,0 +1,149 @@ +/* + * Low-Level PCI Access for i386 machines. + * + * (c) 1999 Martin Mares + */ + +#undef DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +#define PCI_PROBE_BIOS 0x0001 +#define PCI_PROBE_CONF1 0x0002 +#define PCI_PROBE_CONF2 0x0004 +#define PCI_PROBE_MMCONF 0x0008 +#define PCI_PROBE_MASK 0x000f +#define PCI_PROBE_NOEARLY 0x0010 + +#define PCI_NO_SORT 0x0100 +#define PCI_BIOS_SORT 0x0200 +#define PCI_NO_CHECKS 0x0400 +#define PCI_USE_PIRQ_MASK 0x0800 +#define PCI_ASSIGN_ROMS 0x1000 +#define PCI_BIOS_IRQ_SCAN 0x2000 +#define PCI_ASSIGN_ALL_BUSSES 0x4000 + +extern unsigned int pci_probe; +extern unsigned long pirq_table_addr; + +enum pci_bf_sort_state { + pci_bf_sort_default, + pci_force_nobf, + pci_force_bf, + pci_dmi_bf, +}; + +/* pci-i386.c */ + +extern unsigned int pcibios_max_latency; + +void pcibios_resource_survey(void); +int pcibios_enable_resources(struct pci_dev *, int); + +/* pci-pc.c */ + +extern int pcibios_last_bus; +extern struct pci_bus *pci_root_bus; +extern struct pci_ops pci_root_ops; + +/* pci-irq.c */ + +struct irq_info { + u8 bus, devfn; /* Bus, device and function */ + struct { + u8 link; /* IRQ line ID, chipset dependent, 0=not routed */ + u16 bitmap; /* Available IRQs */ + } __attribute__((packed)) irq[4]; + u8 slot; /* Slot number, 0=onboard */ + u8 rfu; +} __attribute__((packed)); + +struct irq_routing_table { + u32 signature; /* PIRQ_SIGNATURE should be here */ + u16 version; /* PIRQ_VERSION */ + u16 size; /* Table size in bytes */ + u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ + u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */ + u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */ + u32 miniport_data; /* Crap */ + u8 rfu[11]; + u8 checksum; /* Modulo 256 checksum must give zero */ + struct irq_info slots[0]; +} __attribute__((packed)); + +extern unsigned int pcibios_irq_mask; + +extern int pcibios_scanned; +extern spinlock_t pci_config_lock; + +extern int (*pcibios_enable_irq)(struct pci_dev *dev); +extern void (*pcibios_disable_irq)(struct pci_dev *dev); + +extern int pci_conf1_write(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 value); +extern int pci_conf1_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, u32 *value); + +extern int pci_direct_probe(void); +extern void pci_direct_init(int type); +extern void pci_pcbios_init(void); +extern void pci_mmcfg_init(int type); +extern void pcibios_sort(void); + +/* pci-mmconfig.c */ + +/* Verify the first 16 busses. We assume that systems with more busses + get MCFG right. */ +#define PCI_MMCFG_MAX_CHECK_BUS 16 +extern DECLARE_BITMAP(pci_mmcfg_fallback_slots, 32*PCI_MMCFG_MAX_CHECK_BUS); + +extern int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus, + unsigned int devfn); +extern int __init pci_mmcfg_arch_init(void); + +/* + * AMD Fam10h CPUs are buggy, and cannot access MMIO config space + * on their northbrige except through the * %eax register. As such, you MUST + * NOT use normal IOMEM accesses, you need to only use the magic mmio-config + * accessor functions. + * In fact just use pci_config_*, nothing else please. + */ +static inline unsigned char mmio_config_readb(void __iomem *pos) +{ + u8 val; + asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos)); + return val; +} + +static inline unsigned short mmio_config_readw(void __iomem *pos) +{ + u16 val; + asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos)); + return val; +} + +static inline unsigned int mmio_config_readl(void __iomem *pos) +{ + u32 val; + asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos)); + return val; +} + +static inline void mmio_config_writeb(void __iomem *pos, u8 val) +{ + asm volatile("movb %%al,(%1)" :: "a" (val), "r" (pos) : "memory"); +} + +static inline void mmio_config_writew(void __iomem *pos, u16 val) +{ + asm volatile("movw %%ax,(%1)" :: "a" (val), "r" (pos) : "memory"); +} + +static inline void mmio_config_writel(void __iomem *pos, u32 val) +{ + asm volatile("movl %%eax,(%1)" :: "a" (val), "r" (pos) : "memory"); +} diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c new file mode 100644 index 00000000000..8ecb1c72259 --- /dev/null +++ b/arch/x86/pci/visws.c @@ -0,0 +1,111 @@ +/* + * Low-Level PCI Support for SGI Visual Workstation + * + * (c) 1999--2000 Martin Mares + */ + +#include +#include +#include + +#include "cobalt.h" +#include "lithium.h" + +#include "pci.h" + + +extern struct pci_raw_ops pci_direct_conf1; + +static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; } +static void pci_visws_disable_irq(struct pci_dev *dev) { } + +int (*pcibios_enable_irq)(struct pci_dev *dev) = &pci_visws_enable_irq; +void (*pcibios_disable_irq)(struct pci_dev *dev) = &pci_visws_disable_irq; + +void __init pcibios_penalize_isa_irq(int irq, int active) {} + + +unsigned int pci_bus0, pci_bus1; + +static inline u8 bridge_swizzle(u8 pin, u8 slot) +{ + return (((pin - 1) + slot) % 4) + 1; +} + +static u8 __init visws_swizzle(struct pci_dev *dev, u8 *pinp) +{ + u8 pin = *pinp; + + while (dev->bus->self) { /* Move up the chain of bridges. */ + pin = bridge_swizzle(pin, PCI_SLOT(dev->devfn)); + dev = dev->bus->self; + } + *pinp = pin; + + return PCI_SLOT(dev->devfn); +} + +static int __init visws_map_irq(struct pci_dev *dev, u8 slot, u8 pin) +{ + int irq, bus = dev->bus->number; + + pin--; + + /* Nothing useful at PIIX4 pin 1 */ + if (bus == pci_bus0 && slot == 4 && pin == 0) + return -1; + + /* PIIX4 USB is on Bus 0, Slot 4, Line 3 */ + if (bus == pci_bus0 && slot == 4 && pin == 3) { + irq = CO_IRQ(CO_APIC_PIIX4_USB); + goto out; + } + + /* First pin spread down 1 APIC entry per slot */ + if (pin == 0) { + irq = CO_IRQ((bus == pci_bus0 ? CO_APIC_PCIB_BASE0 : + CO_APIC_PCIA_BASE0) + slot); + goto out; + } + + /* lines 1,2,3 from any slot is shared in this twirly pattern */ + if (bus == pci_bus1) { + /* lines 1-3 from devices 0 1 rotate over 2 apic entries */ + irq = CO_IRQ(CO_APIC_PCIA_BASE123 + ((slot + (pin - 1)) % 2)); + } else { /* bus == pci_bus0 */ + /* lines 1-3 from devices 0-3 rotate over 3 apic entries */ + if (slot == 0) + slot = 3; /* same pattern */ + irq = CO_IRQ(CO_APIC_PCIA_BASE123 + ((3 - slot) + (pin - 1) % 3)); + } +out: + printk(KERN_DEBUG "PCI: Bus %d Slot %d Line %d -> IRQ %d\n", bus, slot, pin, irq); + return irq; +} + +void __init pcibios_update_irq(struct pci_dev *dev, int irq) +{ + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); +} + +static int __init pcibios_init(void) +{ + /* The VISWS supports configuration access type 1 only */ + pci_probe = (pci_probe | PCI_PROBE_CONF1) & + ~(PCI_PROBE_BIOS | PCI_PROBE_CONF2); + + pci_bus0 = li_pcib_read16(LI_PCI_BUSNUM) & 0xff; + pci_bus1 = li_pcia_read16(LI_PCI_BUSNUM) & 0xff; + + printk(KERN_INFO "PCI: Lithium bridge A bus: %u, " + "bridge B (PIIX4) bus: %u\n", pci_bus1, pci_bus0); + + raw_pci_ops = &pci_direct_conf1; + pci_scan_bus_with_sysdata(pci_bus0); + pci_scan_bus_with_sysdata(pci_bus1); + pci_fixup_irqs(visws_swizzle, visws_map_irq); + pcibios_resource_survey(); + return 0; +} + +subsys_initcall(pcibios_init); diff --git a/arch/x86_64/pci/Makefile b/arch/x86_64/pci/Makefile index d04d8f976d1..b3e54c45d40 100644 --- a/arch/x86_64/pci/Makefile +++ b/arch/x86_64/pci/Makefile @@ -1,5 +1,5 @@ ifeq ($(CONFIG_X86_32),y) -include ${srctree}/arch/i386/pci/Makefile_32 +include ${srctree}/arch/x86/pci/Makefile_32 else include ${srctree}/arch/x86_64/pci/Makefile_64 endif diff --git a/arch/x86_64/pci/Makefile_64 b/arch/x86_64/pci/Makefile_64 index 77b1b9abedc..79b618b9e9d 100644 --- a/arch/x86_64/pci/Makefile_64 +++ b/arch/x86_64/pci/Makefile_64 @@ -3,7 +3,7 @@ # # Reuse the i386 PCI subsystem # -EXTRA_CFLAGS += -Iarch/i386/pci +EXTRA_CFLAGS += -Iarch/x86/pci obj-y := i386.o obj-$(CONFIG_PCI_DIRECT)+= direct.o @@ -15,13 +15,13 @@ obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_64.o direct.o mmconfig-shared.o obj-$(CONFIG_NUMA) += k8-bus_64.o -direct-y += ../../i386/pci/direct.o -acpi-y += ../../i386/pci/acpi.o -legacy-y += ../../i386/pci/legacy.o -irq-y += ../../i386/pci/irq.o -common-y += ../../i386/pci/common.o -fixup-y += ../../i386/pci/fixup.o -i386-y += ../../i386/pci/i386.o -init-y += ../../i386/pci/init.o -early-y += ../../i386/pci/early.o -mmconfig-shared-y += ../../i386/pci/mmconfig-shared.o +direct-y += ../../x86/pci/direct.o +acpi-y += ../../x86/pci/acpi.o +legacy-y += ../../x86/pci/legacy.o +irq-y += ../../x86/pci/irq.o +common-y += ../../x86/pci/common.o +fixup-y += ../../x86/pci/fixup.o +i386-y += ../../x86/pci/i386.o +init-y += ../../x86/pci/init.o +early-y += ../../x86/pci/early.o +mmconfig-shared-y += ../../x86/pci/mmconfig-shared.o diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c index d590a99930f..2305cc450a4 100644 --- a/drivers/pci/hotplug/cpqphp_core.c +++ b/drivers/pci/hotplug/cpqphp_core.c @@ -45,7 +45,7 @@ #include "cpqphp.h" #include "cpqphp_nvram.h" -#include "../../../arch/i386/pci/pci.h" /* horrible hack showing how processor dependent we are... */ +#include "../../../arch/x86/pci/pci.h" /* horrible hack showing how processor dependent we are... */ /* Global variables */ diff --git a/drivers/pci/hotplug/cpqphp_pci.c b/drivers/pci/hotplug/cpqphp_pci.c index fc7c74d7259..3f6cd20e95d 100644 --- a/drivers/pci/hotplug/cpqphp_pci.c +++ b/drivers/pci/hotplug/cpqphp_pci.c @@ -37,7 +37,7 @@ #include "../pci.h" #include "cpqphp.h" #include "cpqphp_nvram.h" -#include "../../../arch/i386/pci/pci.h" /* horrible hack showing how processor dependent we are... */ +#include "../../../arch/x86/pci/pci.h" /* horrible hack showing how processor dependent we are... */ u8 cpqhp_nic_irq; diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c index 0316eeaaeb2..a90c28d0c69 100644 --- a/drivers/pci/hotplug/ibmphp_core.c +++ b/drivers/pci/hotplug/ibmphp_core.c @@ -35,7 +35,7 @@ #include #include #include "../pci.h" -#include "../../../arch/i386/pci/pci.h" /* for struct irq_routing_table */ +#include "../../../arch/x86/pci/pci.h" /* for struct irq_routing_table */ #include "ibmphp.h" #define attn_on(sl) ibmphp_hpc_writeslot (sl, HPC_SLOT_ATTNON) -- cgit v1.2.3-70-g09d2 From c2b84d8d1a66a0a886de51f1bfef5c4f16c0c784 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:38 +0200 Subject: i386: move mach-visws Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Makefile | 2 +- arch/i386/mach-visws/Makefile | 8 - arch/i386/mach-visws/mpparse.c | 101 ------------- arch/i386/mach-visws/reboot.c | 55 ------- arch/i386/mach-visws/setup.c | 183 ----------------------- arch/i386/mach-visws/traps.c | 68 --------- arch/i386/mach-visws/visws_apic.c | 299 -------------------------------------- arch/x86/mach-visws/Makefile | 8 + arch/x86/mach-visws/mpparse.c | 101 +++++++++++++ arch/x86/mach-visws/reboot.c | 55 +++++++ arch/x86/mach-visws/setup.c | 183 +++++++++++++++++++++++ arch/x86/mach-visws/traps.c | 68 +++++++++ arch/x86/mach-visws/visws_apic.c | 299 ++++++++++++++++++++++++++++++++++++++ 13 files changed, 715 insertions(+), 715 deletions(-) delete mode 100644 arch/i386/mach-visws/Makefile delete mode 100644 arch/i386/mach-visws/mpparse.c delete mode 100644 arch/i386/mach-visws/reboot.c delete mode 100644 arch/i386/mach-visws/setup.c delete mode 100644 arch/i386/mach-visws/traps.c delete mode 100644 arch/i386/mach-visws/visws_apic.c create mode 100644 arch/x86/mach-visws/Makefile create mode 100644 arch/x86/mach-visws/mpparse.c create mode 100644 arch/x86/mach-visws/reboot.c create mode 100644 arch/x86/mach-visws/setup.c create mode 100644 arch/x86/mach-visws/traps.c create mode 100644 arch/x86/mach-visws/visws_apic.c (limited to 'arch/x86') diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 87c959d5f35..191e171b367 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -69,7 +69,7 @@ mcore-$(CONFIG_X86_VOYAGER) := arch/i386/mach-voyager # VISWS subarch support mflags-$(CONFIG_X86_VISWS) := -Iinclude/asm-i386/mach-visws -mcore-$(CONFIG_X86_VISWS) := arch/i386/mach-visws +mcore-$(CONFIG_X86_VISWS) := arch/x86/mach-visws # NUMAQ subarch support mflags-$(CONFIG_X86_NUMAQ) := -Iinclude/asm-i386/mach-numaq diff --git a/arch/i386/mach-visws/Makefile b/arch/i386/mach-visws/Makefile deleted file mode 100644 index 835fd96ad76..00000000000 --- a/arch/i386/mach-visws/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# -# Makefile for the linux kernel. -# - -obj-y := setup.o traps.o reboot.o - -obj-$(CONFIG_X86_VISWS_APIC) += visws_apic.o -obj-$(CONFIG_X86_LOCAL_APIC) += mpparse.o diff --git a/arch/i386/mach-visws/mpparse.c b/arch/i386/mach-visws/mpparse.c deleted file mode 100644 index f3c74fab8b9..00000000000 --- a/arch/i386/mach-visws/mpparse.c +++ /dev/null @@ -1,101 +0,0 @@ - -#include -#include - -#include -#include - -#include "cobalt.h" -#include "mach_apic.h" - -/* Have we found an MP table */ -int smp_found_config; - -/* - * Various Linux-internal data structures created from the - * MP-table. - */ -int apic_version [MAX_APICS]; - -int pic_mode; -unsigned long mp_lapic_addr; - -/* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid = -1U; - -/* Bitmask of physically existing CPUs */ -physid_mask_t phys_cpu_present_map; - -unsigned int __initdata maxcpus = NR_CPUS; - -/* - * The Visual Workstation is Intel MP compliant in the hardware - * sense, but it doesn't have a BIOS(-configuration table). - * No problem for Linux. - */ - -static void __init MP_processor_info (struct mpc_config_processor *m) -{ - int ver, logical_apicid; - physid_mask_t apic_cpus; - - if (!(m->mpc_cpuflag & CPU_ENABLED)) - return; - - logical_apicid = m->mpc_apicid; - printk(KERN_INFO "%sCPU #%d %ld:%ld APIC version %d\n", - m->mpc_cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "", - m->mpc_apicid, - (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, - (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, - m->mpc_apicver); - - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) - boot_cpu_physical_apicid = m->mpc_apicid; - - ver = m->mpc_apicver; - if ((ver >= 0x14 && m->mpc_apicid >= 0xff) || m->mpc_apicid >= 0xf) { - printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", - m->mpc_apicid, MAX_APICS); - return; - } - - apic_cpus = apicid_to_cpu_present(m->mpc_apicid); - physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); - /* - * Validate version - */ - if (ver == 0x0) { - printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - m->mpc_apicid); - ver = 0x10; - } - apic_version[m->mpc_apicid] = ver; -} - -void __init find_smp_config(void) -{ - struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS); - unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); - - if (ncpus > CO_CPU_MAX) { - printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n", - ncpus, mp); - - ncpus = CO_CPU_MAX; - } - - if (ncpus > maxcpus) - ncpus = maxcpus; - - smp_found_config = 1; - while (ncpus--) - MP_processor_info(mp++); - - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; -} - -void __init get_smp_config (void) -{ -} diff --git a/arch/i386/mach-visws/reboot.c b/arch/i386/mach-visws/reboot.c deleted file mode 100644 index 99332abfad4..00000000000 --- a/arch/i386/mach-visws/reboot.c +++ /dev/null @@ -1,55 +0,0 @@ -#include -#include -#include - -#include -#include "piix4.h" - -void (*pm_power_off)(void); -EXPORT_SYMBOL(pm_power_off); - -void machine_shutdown(void) -{ -#ifdef CONFIG_SMP - smp_send_stop(); -#endif -} - -void machine_emergency_restart(void) -{ - /* - * Visual Workstations restart after this - * register is poked on the PIIX4 - */ - outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT); -} - -void machine_restart(char * __unused) -{ - machine_shutdown(); - machine_emergency_restart(); -} - -void machine_power_off(void) -{ - unsigned short pm_status; - extern unsigned int pci_bus0; - - while ((pm_status = inw(PMSTS_PORT)) & 0x100) - outw(pm_status, PMSTS_PORT); - - outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT); - - mdelay(10); - -#define PCI_CONF1_ADDRESS(bus, devfn, reg) \ - (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3)) - - outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); - outl(PIIX_SPECIAL_STOP, 0xCFC); -} - -void machine_halt(void) -{ -} - diff --git a/arch/i386/mach-visws/setup.c b/arch/i386/mach-visws/setup.c deleted file mode 100644 index 1f81f10e03a..00000000000 --- a/arch/i386/mach-visws/setup.c +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Unmaintained SGI Visual Workstation support. - * Split out from setup.c by davej@suse.de - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include "cobalt.h" -#include "piix4.h" - -int no_broadcast; - -char visws_board_type = -1; -char visws_board_rev = -1; - -void __init visws_get_board_type_and_rev(void) -{ - int raw; - - visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG) - >> PIIX_GPI_BD_SHIFT; - /* - * Get Board rev. - * First, we have to initialize the 307 part to allow us access - * to the GPIO registers. Let's map them at 0x0fc0 which is right - * after the PIIX4 PM section. - */ - outb_p(SIO_DEV_SEL, SIO_INDEX); - outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */ - - outb_p(SIO_DEV_MSB, SIO_INDEX); - outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */ - - outb_p(SIO_DEV_LSB, SIO_INDEX); - outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */ - - outb_p(SIO_DEV_ENB, SIO_INDEX); - outb_p(1, SIO_DATA); /* Enable GPIO registers. */ - - /* - * Now, we have to map the power management section to write - * a bit which enables access to the GPIO registers. - * What lunatic came up with this shit? - */ - outb_p(SIO_DEV_SEL, SIO_INDEX); - outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to GPIO regs. */ - - outb_p(SIO_DEV_MSB, SIO_INDEX); - outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */ - - outb_p(SIO_DEV_LSB, SIO_INDEX); - outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */ - - outb_p(SIO_DEV_ENB, SIO_INDEX); - outb_p(1, SIO_DATA); /* Enable PM registers. */ - - /* - * Now, write the PM register which enables the GPIO registers. - */ - outb_p(SIO_PM_FER2, SIO_PM_INDEX); - outb_p(SIO_PM_GP_EN, SIO_PM_DATA); - - /* - * Now, initialize the GPIO registers. - * We want them all to be inputs which is the - * power on default, so let's leave them alone. - * So, let's just read the board rev! - */ - raw = inb_p(SIO_GP_DATA1); - raw &= 0x7f; /* 7 bits of valid board revision ID. */ - - if (visws_board_type == VISWS_320) { - if (raw < 0x6) { - visws_board_rev = 4; - } else if (raw < 0xc) { - visws_board_rev = 5; - } else { - visws_board_rev = 6; - } - } else if (visws_board_type == VISWS_540) { - visws_board_rev = 2; - } else { - visws_board_rev = raw; - } - - printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n", - (visws_board_type == VISWS_320 ? "320" : - (visws_board_type == VISWS_540 ? "540" : - "unknown")), visws_board_rev); -} - -void __init pre_intr_init_hook(void) -{ - init_VISWS_APIC_irqs(); -} - -void __init intr_init_hook(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - apic_intr_init(); -#endif -} - -void __init pre_setup_arch_hook() -{ - visws_get_board_type_and_rev(); -} - -static struct irqaction irq0 = { - .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_IRQPOLL, - .name = "timer", -}; - -void __init time_init_hook(void) -{ - printk(KERN_INFO "Starting Cobalt Timer system clock\n"); - - /* Set the countdown value */ - co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ); - - /* Start the timer */ - co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN); - - /* Enable (unmask) the timer interrupt */ - co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK); - - /* Wire cpu IDT entry to s/w handler (and Cobalt APIC to IDT) */ - setup_irq(0, &irq0); -} - -/* Hook for machine specific memory setup. */ - -#define MB (1024 * 1024) - -unsigned long sgivwfb_mem_phys; -unsigned long sgivwfb_mem_size; -EXPORT_SYMBOL(sgivwfb_mem_phys); -EXPORT_SYMBOL(sgivwfb_mem_size); - -long long mem_size __initdata = 0; - -char * __init machine_specific_memory_setup(void) -{ - long long gfx_mem_size = 8 * MB; - - mem_size = ALT_MEM_K; - - if (!mem_size) { - printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n"); - mem_size = 128 * MB; - } - - /* - * this hardcodes the graphics memory to 8 MB - * it really should be sized dynamically (or at least - * set as a boot param) - */ - if (!sgivwfb_mem_size) { - printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n"); - sgivwfb_mem_size = 8 * MB; - } - - /* - * Trim to nearest MB - */ - sgivwfb_mem_size &= ~((1 << 20) - 1); - sgivwfb_mem_phys = mem_size - gfx_mem_size; - - add_memory_region(0, LOWMEMSIZE(), E820_RAM); - add_memory_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM); - add_memory_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED); - - return "PROM"; -} diff --git a/arch/i386/mach-visws/traps.c b/arch/i386/mach-visws/traps.c deleted file mode 100644 index 843b67acf43..00000000000 --- a/arch/i386/mach-visws/traps.c +++ /dev/null @@ -1,68 +0,0 @@ -/* VISWS traps */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include "cobalt.h" -#include "lithium.h" - - -#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4) -#define BCD (LI_INTB | LI_INTC | LI_INTD) -#define ALLDEVS (A01234 | BCD) - -static __init void lithium_init(void) -{ - set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS); - set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS); - - if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || - (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { - printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A'); - panic("This machine is not SGI Visual Workstation 320/540"); - } - - if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || - (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { - printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B'); - panic("This machine is not SGI Visual Workstation 320/540"); - } - - li_pcia_write16(LI_PCI_INTEN, ALLDEVS); - li_pcib_write16(LI_PCI_INTEN, ALLDEVS); -} - -static __init void cobalt_init(void) -{ - /* - * On normal SMP PC this is used only with SMP, but we have to - * use it and set it up here to start the Cobalt clock - */ - set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); - setup_local_APIC(); - printk(KERN_INFO "Local APIC Version %#lx, ID %#lx\n", - apic_read(APIC_LVR), apic_read(APIC_ID)); - - set_fixmap(FIX_CO_CPU, CO_CPU_PHYS); - set_fixmap(FIX_CO_APIC, CO_APIC_PHYS); - printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n", - co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID)); - - /* Enable Cobalt APIC being careful to NOT change the ID! */ - co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE); - - printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n", - co_apic_read(CO_APIC_ID)); -} - -void __init trap_init_hook(void) -{ - lithium_init(); - cobalt_init(); -} diff --git a/arch/i386/mach-visws/visws_apic.c b/arch/i386/mach-visws/visws_apic.c deleted file mode 100644 index 710faf71a65..00000000000 --- a/arch/i386/mach-visws/visws_apic.c +++ /dev/null @@ -1,299 +0,0 @@ -/* - * linux/arch/i386/mach-visws/visws_apic.c - * - * Copyright (C) 1999 Bent Hagemark, Ingo Molnar - * - * SGI Visual Workstation interrupt controller - * - * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC - * which serves as the main interrupt controller in the system. Non-legacy - * hardware in the system uses this controller directly. Legacy devices - * are connected to the PIIX4 which in turn has its 8259(s) connected to - * a of the Cobalt APIC entry. - * - * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com - * - * 25/11/2002 - Updated for 2.5 by Andrey Panin - */ - -#include -#include -#include - -#include -#include -#include - -#include "cobalt.h" -#include "irq_vectors.h" - - -static DEFINE_SPINLOCK(cobalt_lock); - -/* - * Set the given Cobalt APIC Redirection Table entry to point - * to the given IDT vector/index. - */ -static inline void co_apic_set(int entry, int irq) -{ - co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR)); - co_apic_write(CO_APIC_HI(entry), 0); -} - -/* - * Cobalt (IO)-APIC functions to handle PCI devices. - */ -static inline int co_apic_ide0_hack(void) -{ - extern char visws_board_type; - extern char visws_board_rev; - - if (visws_board_type == VISWS_320 && visws_board_rev == 5) - return 5; - return CO_APIC_IDE0; -} - -static int is_co_apic(unsigned int irq) -{ - if (IS_CO_APIC(irq)) - return CO_APIC(irq); - - switch (irq) { - case 0: return CO_APIC_CPU; - case CO_IRQ_IDE0: return co_apic_ide0_hack(); - case CO_IRQ_IDE1: return CO_APIC_IDE1; - default: return -1; - } -} - - -/* - * This is the SGI Cobalt (IO-)APIC: - */ - -static void enable_cobalt_irq(unsigned int irq) -{ - co_apic_set(is_co_apic(irq), irq); -} - -static void disable_cobalt_irq(unsigned int irq) -{ - int entry = is_co_apic(irq); - - co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK); - co_apic_read(CO_APIC_LO(entry)); -} - -/* - * "irq" really just serves to identify the device. Here is where we - * map this to the Cobalt APIC entry where it's physically wired. - * This is called via request_irq -> setup_irq -> irq_desc->startup() - */ -static unsigned int startup_cobalt_irq(unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&cobalt_lock, flags); - if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING))) - irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING); - enable_cobalt_irq(irq); - spin_unlock_irqrestore(&cobalt_lock, flags); - return 0; -} - -static void ack_cobalt_irq(unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&cobalt_lock, flags); - disable_cobalt_irq(irq); - apic_write(APIC_EOI, APIC_EIO_ACK); - spin_unlock_irqrestore(&cobalt_lock, flags); -} - -static void end_cobalt_irq(unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&cobalt_lock, flags); - if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS))) - enable_cobalt_irq(irq); - spin_unlock_irqrestore(&cobalt_lock, flags); -} - -static struct irq_chip cobalt_irq_type = { - .typename = "Cobalt-APIC", - .startup = startup_cobalt_irq, - .shutdown = disable_cobalt_irq, - .enable = enable_cobalt_irq, - .disable = disable_cobalt_irq, - .ack = ack_cobalt_irq, - .end = end_cobalt_irq, -}; - - -/* - * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt - * -- not the manner expected by the code in i8259.c. - * - * there is a 'master' physical interrupt source that gets sent to - * the CPU. But in the chipset there are various 'virtual' interrupts - * waiting to be handled. We represent this to Linux through a 'master' - * interrupt controller type, and through a special virtual interrupt- - * controller. Device drivers only see the virtual interrupt sources. - */ -static unsigned int startup_piix4_master_irq(unsigned int irq) -{ - init_8259A(0); - - return startup_cobalt_irq(irq); -} - -static void end_piix4_master_irq(unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&cobalt_lock, flags); - enable_cobalt_irq(irq); - spin_unlock_irqrestore(&cobalt_lock, flags); -} - -static struct irq_chip piix4_master_irq_type = { - .typename = "PIIX4-master", - .startup = startup_piix4_master_irq, - .ack = ack_cobalt_irq, - .end = end_piix4_master_irq, -}; - - -static struct irq_chip piix4_virtual_irq_type = { - .typename = "PIIX4-virtual", - .shutdown = disable_8259A_irq, - .enable = enable_8259A_irq, - .disable = disable_8259A_irq, -}; - - -/* - * PIIX4-8259 master/virtual functions to handle interrupt requests - * from legacy devices: floppy, parallel, serial, rtc. - * - * None of these get Cobalt APIC entries, neither do they have IDT - * entries. These interrupts are purely virtual and distributed from - * the 'master' interrupt source: CO_IRQ_8259. - * - * When the 8259 interrupts its handler figures out which of these - * devices is interrupting and dispatches to its handler. - * - * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/ - * enable_irq gets the right irq. This 'master' irq is never directly - * manipulated by any driver. - */ -static irqreturn_t piix4_master_intr(int irq, void *dev_id) -{ - int realirq; - irq_desc_t *desc; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - - /* Find out what's interrupting in the PIIX4 master 8259 */ - outb(0x0c, 0x20); /* OCW3 Poll command */ - realirq = inb(0x20); - - /* - * Bit 7 == 0 means invalid/spurious - */ - if (unlikely(!(realirq & 0x80))) - goto out_unlock; - - realirq &= 7; - - if (unlikely(realirq == 2)) { - outb(0x0c, 0xa0); - realirq = inb(0xa0); - - if (unlikely(!(realirq & 0x80))) - goto out_unlock; - - realirq = (realirq & 7) + 8; - } - - /* mask and ack interrupt */ - cached_irq_mask |= 1 << realirq; - if (unlikely(realirq > 7)) { - inb(0xa1); - outb(cached_slave_mask, 0xa1); - outb(0x60 + (realirq & 7), 0xa0); - outb(0x60 + 2, 0x20); - } else { - inb(0x21); - outb(cached_master_mask, 0x21); - outb(0x60 + realirq, 0x20); - } - - spin_unlock_irqrestore(&i8259A_lock, flags); - - desc = irq_desc + realirq; - - /* - * handle this 'virtual interrupt' as a Cobalt one now. - */ - kstat_cpu(smp_processor_id()).irqs[realirq]++; - - if (likely(desc->action != NULL)) - handle_IRQ_event(realirq, desc->action); - - if (!(desc->status & IRQ_DISABLED)) - enable_8259A_irq(realirq); - - return IRQ_HANDLED; - -out_unlock: - spin_unlock_irqrestore(&i8259A_lock, flags); - return IRQ_NONE; -} - -static struct irqaction master_action = { - .handler = piix4_master_intr, - .name = "PIIX4-8259", -}; - -static struct irqaction cascade_action = { - .handler = no_action, - .name = "cascade", -}; - - -void init_VISWS_APIC_irqs(void) -{ - int i; - - for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { - irq_desc[i].status = IRQ_DISABLED; - irq_desc[i].action = 0; - irq_desc[i].depth = 1; - - if (i == 0) { - irq_desc[i].chip = &cobalt_irq_type; - } - else if (i == CO_IRQ_IDE0) { - irq_desc[i].chip = &cobalt_irq_type; - } - else if (i == CO_IRQ_IDE1) { - irq_desc[i].chip = &cobalt_irq_type; - } - else if (i == CO_IRQ_8259) { - irq_desc[i].chip = &piix4_master_irq_type; - } - else if (i < CO_IRQ_APIC0) { - irq_desc[i].chip = &piix4_virtual_irq_type; - } - else if (IS_CO_APIC(i)) { - irq_desc[i].chip = &cobalt_irq_type; - } - } - - setup_irq(CO_IRQ_8259, &master_action); - setup_irq(2, &cascade_action); -} diff --git a/arch/x86/mach-visws/Makefile b/arch/x86/mach-visws/Makefile new file mode 100644 index 00000000000..835fd96ad76 --- /dev/null +++ b/arch/x86/mach-visws/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for the linux kernel. +# + +obj-y := setup.o traps.o reboot.o + +obj-$(CONFIG_X86_VISWS_APIC) += visws_apic.o +obj-$(CONFIG_X86_LOCAL_APIC) += mpparse.o diff --git a/arch/x86/mach-visws/mpparse.c b/arch/x86/mach-visws/mpparse.c new file mode 100644 index 00000000000..f3c74fab8b9 --- /dev/null +++ b/arch/x86/mach-visws/mpparse.c @@ -0,0 +1,101 @@ + +#include +#include + +#include +#include + +#include "cobalt.h" +#include "mach_apic.h" + +/* Have we found an MP table */ +int smp_found_config; + +/* + * Various Linux-internal data structures created from the + * MP-table. + */ +int apic_version [MAX_APICS]; + +int pic_mode; +unsigned long mp_lapic_addr; + +/* Processor that is doing the boot up */ +unsigned int boot_cpu_physical_apicid = -1U; + +/* Bitmask of physically existing CPUs */ +physid_mask_t phys_cpu_present_map; + +unsigned int __initdata maxcpus = NR_CPUS; + +/* + * The Visual Workstation is Intel MP compliant in the hardware + * sense, but it doesn't have a BIOS(-configuration table). + * No problem for Linux. + */ + +static void __init MP_processor_info (struct mpc_config_processor *m) +{ + int ver, logical_apicid; + physid_mask_t apic_cpus; + + if (!(m->mpc_cpuflag & CPU_ENABLED)) + return; + + logical_apicid = m->mpc_apicid; + printk(KERN_INFO "%sCPU #%d %ld:%ld APIC version %d\n", + m->mpc_cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "", + m->mpc_apicid, + (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, + (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, + m->mpc_apicver); + + if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) + boot_cpu_physical_apicid = m->mpc_apicid; + + ver = m->mpc_apicver; + if ((ver >= 0x14 && m->mpc_apicid >= 0xff) || m->mpc_apicid >= 0xf) { + printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", + m->mpc_apicid, MAX_APICS); + return; + } + + apic_cpus = apicid_to_cpu_present(m->mpc_apicid); + physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); + /* + * Validate version + */ + if (ver == 0x0) { + printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! " + "fixing up to 0x10. (tell your hw vendor)\n", + m->mpc_apicid); + ver = 0x10; + } + apic_version[m->mpc_apicid] = ver; +} + +void __init find_smp_config(void) +{ + struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS); + unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); + + if (ncpus > CO_CPU_MAX) { + printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n", + ncpus, mp); + + ncpus = CO_CPU_MAX; + } + + if (ncpus > maxcpus) + ncpus = maxcpus; + + smp_found_config = 1; + while (ncpus--) + MP_processor_info(mp++); + + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; +} + +void __init get_smp_config (void) +{ +} diff --git a/arch/x86/mach-visws/reboot.c b/arch/x86/mach-visws/reboot.c new file mode 100644 index 00000000000..99332abfad4 --- /dev/null +++ b/arch/x86/mach-visws/reboot.c @@ -0,0 +1,55 @@ +#include +#include +#include + +#include +#include "piix4.h" + +void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); + +void machine_shutdown(void) +{ +#ifdef CONFIG_SMP + smp_send_stop(); +#endif +} + +void machine_emergency_restart(void) +{ + /* + * Visual Workstations restart after this + * register is poked on the PIIX4 + */ + outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT); +} + +void machine_restart(char * __unused) +{ + machine_shutdown(); + machine_emergency_restart(); +} + +void machine_power_off(void) +{ + unsigned short pm_status; + extern unsigned int pci_bus0; + + while ((pm_status = inw(PMSTS_PORT)) & 0x100) + outw(pm_status, PMSTS_PORT); + + outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT); + + mdelay(10); + +#define PCI_CONF1_ADDRESS(bus, devfn, reg) \ + (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3)) + + outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); + outl(PIIX_SPECIAL_STOP, 0xCFC); +} + +void machine_halt(void) +{ +} + diff --git a/arch/x86/mach-visws/setup.c b/arch/x86/mach-visws/setup.c new file mode 100644 index 00000000000..1f81f10e03a --- /dev/null +++ b/arch/x86/mach-visws/setup.c @@ -0,0 +1,183 @@ +/* + * Unmaintained SGI Visual Workstation support. + * Split out from setup.c by davej@suse.de + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "cobalt.h" +#include "piix4.h" + +int no_broadcast; + +char visws_board_type = -1; +char visws_board_rev = -1; + +void __init visws_get_board_type_and_rev(void) +{ + int raw; + + visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG) + >> PIIX_GPI_BD_SHIFT; + /* + * Get Board rev. + * First, we have to initialize the 307 part to allow us access + * to the GPIO registers. Let's map them at 0x0fc0 which is right + * after the PIIX4 PM section. + */ + outb_p(SIO_DEV_SEL, SIO_INDEX); + outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */ + + outb_p(SIO_DEV_MSB, SIO_INDEX); + outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */ + + outb_p(SIO_DEV_LSB, SIO_INDEX); + outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */ + + outb_p(SIO_DEV_ENB, SIO_INDEX); + outb_p(1, SIO_DATA); /* Enable GPIO registers. */ + + /* + * Now, we have to map the power management section to write + * a bit which enables access to the GPIO registers. + * What lunatic came up with this shit? + */ + outb_p(SIO_DEV_SEL, SIO_INDEX); + outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to GPIO regs. */ + + outb_p(SIO_DEV_MSB, SIO_INDEX); + outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */ + + outb_p(SIO_DEV_LSB, SIO_INDEX); + outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */ + + outb_p(SIO_DEV_ENB, SIO_INDEX); + outb_p(1, SIO_DATA); /* Enable PM registers. */ + + /* + * Now, write the PM register which enables the GPIO registers. + */ + outb_p(SIO_PM_FER2, SIO_PM_INDEX); + outb_p(SIO_PM_GP_EN, SIO_PM_DATA); + + /* + * Now, initialize the GPIO registers. + * We want them all to be inputs which is the + * power on default, so let's leave them alone. + * So, let's just read the board rev! + */ + raw = inb_p(SIO_GP_DATA1); + raw &= 0x7f; /* 7 bits of valid board revision ID. */ + + if (visws_board_type == VISWS_320) { + if (raw < 0x6) { + visws_board_rev = 4; + } else if (raw < 0xc) { + visws_board_rev = 5; + } else { + visws_board_rev = 6; + } + } else if (visws_board_type == VISWS_540) { + visws_board_rev = 2; + } else { + visws_board_rev = raw; + } + + printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n", + (visws_board_type == VISWS_320 ? "320" : + (visws_board_type == VISWS_540 ? "540" : + "unknown")), visws_board_rev); +} + +void __init pre_intr_init_hook(void) +{ + init_VISWS_APIC_irqs(); +} + +void __init intr_init_hook(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + apic_intr_init(); +#endif +} + +void __init pre_setup_arch_hook() +{ + visws_get_board_type_and_rev(); +} + +static struct irqaction irq0 = { + .handler = timer_interrupt, + .flags = IRQF_DISABLED | IRQF_IRQPOLL, + .name = "timer", +}; + +void __init time_init_hook(void) +{ + printk(KERN_INFO "Starting Cobalt Timer system clock\n"); + + /* Set the countdown value */ + co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ); + + /* Start the timer */ + co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN); + + /* Enable (unmask) the timer interrupt */ + co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK); + + /* Wire cpu IDT entry to s/w handler (and Cobalt APIC to IDT) */ + setup_irq(0, &irq0); +} + +/* Hook for machine specific memory setup. */ + +#define MB (1024 * 1024) + +unsigned long sgivwfb_mem_phys; +unsigned long sgivwfb_mem_size; +EXPORT_SYMBOL(sgivwfb_mem_phys); +EXPORT_SYMBOL(sgivwfb_mem_size); + +long long mem_size __initdata = 0; + +char * __init machine_specific_memory_setup(void) +{ + long long gfx_mem_size = 8 * MB; + + mem_size = ALT_MEM_K; + + if (!mem_size) { + printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n"); + mem_size = 128 * MB; + } + + /* + * this hardcodes the graphics memory to 8 MB + * it really should be sized dynamically (or at least + * set as a boot param) + */ + if (!sgivwfb_mem_size) { + printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n"); + sgivwfb_mem_size = 8 * MB; + } + + /* + * Trim to nearest MB + */ + sgivwfb_mem_size &= ~((1 << 20) - 1); + sgivwfb_mem_phys = mem_size - gfx_mem_size; + + add_memory_region(0, LOWMEMSIZE(), E820_RAM); + add_memory_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM); + add_memory_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED); + + return "PROM"; +} diff --git a/arch/x86/mach-visws/traps.c b/arch/x86/mach-visws/traps.c new file mode 100644 index 00000000000..843b67acf43 --- /dev/null +++ b/arch/x86/mach-visws/traps.c @@ -0,0 +1,68 @@ +/* VISWS traps */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include "cobalt.h" +#include "lithium.h" + + +#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4) +#define BCD (LI_INTB | LI_INTC | LI_INTD) +#define ALLDEVS (A01234 | BCD) + +static __init void lithium_init(void) +{ + set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS); + set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS); + + if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || + (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { + printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A'); + panic("This machine is not SGI Visual Workstation 320/540"); + } + + if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || + (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { + printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B'); + panic("This machine is not SGI Visual Workstation 320/540"); + } + + li_pcia_write16(LI_PCI_INTEN, ALLDEVS); + li_pcib_write16(LI_PCI_INTEN, ALLDEVS); +} + +static __init void cobalt_init(void) +{ + /* + * On normal SMP PC this is used only with SMP, but we have to + * use it and set it up here to start the Cobalt clock + */ + set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); + setup_local_APIC(); + printk(KERN_INFO "Local APIC Version %#lx, ID %#lx\n", + apic_read(APIC_LVR), apic_read(APIC_ID)); + + set_fixmap(FIX_CO_CPU, CO_CPU_PHYS); + set_fixmap(FIX_CO_APIC, CO_APIC_PHYS); + printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n", + co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID)); + + /* Enable Cobalt APIC being careful to NOT change the ID! */ + co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE); + + printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n", + co_apic_read(CO_APIC_ID)); +} + +void __init trap_init_hook(void) +{ + lithium_init(); + cobalt_init(); +} diff --git a/arch/x86/mach-visws/visws_apic.c b/arch/x86/mach-visws/visws_apic.c new file mode 100644 index 00000000000..710faf71a65 --- /dev/null +++ b/arch/x86/mach-visws/visws_apic.c @@ -0,0 +1,299 @@ +/* + * linux/arch/i386/mach-visws/visws_apic.c + * + * Copyright (C) 1999 Bent Hagemark, Ingo Molnar + * + * SGI Visual Workstation interrupt controller + * + * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC + * which serves as the main interrupt controller in the system. Non-legacy + * hardware in the system uses this controller directly. Legacy devices + * are connected to the PIIX4 which in turn has its 8259(s) connected to + * a of the Cobalt APIC entry. + * + * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com + * + * 25/11/2002 - Updated for 2.5 by Andrey Panin + */ + +#include +#include +#include + +#include +#include +#include + +#include "cobalt.h" +#include "irq_vectors.h" + + +static DEFINE_SPINLOCK(cobalt_lock); + +/* + * Set the given Cobalt APIC Redirection Table entry to point + * to the given IDT vector/index. + */ +static inline void co_apic_set(int entry, int irq) +{ + co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR)); + co_apic_write(CO_APIC_HI(entry), 0); +} + +/* + * Cobalt (IO)-APIC functions to handle PCI devices. + */ +static inline int co_apic_ide0_hack(void) +{ + extern char visws_board_type; + extern char visws_board_rev; + + if (visws_board_type == VISWS_320 && visws_board_rev == 5) + return 5; + return CO_APIC_IDE0; +} + +static int is_co_apic(unsigned int irq) +{ + if (IS_CO_APIC(irq)) + return CO_APIC(irq); + + switch (irq) { + case 0: return CO_APIC_CPU; + case CO_IRQ_IDE0: return co_apic_ide0_hack(); + case CO_IRQ_IDE1: return CO_APIC_IDE1; + default: return -1; + } +} + + +/* + * This is the SGI Cobalt (IO-)APIC: + */ + +static void enable_cobalt_irq(unsigned int irq) +{ + co_apic_set(is_co_apic(irq), irq); +} + +static void disable_cobalt_irq(unsigned int irq) +{ + int entry = is_co_apic(irq); + + co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK); + co_apic_read(CO_APIC_LO(entry)); +} + +/* + * "irq" really just serves to identify the device. Here is where we + * map this to the Cobalt APIC entry where it's physically wired. + * This is called via request_irq -> setup_irq -> irq_desc->startup() + */ +static unsigned int startup_cobalt_irq(unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&cobalt_lock, flags); + if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING))) + irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING); + enable_cobalt_irq(irq); + spin_unlock_irqrestore(&cobalt_lock, flags); + return 0; +} + +static void ack_cobalt_irq(unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&cobalt_lock, flags); + disable_cobalt_irq(irq); + apic_write(APIC_EOI, APIC_EIO_ACK); + spin_unlock_irqrestore(&cobalt_lock, flags); +} + +static void end_cobalt_irq(unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&cobalt_lock, flags); + if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS))) + enable_cobalt_irq(irq); + spin_unlock_irqrestore(&cobalt_lock, flags); +} + +static struct irq_chip cobalt_irq_type = { + .typename = "Cobalt-APIC", + .startup = startup_cobalt_irq, + .shutdown = disable_cobalt_irq, + .enable = enable_cobalt_irq, + .disable = disable_cobalt_irq, + .ack = ack_cobalt_irq, + .end = end_cobalt_irq, +}; + + +/* + * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt + * -- not the manner expected by the code in i8259.c. + * + * there is a 'master' physical interrupt source that gets sent to + * the CPU. But in the chipset there are various 'virtual' interrupts + * waiting to be handled. We represent this to Linux through a 'master' + * interrupt controller type, and through a special virtual interrupt- + * controller. Device drivers only see the virtual interrupt sources. + */ +static unsigned int startup_piix4_master_irq(unsigned int irq) +{ + init_8259A(0); + + return startup_cobalt_irq(irq); +} + +static void end_piix4_master_irq(unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&cobalt_lock, flags); + enable_cobalt_irq(irq); + spin_unlock_irqrestore(&cobalt_lock, flags); +} + +static struct irq_chip piix4_master_irq_type = { + .typename = "PIIX4-master", + .startup = startup_piix4_master_irq, + .ack = ack_cobalt_irq, + .end = end_piix4_master_irq, +}; + + +static struct irq_chip piix4_virtual_irq_type = { + .typename = "PIIX4-virtual", + .shutdown = disable_8259A_irq, + .enable = enable_8259A_irq, + .disable = disable_8259A_irq, +}; + + +/* + * PIIX4-8259 master/virtual functions to handle interrupt requests + * from legacy devices: floppy, parallel, serial, rtc. + * + * None of these get Cobalt APIC entries, neither do they have IDT + * entries. These interrupts are purely virtual and distributed from + * the 'master' interrupt source: CO_IRQ_8259. + * + * When the 8259 interrupts its handler figures out which of these + * devices is interrupting and dispatches to its handler. + * + * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/ + * enable_irq gets the right irq. This 'master' irq is never directly + * manipulated by any driver. + */ +static irqreturn_t piix4_master_intr(int irq, void *dev_id) +{ + int realirq; + irq_desc_t *desc; + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); + + /* Find out what's interrupting in the PIIX4 master 8259 */ + outb(0x0c, 0x20); /* OCW3 Poll command */ + realirq = inb(0x20); + + /* + * Bit 7 == 0 means invalid/spurious + */ + if (unlikely(!(realirq & 0x80))) + goto out_unlock; + + realirq &= 7; + + if (unlikely(realirq == 2)) { + outb(0x0c, 0xa0); + realirq = inb(0xa0); + + if (unlikely(!(realirq & 0x80))) + goto out_unlock; + + realirq = (realirq & 7) + 8; + } + + /* mask and ack interrupt */ + cached_irq_mask |= 1 << realirq; + if (unlikely(realirq > 7)) { + inb(0xa1); + outb(cached_slave_mask, 0xa1); + outb(0x60 + (realirq & 7), 0xa0); + outb(0x60 + 2, 0x20); + } else { + inb(0x21); + outb(cached_master_mask, 0x21); + outb(0x60 + realirq, 0x20); + } + + spin_unlock_irqrestore(&i8259A_lock, flags); + + desc = irq_desc + realirq; + + /* + * handle this 'virtual interrupt' as a Cobalt one now. + */ + kstat_cpu(smp_processor_id()).irqs[realirq]++; + + if (likely(desc->action != NULL)) + handle_IRQ_event(realirq, desc->action); + + if (!(desc->status & IRQ_DISABLED)) + enable_8259A_irq(realirq); + + return IRQ_HANDLED; + +out_unlock: + spin_unlock_irqrestore(&i8259A_lock, flags); + return IRQ_NONE; +} + +static struct irqaction master_action = { + .handler = piix4_master_intr, + .name = "PIIX4-8259", +}; + +static struct irqaction cascade_action = { + .handler = no_action, + .name = "cascade", +}; + + +void init_VISWS_APIC_irqs(void) +{ + int i; + + for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { + irq_desc[i].status = IRQ_DISABLED; + irq_desc[i].action = 0; + irq_desc[i].depth = 1; + + if (i == 0) { + irq_desc[i].chip = &cobalt_irq_type; + } + else if (i == CO_IRQ_IDE0) { + irq_desc[i].chip = &cobalt_irq_type; + } + else if (i == CO_IRQ_IDE1) { + irq_desc[i].chip = &cobalt_irq_type; + } + else if (i == CO_IRQ_8259) { + irq_desc[i].chip = &piix4_master_irq_type; + } + else if (i < CO_IRQ_APIC0) { + irq_desc[i].chip = &piix4_virtual_irq_type; + } + else if (IS_CO_APIC(i)) { + irq_desc[i].chip = &cobalt_irq_type; + } + } + + setup_irq(CO_IRQ_8259, &master_action); + setup_irq(2, &cascade_action); +} -- cgit v1.2.3-70-g09d2 From c750a66b0ebfcd8f4cb353ab37b286c8cd93ad10 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:39 +0200 Subject: i386: move mach-generic Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Makefile | 2 +- arch/i386/mach-generic/Makefile | 8 --- arch/i386/mach-generic/bigsmp.c | 57 ------------------ arch/i386/mach-generic/default.c | 26 -------- arch/i386/mach-generic/es7000.c | 69 --------------------- arch/i386/mach-generic/probe.c | 125 --------------------------------------- arch/i386/mach-generic/summit.c | 27 --------- arch/x86/mach-generic/Makefile | 8 +++ arch/x86/mach-generic/bigsmp.c | 57 ++++++++++++++++++ arch/x86/mach-generic/default.c | 26 ++++++++ arch/x86/mach-generic/es7000.c | 69 +++++++++++++++++++++ arch/x86/mach-generic/probe.c | 125 +++++++++++++++++++++++++++++++++++++++ arch/x86/mach-generic/summit.c | 27 +++++++++ 13 files changed, 313 insertions(+), 313 deletions(-) delete mode 100644 arch/i386/mach-generic/Makefile delete mode 100644 arch/i386/mach-generic/bigsmp.c delete mode 100644 arch/i386/mach-generic/default.c delete mode 100644 arch/i386/mach-generic/es7000.c delete mode 100644 arch/i386/mach-generic/probe.c delete mode 100644 arch/i386/mach-generic/summit.c create mode 100644 arch/x86/mach-generic/Makefile create mode 100644 arch/x86/mach-generic/bigsmp.c create mode 100644 arch/x86/mach-generic/default.c create mode 100644 arch/x86/mach-generic/es7000.c create mode 100644 arch/x86/mach-generic/probe.c create mode 100644 arch/x86/mach-generic/summit.c (limited to 'arch/x86') diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 191e171b367..510df782f94 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -86,7 +86,7 @@ mcore-$(CONFIG_X86_SUMMIT) := arch/i386/mach-default # generic subarchitecture mflags-$(CONFIG_X86_GENERICARCH) := -Iinclude/asm-i386/mach-generic mcore-$(CONFIG_X86_GENERICARCH) := arch/i386/mach-default -core-$(CONFIG_X86_GENERICARCH) += arch/i386/mach-generic/ +core-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ # ES7000 subarch support mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-i386/mach-es7000 diff --git a/arch/i386/mach-generic/Makefile b/arch/i386/mach-generic/Makefile deleted file mode 100644 index 2f216f55b2d..00000000000 --- a/arch/i386/mach-generic/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# -# Makefile for the generic architecture -# - -EXTRA_CFLAGS := -Iarch/i386/kernel - -obj-y := probe.o summit.o bigsmp.o es7000.o default.o -obj-y += ../../i386/mach-es7000/ diff --git a/arch/i386/mach-generic/bigsmp.c b/arch/i386/mach-generic/bigsmp.c deleted file mode 100644 index 58a477baec3..00000000000 --- a/arch/i386/mach-generic/bigsmp.c +++ /dev/null @@ -1,57 +0,0 @@ -/* - * APIC driver for "bigsmp" XAPIC machines with more than 8 virtual CPUs. - * Drives the local APIC in "clustered mode". - */ -#define APIC_DEFINITION 1 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static int dmi_bigsmp; /* can be set by dmi scanners */ - -static int hp_ht_bigsmp(struct dmi_system_id *d) -{ -#ifdef CONFIG_X86_GENERICARCH - printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); - dmi_bigsmp = 1; -#endif - return 0; -} - - -static struct dmi_system_id bigsmp_dmi_table[] = { - { hp_ht_bigsmp, "HP ProLiant DL760 G2", { - DMI_MATCH(DMI_BIOS_VENDOR, "HP"), - DMI_MATCH(DMI_BIOS_VERSION, "P44-"), - }}, - - { hp_ht_bigsmp, "HP ProLiant DL740", { - DMI_MATCH(DMI_BIOS_VENDOR, "HP"), - DMI_MATCH(DMI_BIOS_VERSION, "P47-"), - }}, - { } -}; - - -static int probe_bigsmp(void) -{ - if (def_to_bigsmp) - dmi_bigsmp = 1; - else - dmi_check_system(bigsmp_dmi_table); - return dmi_bigsmp; -} - -struct genapic apic_bigsmp = APIC_INIT("bigsmp", probe_bigsmp); diff --git a/arch/i386/mach-generic/default.c b/arch/i386/mach-generic/default.c deleted file mode 100644 index 8685208d851..00000000000 --- a/arch/i386/mach-generic/default.c +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Default generic APIC driver. This handles upto 8 CPUs. - */ -#define APIC_DEFINITION 1 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* should be called last. */ -static int probe_default(void) -{ - return 1; -} - -struct genapic apic_default = APIC_INIT("default", probe_default); diff --git a/arch/i386/mach-generic/es7000.c b/arch/i386/mach-generic/es7000.c deleted file mode 100644 index 4742626f08c..00000000000 --- a/arch/i386/mach-generic/es7000.c +++ /dev/null @@ -1,69 +0,0 @@ -/* - * APIC driver for the Unisys ES7000 chipset. - */ -#define APIC_DEFINITION 1 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static int probe_es7000(void) -{ - /* probed later in mptable/ACPI hooks */ - return 0; -} - -extern void es7000_sw_apic(void); -static void __init enable_apic_mode(void) -{ - es7000_sw_apic(); - return; -} - -static __init int mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid) -{ - if (mpc->mpc_oemptr) { - struct mp_config_oemtable *oem_table = - (struct mp_config_oemtable *)mpc->mpc_oemptr; - if (!strncmp(oem, "UNISYS", 6)) - return parse_unisys_oem((char *)oem_table); - } - return 0; -} - -#ifdef CONFIG_ACPI -/* Hook from generic ACPI tables.c */ -static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - unsigned long oem_addr; - if (!find_unisys_acpi_oem_table(&oem_addr)) { - if (es7000_check_dsdt()) - return parse_unisys_oem((char *)oem_addr); - else { - setup_unisys(); - return 1; - } - } - return 0; -} -#else -static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 0; -} -#endif - -struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); diff --git a/arch/i386/mach-generic/probe.c b/arch/i386/mach-generic/probe.c deleted file mode 100644 index 74f3da63442..00000000000 --- a/arch/i386/mach-generic/probe.c +++ /dev/null @@ -1,125 +0,0 @@ -/* Copyright 2003 Andi Kleen, SuSE Labs. - * Subject to the GNU Public License, v.2 - * - * Generic x86 APIC driver probe layer. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -extern struct genapic apic_summit; -extern struct genapic apic_bigsmp; -extern struct genapic apic_es7000; -extern struct genapic apic_default; - -struct genapic *genapic = &apic_default; - -struct genapic *apic_probe[] __initdata = { - &apic_summit, - &apic_bigsmp, - &apic_es7000, - &apic_default, /* must be last */ - NULL, -}; - -static int cmdline_apic __initdata; -static int __init parse_apic(char *arg) -{ - int i; - - if (!arg) - return -EINVAL; - - for (i = 0; apic_probe[i]; i++) { - if (!strcmp(apic_probe[i]->name, arg)) { - genapic = apic_probe[i]; - cmdline_apic = 1; - return 0; - } - } - - /* Parsed again by __setup for debug/verbose */ - return 0; -} -early_param("apic", parse_apic); - -void __init generic_bigsmp_probe(void) -{ - /* - * This routine is used to switch to bigsmp mode when - * - There is no apic= option specified by the user - * - generic_apic_probe() has choosen apic_default as the sub_arch - * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support - */ - - if (!cmdline_apic && genapic == &apic_default) - if (apic_bigsmp.probe()) { - genapic = &apic_bigsmp; - printk(KERN_INFO "Overriding APIC driver with %s\n", - genapic->name); - } -} - -void __init generic_apic_probe(void) -{ - if (!cmdline_apic) { - int i; - for (i = 0; apic_probe[i]; i++) { - if (apic_probe[i]->probe()) { - genapic = apic_probe[i]; - break; - } - } - /* Not visible without early console */ - if (!apic_probe[i]) - panic("Didn't find an APIC driver"); - } - printk(KERN_INFO "Using APIC driver %s\n", genapic->name); -} - -/* These functions can switch the APIC even after the initial ->probe() */ - -int __init mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid) -{ - int i; - for (i = 0; apic_probe[i]; ++i) { - if (apic_probe[i]->mps_oem_check(mpc,oem,productid)) { - if (!cmdline_apic) { - genapic = apic_probe[i]; - printk(KERN_INFO "Switched to APIC driver `%s'.\n", - genapic->name); - } - return 1; - } - } - return 0; -} - -int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - int i; - for (i = 0; apic_probe[i]; ++i) { - if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { - if (!cmdline_apic) { - genapic = apic_probe[i]; - printk(KERN_INFO "Switched to APIC driver `%s'.\n", - genapic->name); - } - return 1; - } - } - return 0; -} - -int hard_smp_processor_id(void) -{ - return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID)); -} diff --git a/arch/i386/mach-generic/summit.c b/arch/i386/mach-generic/summit.c deleted file mode 100644 index 74883ccb8f7..00000000000 --- a/arch/i386/mach-generic/summit.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * APIC driver for the IBM "Summit" chipset. - */ -#define APIC_DEFINITION 1 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static int probe_summit(void) -{ - /* probed later in mptable/ACPI hooks */ - return 0; -} - -struct genapic apic_summit = APIC_INIT("summit", probe_summit); diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile new file mode 100644 index 00000000000..2f216f55b2d --- /dev/null +++ b/arch/x86/mach-generic/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for the generic architecture +# + +EXTRA_CFLAGS := -Iarch/i386/kernel + +obj-y := probe.o summit.o bigsmp.o es7000.o default.o +obj-y += ../../i386/mach-es7000/ diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c new file mode 100644 index 00000000000..58a477baec3 --- /dev/null +++ b/arch/x86/mach-generic/bigsmp.c @@ -0,0 +1,57 @@ +/* + * APIC driver for "bigsmp" XAPIC machines with more than 8 virtual CPUs. + * Drives the local APIC in "clustered mode". + */ +#define APIC_DEFINITION 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int dmi_bigsmp; /* can be set by dmi scanners */ + +static int hp_ht_bigsmp(struct dmi_system_id *d) +{ +#ifdef CONFIG_X86_GENERICARCH + printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); + dmi_bigsmp = 1; +#endif + return 0; +} + + +static struct dmi_system_id bigsmp_dmi_table[] = { + { hp_ht_bigsmp, "HP ProLiant DL760 G2", { + DMI_MATCH(DMI_BIOS_VENDOR, "HP"), + DMI_MATCH(DMI_BIOS_VERSION, "P44-"), + }}, + + { hp_ht_bigsmp, "HP ProLiant DL740", { + DMI_MATCH(DMI_BIOS_VENDOR, "HP"), + DMI_MATCH(DMI_BIOS_VERSION, "P47-"), + }}, + { } +}; + + +static int probe_bigsmp(void) +{ + if (def_to_bigsmp) + dmi_bigsmp = 1; + else + dmi_check_system(bigsmp_dmi_table); + return dmi_bigsmp; +} + +struct genapic apic_bigsmp = APIC_INIT("bigsmp", probe_bigsmp); diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c new file mode 100644 index 00000000000..8685208d851 --- /dev/null +++ b/arch/x86/mach-generic/default.c @@ -0,0 +1,26 @@ +/* + * Default generic APIC driver. This handles upto 8 CPUs. + */ +#define APIC_DEFINITION 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* should be called last. */ +static int probe_default(void) +{ + return 1; +} + +struct genapic apic_default = APIC_INIT("default", probe_default); diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c new file mode 100644 index 00000000000..4742626f08c --- /dev/null +++ b/arch/x86/mach-generic/es7000.c @@ -0,0 +1,69 @@ +/* + * APIC driver for the Unisys ES7000 chipset. + */ +#define APIC_DEFINITION 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int probe_es7000(void) +{ + /* probed later in mptable/ACPI hooks */ + return 0; +} + +extern void es7000_sw_apic(void); +static void __init enable_apic_mode(void) +{ + es7000_sw_apic(); + return; +} + +static __init int mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid) +{ + if (mpc->mpc_oemptr) { + struct mp_config_oemtable *oem_table = + (struct mp_config_oemtable *)mpc->mpc_oemptr; + if (!strncmp(oem, "UNISYS", 6)) + return parse_unisys_oem((char *)oem_table); + } + return 0; +} + +#ifdef CONFIG_ACPI +/* Hook from generic ACPI tables.c */ +static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + unsigned long oem_addr; + if (!find_unisys_acpi_oem_table(&oem_addr)) { + if (es7000_check_dsdt()) + return parse_unisys_oem((char *)oem_addr); + else { + setup_unisys(); + return 1; + } + } + return 0; +} +#else +static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return 0; +} +#endif + +struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c new file mode 100644 index 00000000000..74f3da63442 --- /dev/null +++ b/arch/x86/mach-generic/probe.c @@ -0,0 +1,125 @@ +/* Copyright 2003 Andi Kleen, SuSE Labs. + * Subject to the GNU Public License, v.2 + * + * Generic x86 APIC driver probe layer. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern struct genapic apic_summit; +extern struct genapic apic_bigsmp; +extern struct genapic apic_es7000; +extern struct genapic apic_default; + +struct genapic *genapic = &apic_default; + +struct genapic *apic_probe[] __initdata = { + &apic_summit, + &apic_bigsmp, + &apic_es7000, + &apic_default, /* must be last */ + NULL, +}; + +static int cmdline_apic __initdata; +static int __init parse_apic(char *arg) +{ + int i; + + if (!arg) + return -EINVAL; + + for (i = 0; apic_probe[i]; i++) { + if (!strcmp(apic_probe[i]->name, arg)) { + genapic = apic_probe[i]; + cmdline_apic = 1; + return 0; + } + } + + /* Parsed again by __setup for debug/verbose */ + return 0; +} +early_param("apic", parse_apic); + +void __init generic_bigsmp_probe(void) +{ + /* + * This routine is used to switch to bigsmp mode when + * - There is no apic= option specified by the user + * - generic_apic_probe() has choosen apic_default as the sub_arch + * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support + */ + + if (!cmdline_apic && genapic == &apic_default) + if (apic_bigsmp.probe()) { + genapic = &apic_bigsmp; + printk(KERN_INFO "Overriding APIC driver with %s\n", + genapic->name); + } +} + +void __init generic_apic_probe(void) +{ + if (!cmdline_apic) { + int i; + for (i = 0; apic_probe[i]; i++) { + if (apic_probe[i]->probe()) { + genapic = apic_probe[i]; + break; + } + } + /* Not visible without early console */ + if (!apic_probe[i]) + panic("Didn't find an APIC driver"); + } + printk(KERN_INFO "Using APIC driver %s\n", genapic->name); +} + +/* These functions can switch the APIC even after the initial ->probe() */ + +int __init mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid) +{ + int i; + for (i = 0; apic_probe[i]; ++i) { + if (apic_probe[i]->mps_oem_check(mpc,oem,productid)) { + if (!cmdline_apic) { + genapic = apic_probe[i]; + printk(KERN_INFO "Switched to APIC driver `%s'.\n", + genapic->name); + } + return 1; + } + } + return 0; +} + +int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + int i; + for (i = 0; apic_probe[i]; ++i) { + if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { + if (!cmdline_apic) { + genapic = apic_probe[i]; + printk(KERN_INFO "Switched to APIC driver `%s'.\n", + genapic->name); + } + return 1; + } + } + return 0; +} + +int hard_smp_processor_id(void) +{ + return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID)); +} diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c new file mode 100644 index 00000000000..74883ccb8f7 --- /dev/null +++ b/arch/x86/mach-generic/summit.c @@ -0,0 +1,27 @@ +/* + * APIC driver for the IBM "Summit" chipset. + */ +#define APIC_DEFINITION 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int probe_summit(void) +{ + /* probed later in mptable/ACPI hooks */ + return 0; +} + +struct genapic apic_summit = APIC_INIT("summit", probe_summit); -- cgit v1.2.3-70-g09d2 From 19d8d79ccfd2fb67b1eb86e3c634a2e68a4c4b11 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:41 +0200 Subject: i386: move boot Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/boot/tools/.gitignore | 1 - arch/i386/boot/tools/build.c | 168 ---------------------------------------- arch/x86/boot/tools/.gitignore | 1 + arch/x86/boot/tools/build.c | 168 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 169 insertions(+), 169 deletions(-) delete mode 100644 arch/i386/boot/tools/.gitignore delete mode 100644 arch/i386/boot/tools/build.c create mode 100644 arch/x86/boot/tools/.gitignore create mode 100644 arch/x86/boot/tools/build.c (limited to 'arch/x86') diff --git a/arch/i386/boot/tools/.gitignore b/arch/i386/boot/tools/.gitignore deleted file mode 100644 index 378eac25d31..00000000000 --- a/arch/i386/boot/tools/.gitignore +++ /dev/null @@ -1 +0,0 @@ -build diff --git a/arch/i386/boot/tools/build.c b/arch/i386/boot/tools/build.c deleted file mode 100644 index b4248740ff0..00000000000 --- a/arch/i386/boot/tools/build.c +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 1997 Martin Mares - * Copyright (C) 2007 H. Peter Anvin - */ - -/* - * This file builds a disk-image from two different files: - * - * - setup: 8086 machine code, sets up system parm - * - system: 80386 code for actual system - * - * It does some checking that all files are of the correct type, and - * just writes the result to stdout, removing headers and padding to - * the right amount. It also writes some system data to stderr. - */ - -/* - * Changes by tytso to allow root device specification - * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 - * Cross compiling fixes by Gertjan van Wingerde, July 1996 - * Rewritten by Martin Mares, April 1997 - * Substantially overhauled by H. Peter Anvin, April 2007 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -typedef unsigned char u8; -typedef unsigned short u16; -typedef unsigned long u32; - -#define DEFAULT_MAJOR_ROOT 0 -#define DEFAULT_MINOR_ROOT 0 - -/* Minimal number of setup sectors */ -#define SETUP_SECT_MIN 5 -#define SETUP_SECT_MAX 64 - -/* This must be large enough to hold the entire setup */ -u8 buf[SETUP_SECT_MAX*512]; -int is_big_kernel; - -static void die(const char * str, ...) -{ - va_list args; - va_start(args, str); - vfprintf(stderr, str, args); - fputc('\n', stderr); - exit(1); -} - -static void usage(void) -{ - die("Usage: build [-b] setup system [rootdev] [> image]"); -} - -int main(int argc, char ** argv) -{ - unsigned int i, sz, setup_sectors; - int c; - u32 sys_size; - u8 major_root, minor_root; - struct stat sb; - FILE *file; - int fd; - void *kernel; - - if (argc > 2 && !strcmp(argv[1], "-b")) - { - is_big_kernel = 1; - argc--, argv++; - } - if ((argc < 3) || (argc > 4)) - usage(); - if (argc > 3) { - if (!strcmp(argv[3], "CURRENT")) { - if (stat("/", &sb)) { - perror("/"); - die("Couldn't stat /"); - } - major_root = major(sb.st_dev); - minor_root = minor(sb.st_dev); - } else if (strcmp(argv[3], "FLOPPY")) { - if (stat(argv[3], &sb)) { - perror(argv[3]); - die("Couldn't stat root device."); - } - major_root = major(sb.st_rdev); - minor_root = minor(sb.st_rdev); - } else { - major_root = 0; - minor_root = 0; - } - } else { - major_root = DEFAULT_MAJOR_ROOT; - minor_root = DEFAULT_MINOR_ROOT; - } - fprintf(stderr, "Root device is (%d, %d)\n", major_root, minor_root); - - /* Copy the setup code */ - file = fopen(argv[1], "r"); - if (!file) - die("Unable to open `%s': %m", argv[1]); - c = fread(buf, 1, sizeof(buf), file); - if (ferror(file)) - die("read-error on `setup'"); - if (c < 1024) - die("The setup must be at least 1024 bytes"); - if (buf[510] != 0x55 || buf[511] != 0xaa) - die("Boot block hasn't got boot flag (0xAA55)"); - fclose(file); - - /* Pad unused space with zeros */ - setup_sectors = (c + 511) / 512; - if (setup_sectors < SETUP_SECT_MIN) - setup_sectors = SETUP_SECT_MIN; - i = setup_sectors*512; - memset(buf+c, 0, i-c); - - /* Set the default root device */ - buf[508] = minor_root; - buf[509] = major_root; - - fprintf(stderr, "Setup is %d bytes (padded to %d bytes).\n", c, i); - - /* Open and stat the kernel file */ - fd = open(argv[2], O_RDONLY); - if (fd < 0) - die("Unable to open `%s': %m", argv[2]); - if (fstat(fd, &sb)) - die("Unable to stat `%s': %m", argv[2]); - sz = sb.st_size; - fprintf (stderr, "System is %d kB\n", (sz+1023)/1024); - kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0); - if (kernel == MAP_FAILED) - die("Unable to mmap '%s': %m", argv[2]); - sys_size = (sz + 15) / 16; - if (!is_big_kernel && sys_size > DEF_SYSSIZE) - die("System is too big. Try using bzImage or modules."); - - /* Patch the setup code with the appropriate size parameters */ - buf[0x1f1] = setup_sectors-1; - buf[0x1f4] = sys_size; - buf[0x1f5] = sys_size >> 8; - buf[0x1f6] = sys_size >> 16; - buf[0x1f7] = sys_size >> 24; - - if (fwrite(buf, 1, i, stdout) != i) - die("Writing setup failed"); - - /* Copy the kernel code */ - if (fwrite(kernel, 1, sz, stdout) != sz) - die("Writing kernel failed"); - close(fd); - - /* Everything is OK */ - return 0; -} diff --git a/arch/x86/boot/tools/.gitignore b/arch/x86/boot/tools/.gitignore new file mode 100644 index 00000000000..378eac25d31 --- /dev/null +++ b/arch/x86/boot/tools/.gitignore @@ -0,0 +1 @@ +build diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c new file mode 100644 index 00000000000..b4248740ff0 --- /dev/null +++ b/arch/x86/boot/tools/build.c @@ -0,0 +1,168 @@ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 1997 Martin Mares + * Copyright (C) 2007 H. Peter Anvin + */ + +/* + * This file builds a disk-image from two different files: + * + * - setup: 8086 machine code, sets up system parm + * - system: 80386 code for actual system + * + * It does some checking that all files are of the correct type, and + * just writes the result to stdout, removing headers and padding to + * the right amount. It also writes some system data to stderr. + */ + +/* + * Changes by tytso to allow root device specification + * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 + * Cross compiling fixes by Gertjan van Wingerde, July 1996 + * Rewritten by Martin Mares, April 1997 + * Substantially overhauled by H. Peter Anvin, April 2007 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned long u32; + +#define DEFAULT_MAJOR_ROOT 0 +#define DEFAULT_MINOR_ROOT 0 + +/* Minimal number of setup sectors */ +#define SETUP_SECT_MIN 5 +#define SETUP_SECT_MAX 64 + +/* This must be large enough to hold the entire setup */ +u8 buf[SETUP_SECT_MAX*512]; +int is_big_kernel; + +static void die(const char * str, ...) +{ + va_list args; + va_start(args, str); + vfprintf(stderr, str, args); + fputc('\n', stderr); + exit(1); +} + +static void usage(void) +{ + die("Usage: build [-b] setup system [rootdev] [> image]"); +} + +int main(int argc, char ** argv) +{ + unsigned int i, sz, setup_sectors; + int c; + u32 sys_size; + u8 major_root, minor_root; + struct stat sb; + FILE *file; + int fd; + void *kernel; + + if (argc > 2 && !strcmp(argv[1], "-b")) + { + is_big_kernel = 1; + argc--, argv++; + } + if ((argc < 3) || (argc > 4)) + usage(); + if (argc > 3) { + if (!strcmp(argv[3], "CURRENT")) { + if (stat("/", &sb)) { + perror("/"); + die("Couldn't stat /"); + } + major_root = major(sb.st_dev); + minor_root = minor(sb.st_dev); + } else if (strcmp(argv[3], "FLOPPY")) { + if (stat(argv[3], &sb)) { + perror(argv[3]); + die("Couldn't stat root device."); + } + major_root = major(sb.st_rdev); + minor_root = minor(sb.st_rdev); + } else { + major_root = 0; + minor_root = 0; + } + } else { + major_root = DEFAULT_MAJOR_ROOT; + minor_root = DEFAULT_MINOR_ROOT; + } + fprintf(stderr, "Root device is (%d, %d)\n", major_root, minor_root); + + /* Copy the setup code */ + file = fopen(argv[1], "r"); + if (!file) + die("Unable to open `%s': %m", argv[1]); + c = fread(buf, 1, sizeof(buf), file); + if (ferror(file)) + die("read-error on `setup'"); + if (c < 1024) + die("The setup must be at least 1024 bytes"); + if (buf[510] != 0x55 || buf[511] != 0xaa) + die("Boot block hasn't got boot flag (0xAA55)"); + fclose(file); + + /* Pad unused space with zeros */ + setup_sectors = (c + 511) / 512; + if (setup_sectors < SETUP_SECT_MIN) + setup_sectors = SETUP_SECT_MIN; + i = setup_sectors*512; + memset(buf+c, 0, i-c); + + /* Set the default root device */ + buf[508] = minor_root; + buf[509] = major_root; + + fprintf(stderr, "Setup is %d bytes (padded to %d bytes).\n", c, i); + + /* Open and stat the kernel file */ + fd = open(argv[2], O_RDONLY); + if (fd < 0) + die("Unable to open `%s': %m", argv[2]); + if (fstat(fd, &sb)) + die("Unable to stat `%s': %m", argv[2]); + sz = sb.st_size; + fprintf (stderr, "System is %d kB\n", (sz+1023)/1024); + kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0); + if (kernel == MAP_FAILED) + die("Unable to mmap '%s': %m", argv[2]); + sys_size = (sz + 15) / 16; + if (!is_big_kernel && sys_size > DEF_SYSSIZE) + die("System is too big. Try using bzImage or modules."); + + /* Patch the setup code with the appropriate size parameters */ + buf[0x1f1] = setup_sectors-1; + buf[0x1f4] = sys_size; + buf[0x1f5] = sys_size >> 8; + buf[0x1f6] = sys_size >> 16; + buf[0x1f7] = sys_size >> 24; + + if (fwrite(buf, 1, i, stdout) != i) + die("Writing setup failed"); + + /* Copy the kernel code */ + if (fwrite(kernel, 1, sz, stdout) != sz) + die("Writing kernel failed"); + close(fd); + + /* Everything is OK */ + return 0; +} -- cgit v1.2.3-70-g09d2 From 0530bf37cebcf22a73652937c2340bc1ebd92000 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:43 +0200 Subject: i386: move boot Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/boot/compressed/.gitignore | 1 - arch/i386/boot/compressed/Makefile | 5 - arch/i386/boot/compressed/Makefile_32 | 50 --- arch/i386/boot/compressed/head_32.S | 180 --------- arch/i386/boot/compressed/misc_32.c | 379 ------------------- arch/i386/boot/compressed/relocs.c | 631 ------------------------------- arch/i386/boot/compressed/vmlinux_32.lds | 43 --- arch/i386/boot/compressed/vmlinux_32.scr | 10 - arch/x86/boot/compressed/.gitignore | 1 + arch/x86/boot/compressed/Makefile | 5 + arch/x86/boot/compressed/Makefile_32 | 50 +++ arch/x86/boot/compressed/head_32.S | 180 +++++++++ arch/x86/boot/compressed/misc_32.c | 379 +++++++++++++++++++ arch/x86/boot/compressed/relocs.c | 631 +++++++++++++++++++++++++++++++ arch/x86/boot/compressed/vmlinux_32.lds | 43 +++ arch/x86/boot/compressed/vmlinux_32.scr | 10 + arch/x86_64/boot/compressed/Makefile | 2 +- 17 files changed, 1300 insertions(+), 1300 deletions(-) delete mode 100644 arch/i386/boot/compressed/.gitignore delete mode 100644 arch/i386/boot/compressed/Makefile delete mode 100644 arch/i386/boot/compressed/Makefile_32 delete mode 100644 arch/i386/boot/compressed/head_32.S delete mode 100644 arch/i386/boot/compressed/misc_32.c delete mode 100644 arch/i386/boot/compressed/relocs.c delete mode 100644 arch/i386/boot/compressed/vmlinux_32.lds delete mode 100644 arch/i386/boot/compressed/vmlinux_32.scr create mode 100644 arch/x86/boot/compressed/.gitignore create mode 100644 arch/x86/boot/compressed/Makefile create mode 100644 arch/x86/boot/compressed/Makefile_32 create mode 100644 arch/x86/boot/compressed/head_32.S create mode 100644 arch/x86/boot/compressed/misc_32.c create mode 100644 arch/x86/boot/compressed/relocs.c create mode 100644 arch/x86/boot/compressed/vmlinux_32.lds create mode 100644 arch/x86/boot/compressed/vmlinux_32.scr (limited to 'arch/x86') diff --git a/arch/i386/boot/compressed/.gitignore b/arch/i386/boot/compressed/.gitignore deleted file mode 100644 index be0ed065249..00000000000 --- a/arch/i386/boot/compressed/.gitignore +++ /dev/null @@ -1 +0,0 @@ -relocs diff --git a/arch/i386/boot/compressed/Makefile b/arch/i386/boot/compressed/Makefile deleted file mode 100644 index 78514f3f626..00000000000 --- a/arch/i386/boot/compressed/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -ifeq ($(CONFIG_X86_32),y) -include ${srctree}/arch/i386/boot/compressed/Makefile_32 -else -include ${srctree}/arch/x86_64/boot/compressed/Makefile_64 -endif diff --git a/arch/i386/boot/compressed/Makefile_32 b/arch/i386/boot/compressed/Makefile_32 deleted file mode 100644 index 0c64c314ead..00000000000 --- a/arch/i386/boot/compressed/Makefile_32 +++ /dev/null @@ -1,50 +0,0 @@ -# -# linux/arch/i386/boot/compressed/Makefile -# -# create a compressed vmlinux image from the original vmlinux -# - -targets := vmlinux vmlinux.bin vmlinux.bin.gz head_32.o misc_32.o piggy.o \ - vmlinux.bin.all vmlinux.relocs -EXTRA_AFLAGS := -traditional - -LDFLAGS_vmlinux := -T -hostprogs-y := relocs - -CFLAGS := -m32 -D__KERNEL__ $(LINUX_INCLUDE) -O2 \ - -fno-strict-aliasing -fPIC \ - $(call cc-option,-ffreestanding) \ - $(call cc-option,-fno-stack-protector) -LDFLAGS := -m elf_i386 - -$(obj)/vmlinux: $(src)/vmlinux_32.lds $(obj)/head_32.o $(obj)/misc_32.o $(obj)/piggy.o FORCE - $(call if_changed,ld) - @: - -$(obj)/vmlinux.bin: vmlinux FORCE - $(call if_changed,objcopy) - -quiet_cmd_relocs = RELOCS $@ - cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $< -$(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE - $(call if_changed,relocs) - -vmlinux.bin.all-y := $(obj)/vmlinux.bin -vmlinux.bin.all-$(CONFIG_RELOCATABLE) += $(obj)/vmlinux.relocs -quiet_cmd_relocbin = BUILD $@ - cmd_relocbin = cat $(filter-out FORCE,$^) > $@ -$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE - $(call if_changed,relocbin) - -ifdef CONFIG_RELOCATABLE -$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE - $(call if_changed,gzip) -else -$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE - $(call if_changed,gzip) -endif - -LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T - -$(obj)/piggy.o: $(src)/vmlinux_32.scr $(obj)/vmlinux.bin.gz FORCE - $(call if_changed,ld) diff --git a/arch/i386/boot/compressed/head_32.S b/arch/i386/boot/compressed/head_32.S deleted file mode 100644 index f35ea223752..00000000000 --- a/arch/i386/boot/compressed/head_32.S +++ /dev/null @@ -1,180 +0,0 @@ -/* - * linux/boot/head.S - * - * Copyright (C) 1991, 1992, 1993 Linus Torvalds - */ - -/* - * head.S contains the 32-bit startup code. - * - * NOTE!!! Startup happens at absolute address 0x00001000, which is also where - * the page directory will exist. The startup code will be overwritten by - * the page directory. [According to comments etc elsewhere on a compressed - * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC] - * - * Page 0 is deliberately kept safe, since System Management Mode code in - * laptops may need to access the BIOS data stored there. This is also - * useful for future device drivers that either access the BIOS via VM86 - * mode. - */ - -/* - * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 - */ -.text - -#include -#include -#include -#include - -.section ".text.head","ax",@progbits - .globl startup_32 - -startup_32: - cld - cli - movl $(__BOOT_DS),%eax - movl %eax,%ds - movl %eax,%es - movl %eax,%fs - movl %eax,%gs - movl %eax,%ss - -/* Calculate the delta between where we were compiled to run - * at and where we were actually loaded at. This can only be done - * with a short local call on x86. Nothing else will tell us what - * address we are running at. The reserved chunk of the real-mode - * data at 0x1e4 (defined as a scratch field) are used as the stack - * for this calculation. Only 4 bytes are needed. - */ - leal (0x1e4+4)(%esi), %esp - call 1f -1: popl %ebp - subl $1b, %ebp - -/* %ebp contains the address we are loaded at by the boot loader and %ebx - * contains the address where we should move the kernel image temporarily - * for safe in-place decompression. - */ - -#ifdef CONFIG_RELOCATABLE - movl %ebp, %ebx - addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebx - andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx -#else - movl $LOAD_PHYSICAL_ADDR, %ebx -#endif - - /* Replace the compressed data size with the uncompressed size */ - subl input_len(%ebp), %ebx - movl output_len(%ebp), %eax - addl %eax, %ebx - /* Add 8 bytes for every 32K input block */ - shrl $12, %eax - addl %eax, %ebx - /* Add 32K + 18 bytes of extra slack */ - addl $(32768 + 18), %ebx - /* Align on a 4K boundary */ - addl $4095, %ebx - andl $~4095, %ebx - -/* Copy the compressed kernel to the end of our buffer - * where decompression in place becomes safe. - */ - pushl %esi - leal _end(%ebp), %esi - leal _end(%ebx), %edi - movl $(_end - startup_32), %ecx - std - rep - movsb - cld - popl %esi - -/* Compute the kernel start address. - */ -#ifdef CONFIG_RELOCATABLE - addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebp - andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebp -#else - movl $LOAD_PHYSICAL_ADDR, %ebp -#endif - -/* - * Jump to the relocated address. - */ - leal relocated(%ebx), %eax - jmp *%eax -.section ".text" -relocated: - -/* - * Clear BSS - */ - xorl %eax,%eax - leal _edata(%ebx),%edi - leal _end(%ebx), %ecx - subl %edi,%ecx - cld - rep - stosb - -/* - * Setup the stack for the decompressor - */ - leal stack_end(%ebx), %esp - -/* - * Do the decompression, and jump to the new kernel.. - */ - movl output_len(%ebx), %eax - pushl %eax - pushl %ebp # output address - movl input_len(%ebx), %eax - pushl %eax # input_len - leal input_data(%ebx), %eax - pushl %eax # input_data - leal _end(%ebx), %eax - pushl %eax # end of the image as third argument - pushl %esi # real mode pointer as second arg - call decompress_kernel - addl $20, %esp - popl %ecx - -#if CONFIG_RELOCATABLE -/* Find the address of the relocations. - */ - movl %ebp, %edi - addl %ecx, %edi - -/* Calculate the delta between where vmlinux was compiled to run - * and where it was actually loaded. - */ - movl %ebp, %ebx - subl $LOAD_PHYSICAL_ADDR, %ebx - jz 2f /* Nothing to be done if loaded at compiled addr. */ -/* - * Process relocations. - */ - -1: subl $4, %edi - movl 0(%edi), %ecx - testl %ecx, %ecx - jz 2f - addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) - jmp 1b -2: -#endif - -/* - * Jump to the decompressed kernel. - */ - xorl %ebx,%ebx - jmp *%ebp - -.bss -.balign 4 -stack: - .fill 4096, 1, 0 -stack_end: diff --git a/arch/i386/boot/compressed/misc_32.c b/arch/i386/boot/compressed/misc_32.c deleted file mode 100644 index b28505c544c..00000000000 --- a/arch/i386/boot/compressed/misc_32.c +++ /dev/null @@ -1,379 +0,0 @@ -/* - * misc.c - * - * This is a collection of several routines from gzip-1.0.3 - * adapted for Linux. - * - * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 - * puts by Nick Holloway 1993, better puts by Martin Mares 1995 - * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 - */ - -#undef CONFIG_PARAVIRT -#include -#include -#include -#include -#include -#include - -/* WARNING!! - * This code is compiled with -fPIC and it is relocated dynamically - * at run time, but no relocation processing is performed. - * This means that it is not safe to place pointers in static structures. - */ - -/* - * Getting to provable safe in place decompression is hard. - * Worst case behaviours need to be analized. - * Background information: - * - * The file layout is: - * magic[2] - * method[1] - * flags[1] - * timestamp[4] - * extraflags[1] - * os[1] - * compressed data blocks[N] - * crc[4] orig_len[4] - * - * resulting in 18 bytes of non compressed data overhead. - * - * Files divided into blocks - * 1 bit (last block flag) - * 2 bits (block type) - * - * 1 block occurs every 32K -1 bytes or when there 50% compression has been achieved. - * The smallest block type encoding is always used. - * - * stored: - * 32 bits length in bytes. - * - * fixed: - * magic fixed tree. - * symbols. - * - * dynamic: - * dynamic tree encoding. - * symbols. - * - * - * The buffer for decompression in place is the length of the - * uncompressed data, plus a small amount extra to keep the algorithm safe. - * The compressed data is placed at the end of the buffer. The output - * pointer is placed at the start of the buffer and the input pointer - * is placed where the compressed data starts. Problems will occur - * when the output pointer overruns the input pointer. - * - * The output pointer can only overrun the input pointer if the input - * pointer is moving faster than the output pointer. A condition only - * triggered by data whose compressed form is larger than the uncompressed - * form. - * - * The worst case at the block level is a growth of the compressed data - * of 5 bytes per 32767 bytes. - * - * The worst case internal to a compressed block is very hard to figure. - * The worst case can at least be boundined by having one bit that represents - * 32764 bytes and then all of the rest of the bytes representing the very - * very last byte. - * - * All of which is enough to compute an amount of extra data that is required - * to be safe. To avoid problems at the block level allocating 5 extra bytes - * per 32767 bytes of data is sufficient. To avoind problems internal to a block - * adding an extra 32767 bytes (the worst case uncompressed block size) is - * sufficient, to ensure that in the worst case the decompressed data for - * block will stop the byte before the compressed data for a block begins. - * To avoid problems with the compressed data's meta information an extra 18 - * bytes are needed. Leading to the formula: - * - * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size. - * - * Adding 8 bytes per 32K is a bit excessive but much easier to calculate. - * Adding 32768 instead of 32767 just makes for round numbers. - * Adding the decompressor_size is necessary as it musht live after all - * of the data as well. Last I measured the decompressor is about 14K. - * 10K of actuall data and 4K of bss. - * - */ - -/* - * gzip declarations - */ - -#define OF(args) args -#define STATIC static - -#undef memset -#undef memcpy -#define memzero(s, n) memset ((s), 0, (n)) - -typedef unsigned char uch; -typedef unsigned short ush; -typedef unsigned long ulg; - -#define WSIZE 0x80000000 /* Window size must be at least 32k, - * and a power of two - * We don't actually have a window just - * a huge output buffer so I report - * a 2G windows size, as that should - * always be larger than our output buffer. - */ - -static uch *inbuf; /* input buffer */ -static uch *window; /* Sliding window buffer, (and final output buffer) */ - -static unsigned insize; /* valid bytes in inbuf */ -static unsigned inptr; /* index of next byte to be processed in inbuf */ -static unsigned outcnt; /* bytes in output buffer */ - -/* gzip flag byte */ -#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */ -#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */ -#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ -#define ORIG_NAME 0x08 /* bit 3 set: original file name present */ -#define COMMENT 0x10 /* bit 4 set: file comment present */ -#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */ -#define RESERVED 0xC0 /* bit 6,7: reserved */ - -#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf()) - -/* Diagnostic functions */ -#ifdef DEBUG -# define Assert(cond,msg) {if(!(cond)) error(msg);} -# define Trace(x) fprintf x -# define Tracev(x) {if (verbose) fprintf x ;} -# define Tracevv(x) {if (verbose>1) fprintf x ;} -# define Tracec(c,x) {if (verbose && (c)) fprintf x ;} -# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;} -#else -# define Assert(cond,msg) -# define Trace(x) -# define Tracev(x) -# define Tracevv(x) -# define Tracec(c,x) -# define Tracecv(c,x) -#endif - -static int fill_inbuf(void); -static void flush_window(void); -static void error(char *m); -static void gzip_mark(void **); -static void gzip_release(void **); - -/* - * This is set up by the setup-routine at boot-time - */ -static unsigned char *real_mode; /* Pointer to real-mode data */ - -#define RM_EXT_MEM_K (*(unsigned short *)(real_mode + 0x2)) -#ifndef STANDARD_MEMORY_BIOS_CALL -#define RM_ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0)) -#endif -#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0)) - -extern unsigned char input_data[]; -extern int input_len; - -static long bytes_out = 0; - -static void *malloc(int size); -static void free(void *where); - -static void *memset(void *s, int c, unsigned n); -static void *memcpy(void *dest, const void *src, unsigned n); - -static void putstr(const char *); - -static unsigned long free_mem_ptr; -static unsigned long free_mem_end_ptr; - -#define HEAP_SIZE 0x4000 - -static char *vidmem = (char *)0xb8000; -static int vidport; -static int lines, cols; - -#ifdef CONFIG_X86_NUMAQ -void *xquad_portio; -#endif - -#include "../../../../lib/inflate.c" - -static void *malloc(int size) -{ - void *p; - - if (size <0) error("Malloc error"); - if (free_mem_ptr <= 0) error("Memory error"); - - free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */ - - p = (void *)free_mem_ptr; - free_mem_ptr += size; - - if (free_mem_ptr >= free_mem_end_ptr) - error("Out of memory"); - - return p; -} - -static void free(void *where) -{ /* Don't care */ -} - -static void gzip_mark(void **ptr) -{ - *ptr = (void *) free_mem_ptr; -} - -static void gzip_release(void **ptr) -{ - free_mem_ptr = (unsigned long) *ptr; -} - -static void scroll(void) -{ - int i; - - memcpy ( vidmem, vidmem + cols * 2, ( lines - 1 ) * cols * 2 ); - for ( i = ( lines - 1 ) * cols * 2; i < lines * cols * 2; i += 2 ) - vidmem[i] = ' '; -} - -static void putstr(const char *s) -{ - int x,y,pos; - char c; - - x = RM_SCREEN_INFO.orig_x; - y = RM_SCREEN_INFO.orig_y; - - while ( ( c = *s++ ) != '\0' ) { - if ( c == '\n' ) { - x = 0; - if ( ++y >= lines ) { - scroll(); - y--; - } - } else { - vidmem [ ( x + cols * y ) * 2 ] = c; - if ( ++x >= cols ) { - x = 0; - if ( ++y >= lines ) { - scroll(); - y--; - } - } - } - } - - RM_SCREEN_INFO.orig_x = x; - RM_SCREEN_INFO.orig_y = y; - - pos = (x + cols * y) * 2; /* Update cursor position */ - outb_p(14, vidport); - outb_p(0xff & (pos >> 9), vidport+1); - outb_p(15, vidport); - outb_p(0xff & (pos >> 1), vidport+1); -} - -static void* memset(void* s, int c, unsigned n) -{ - int i; - char *ss = (char*)s; - - for (i=0;i> 8); - } - crc = c; - bytes_out += (ulg)outcnt; - outcnt = 0; -} - -static void error(char *x) -{ - putstr("\n\n"); - putstr(x); - putstr("\n\n -- System halted"); - - while(1); /* Halt */ -} - -asmlinkage void decompress_kernel(void *rmode, unsigned long end, - uch *input_data, unsigned long input_len, uch *output) -{ - real_mode = rmode; - - if (RM_SCREEN_INFO.orig_video_mode == 7) { - vidmem = (char *) 0xb0000; - vidport = 0x3b4; - } else { - vidmem = (char *) 0xb8000; - vidport = 0x3d4; - } - - lines = RM_SCREEN_INFO.orig_video_lines; - cols = RM_SCREEN_INFO.orig_video_cols; - - window = output; /* Output buffer (Normally at 1M) */ - free_mem_ptr = end; /* Heap */ - free_mem_end_ptr = end + HEAP_SIZE; - inbuf = input_data; /* Input buffer */ - insize = input_len; - inptr = 0; - - if ((u32)output & (CONFIG_PHYSICAL_ALIGN -1)) - error("Destination address not CONFIG_PHYSICAL_ALIGN aligned"); - if (end > ((-__PAGE_OFFSET-(512 <<20)-1) & 0x7fffffff)) - error("Destination address too large"); -#ifndef CONFIG_RELOCATABLE - if ((u32)output != LOAD_PHYSICAL_ADDR) - error("Wrong destination address"); -#endif - - makecrc(); - putstr("Uncompressing Linux... "); - gunzip(); - putstr("Ok, booting the kernel.\n"); - return; -} diff --git a/arch/i386/boot/compressed/relocs.c b/arch/i386/boot/compressed/relocs.c deleted file mode 100644 index 2d77ee728f9..00000000000 --- a/arch/i386/boot/compressed/relocs.c +++ /dev/null @@ -1,631 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#define USE_BSD -#include - -#define MAX_SHDRS 100 -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -static Elf32_Ehdr ehdr; -static Elf32_Shdr shdr[MAX_SHDRS]; -static Elf32_Sym *symtab[MAX_SHDRS]; -static Elf32_Rel *reltab[MAX_SHDRS]; -static char *strtab[MAX_SHDRS]; -static unsigned long reloc_count, reloc_idx; -static unsigned long *relocs; - -/* - * Following symbols have been audited. There values are constant and do - * not change if bzImage is loaded at a different physical address than - * the address for which it has been compiled. Don't warn user about - * absolute relocations present w.r.t these symbols. - */ -static const char* safe_abs_relocs[] = { - "__kernel_vsyscall", - "__kernel_rt_sigreturn", - "__kernel_sigreturn", - "SYSENTER_RETURN", - "VDSO_NOTE_MASK", - "xen_irq_disable_direct_reloc", - "xen_save_fl_direct_reloc", -}; - -static int is_safe_abs_reloc(const char* sym_name) -{ - int i, array_size; - - array_size = sizeof(safe_abs_relocs)/sizeof(char*); - - for(i = 0; i < array_size; i++) { - if (!strcmp(sym_name, safe_abs_relocs[i])) - /* Match found */ - return 1; - } - if (strncmp(sym_name, "__crc_", 6) == 0) - return 1; - return 0; -} - -static void die(char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); - exit(1); -} - -static const char *sym_type(unsigned type) -{ - static const char *type_name[] = { -#define SYM_TYPE(X) [X] = #X - SYM_TYPE(STT_NOTYPE), - SYM_TYPE(STT_OBJECT), - SYM_TYPE(STT_FUNC), - SYM_TYPE(STT_SECTION), - SYM_TYPE(STT_FILE), - SYM_TYPE(STT_COMMON), - SYM_TYPE(STT_TLS), -#undef SYM_TYPE - }; - const char *name = "unknown sym type name"; - if (type < ARRAY_SIZE(type_name)) { - name = type_name[type]; - } - return name; -} - -static const char *sym_bind(unsigned bind) -{ - static const char *bind_name[] = { -#define SYM_BIND(X) [X] = #X - SYM_BIND(STB_LOCAL), - SYM_BIND(STB_GLOBAL), - SYM_BIND(STB_WEAK), -#undef SYM_BIND - }; - const char *name = "unknown sym bind name"; - if (bind < ARRAY_SIZE(bind_name)) { - name = bind_name[bind]; - } - return name; -} - -static const char *sym_visibility(unsigned visibility) -{ - static const char *visibility_name[] = { -#define SYM_VISIBILITY(X) [X] = #X - SYM_VISIBILITY(STV_DEFAULT), - SYM_VISIBILITY(STV_INTERNAL), - SYM_VISIBILITY(STV_HIDDEN), - SYM_VISIBILITY(STV_PROTECTED), -#undef SYM_VISIBILITY - }; - const char *name = "unknown sym visibility name"; - if (visibility < ARRAY_SIZE(visibility_name)) { - name = visibility_name[visibility]; - } - return name; -} - -static const char *rel_type(unsigned type) -{ - static const char *type_name[] = { -#define REL_TYPE(X) [X] = #X - REL_TYPE(R_386_NONE), - REL_TYPE(R_386_32), - REL_TYPE(R_386_PC32), - REL_TYPE(R_386_GOT32), - REL_TYPE(R_386_PLT32), - REL_TYPE(R_386_COPY), - REL_TYPE(R_386_GLOB_DAT), - REL_TYPE(R_386_JMP_SLOT), - REL_TYPE(R_386_RELATIVE), - REL_TYPE(R_386_GOTOFF), - REL_TYPE(R_386_GOTPC), -#undef REL_TYPE - }; - const char *name = "unknown type rel type name"; - if (type < ARRAY_SIZE(type_name)) { - name = type_name[type]; - } - return name; -} - -static const char *sec_name(unsigned shndx) -{ - const char *sec_strtab; - const char *name; - sec_strtab = strtab[ehdr.e_shstrndx]; - name = ""; - if (shndx < ehdr.e_shnum) { - name = sec_strtab + shdr[shndx].sh_name; - } - else if (shndx == SHN_ABS) { - name = "ABSOLUTE"; - } - else if (shndx == SHN_COMMON) { - name = "COMMON"; - } - return name; -} - -static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym) -{ - const char *name; - name = ""; - if (sym->st_name) { - name = sym_strtab + sym->st_name; - } - else { - name = sec_name(shdr[sym->st_shndx].sh_name); - } - return name; -} - - - -#if BYTE_ORDER == LITTLE_ENDIAN -#define le16_to_cpu(val) (val) -#define le32_to_cpu(val) (val) -#endif -#if BYTE_ORDER == BIG_ENDIAN -#define le16_to_cpu(val) bswap_16(val) -#define le32_to_cpu(val) bswap_32(val) -#endif - -static uint16_t elf16_to_cpu(uint16_t val) -{ - return le16_to_cpu(val); -} - -static uint32_t elf32_to_cpu(uint32_t val) -{ - return le32_to_cpu(val); -} - -static void read_ehdr(FILE *fp) -{ - if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) { - die("Cannot read ELF header: %s\n", - strerror(errno)); - } - if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) { - die("No ELF magic\n"); - } - if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) { - die("Not a 32 bit executable\n"); - } - if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) { - die("Not a LSB ELF executable\n"); - } - if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) { - die("Unknown ELF version\n"); - } - /* Convert the fields to native endian */ - ehdr.e_type = elf16_to_cpu(ehdr.e_type); - ehdr.e_machine = elf16_to_cpu(ehdr.e_machine); - ehdr.e_version = elf32_to_cpu(ehdr.e_version); - ehdr.e_entry = elf32_to_cpu(ehdr.e_entry); - ehdr.e_phoff = elf32_to_cpu(ehdr.e_phoff); - ehdr.e_shoff = elf32_to_cpu(ehdr.e_shoff); - ehdr.e_flags = elf32_to_cpu(ehdr.e_flags); - ehdr.e_ehsize = elf16_to_cpu(ehdr.e_ehsize); - ehdr.e_phentsize = elf16_to_cpu(ehdr.e_phentsize); - ehdr.e_phnum = elf16_to_cpu(ehdr.e_phnum); - ehdr.e_shentsize = elf16_to_cpu(ehdr.e_shentsize); - ehdr.e_shnum = elf16_to_cpu(ehdr.e_shnum); - ehdr.e_shstrndx = elf16_to_cpu(ehdr.e_shstrndx); - - if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) { - die("Unsupported ELF header type\n"); - } - if (ehdr.e_machine != EM_386) { - die("Not for x86\n"); - } - if (ehdr.e_version != EV_CURRENT) { - die("Unknown ELF version\n"); - } - if (ehdr.e_ehsize != sizeof(Elf32_Ehdr)) { - die("Bad Elf header size\n"); - } - if (ehdr.e_phentsize != sizeof(Elf32_Phdr)) { - die("Bad program header entry\n"); - } - if (ehdr.e_shentsize != sizeof(Elf32_Shdr)) { - die("Bad section header entry\n"); - } - if (ehdr.e_shstrndx >= ehdr.e_shnum) { - die("String table index out of bounds\n"); - } -} - -static void read_shdrs(FILE *fp) -{ - int i; - if (ehdr.e_shnum > MAX_SHDRS) { - die("%d section headers supported: %d\n", - ehdr.e_shnum, MAX_SHDRS); - } - if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) { - die("Seek to %d failed: %s\n", - ehdr.e_shoff, strerror(errno)); - } - if (fread(&shdr, sizeof(shdr[0]), ehdr.e_shnum, fp) != ehdr.e_shnum) { - die("Cannot read ELF section headers: %s\n", - strerror(errno)); - } - for(i = 0; i < ehdr.e_shnum; i++) { - shdr[i].sh_name = elf32_to_cpu(shdr[i].sh_name); - shdr[i].sh_type = elf32_to_cpu(shdr[i].sh_type); - shdr[i].sh_flags = elf32_to_cpu(shdr[i].sh_flags); - shdr[i].sh_addr = elf32_to_cpu(shdr[i].sh_addr); - shdr[i].sh_offset = elf32_to_cpu(shdr[i].sh_offset); - shdr[i].sh_size = elf32_to_cpu(shdr[i].sh_size); - shdr[i].sh_link = elf32_to_cpu(shdr[i].sh_link); - shdr[i].sh_info = elf32_to_cpu(shdr[i].sh_info); - shdr[i].sh_addralign = elf32_to_cpu(shdr[i].sh_addralign); - shdr[i].sh_entsize = elf32_to_cpu(shdr[i].sh_entsize); - } - -} - -static void read_strtabs(FILE *fp) -{ - int i; - for(i = 0; i < ehdr.e_shnum; i++) { - if (shdr[i].sh_type != SHT_STRTAB) { - continue; - } - strtab[i] = malloc(shdr[i].sh_size); - if (!strtab[i]) { - die("malloc of %d bytes for strtab failed\n", - shdr[i].sh_size); - } - if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { - die("Seek to %d failed: %s\n", - shdr[i].sh_offset, strerror(errno)); - } - if (fread(strtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { - die("Cannot read symbol table: %s\n", - strerror(errno)); - } - } -} - -static void read_symtabs(FILE *fp) -{ - int i,j; - for(i = 0; i < ehdr.e_shnum; i++) { - if (shdr[i].sh_type != SHT_SYMTAB) { - continue; - } - symtab[i] = malloc(shdr[i].sh_size); - if (!symtab[i]) { - die("malloc of %d bytes for symtab failed\n", - shdr[i].sh_size); - } - if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { - die("Seek to %d failed: %s\n", - shdr[i].sh_offset, strerror(errno)); - } - if (fread(symtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { - die("Cannot read symbol table: %s\n", - strerror(errno)); - } - for(j = 0; j < shdr[i].sh_size/sizeof(symtab[i][0]); j++) { - symtab[i][j].st_name = elf32_to_cpu(symtab[i][j].st_name); - symtab[i][j].st_value = elf32_to_cpu(symtab[i][j].st_value); - symtab[i][j].st_size = elf32_to_cpu(symtab[i][j].st_size); - symtab[i][j].st_shndx = elf16_to_cpu(symtab[i][j].st_shndx); - } - } -} - - -static void read_relocs(FILE *fp) -{ - int i,j; - for(i = 0; i < ehdr.e_shnum; i++) { - if (shdr[i].sh_type != SHT_REL) { - continue; - } - reltab[i] = malloc(shdr[i].sh_size); - if (!reltab[i]) { - die("malloc of %d bytes for relocs failed\n", - shdr[i].sh_size); - } - if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { - die("Seek to %d failed: %s\n", - shdr[i].sh_offset, strerror(errno)); - } - if (fread(reltab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { - die("Cannot read symbol table: %s\n", - strerror(errno)); - } - for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { - reltab[i][j].r_offset = elf32_to_cpu(reltab[i][j].r_offset); - reltab[i][j].r_info = elf32_to_cpu(reltab[i][j].r_info); - } - } -} - - -static void print_absolute_symbols(void) -{ - int i; - printf("Absolute symbols\n"); - printf(" Num: Value Size Type Bind Visibility Name\n"); - for(i = 0; i < ehdr.e_shnum; i++) { - char *sym_strtab; - Elf32_Sym *sh_symtab; - int j; - if (shdr[i].sh_type != SHT_SYMTAB) { - continue; - } - sh_symtab = symtab[i]; - sym_strtab = strtab[shdr[i].sh_link]; - for(j = 0; j < shdr[i].sh_size/sizeof(symtab[0][0]); j++) { - Elf32_Sym *sym; - const char *name; - sym = &symtab[i][j]; - name = sym_name(sym_strtab, sym); - if (sym->st_shndx != SHN_ABS) { - continue; - } - printf("%5d %08x %5d %10s %10s %12s %s\n", - j, sym->st_value, sym->st_size, - sym_type(ELF32_ST_TYPE(sym->st_info)), - sym_bind(ELF32_ST_BIND(sym->st_info)), - sym_visibility(ELF32_ST_VISIBILITY(sym->st_other)), - name); - } - } - printf("\n"); -} - -static void print_absolute_relocs(void) -{ - int i, printed = 0; - - for(i = 0; i < ehdr.e_shnum; i++) { - char *sym_strtab; - Elf32_Sym *sh_symtab; - unsigned sec_applies, sec_symtab; - int j; - if (shdr[i].sh_type != SHT_REL) { - continue; - } - sec_symtab = shdr[i].sh_link; - sec_applies = shdr[i].sh_info; - if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) { - continue; - } - sh_symtab = symtab[sec_symtab]; - sym_strtab = strtab[shdr[sec_symtab].sh_link]; - for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { - Elf32_Rel *rel; - Elf32_Sym *sym; - const char *name; - rel = &reltab[i][j]; - sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; - name = sym_name(sym_strtab, sym); - if (sym->st_shndx != SHN_ABS) { - continue; - } - - /* Absolute symbols are not relocated if bzImage is - * loaded at a non-compiled address. Display a warning - * to user at compile time about the absolute - * relocations present. - * - * User need to audit the code to make sure - * some symbols which should have been section - * relative have not become absolute because of some - * linker optimization or wrong programming usage. - * - * Before warning check if this absolute symbol - * relocation is harmless. - */ - if (is_safe_abs_reloc(name)) - continue; - - if (!printed) { - printf("WARNING: Absolute relocations" - " present\n"); - printf("Offset Info Type Sym.Value " - "Sym.Name\n"); - printed = 1; - } - - printf("%08x %08x %10s %08x %s\n", - rel->r_offset, - rel->r_info, - rel_type(ELF32_R_TYPE(rel->r_info)), - sym->st_value, - name); - } - } - - if (printed) - printf("\n"); -} - -static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) -{ - int i; - /* Walk through the relocations */ - for(i = 0; i < ehdr.e_shnum; i++) { - char *sym_strtab; - Elf32_Sym *sh_symtab; - unsigned sec_applies, sec_symtab; - int j; - if (shdr[i].sh_type != SHT_REL) { - continue; - } - sec_symtab = shdr[i].sh_link; - sec_applies = shdr[i].sh_info; - if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) { - continue; - } - sh_symtab = symtab[sec_symtab]; - sym_strtab = strtab[shdr[sec_symtab].sh_link]; - for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { - Elf32_Rel *rel; - Elf32_Sym *sym; - unsigned r_type; - rel = &reltab[i][j]; - sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; - r_type = ELF32_R_TYPE(rel->r_info); - /* Don't visit relocations to absolute symbols */ - if (sym->st_shndx == SHN_ABS) { - continue; - } - if (r_type == R_386_PC32) { - /* PC relative relocations don't need to be adjusted */ - } - else if (r_type == R_386_32) { - /* Visit relocations that need to be adjusted */ - visit(rel, sym); - } - else { - die("Unsupported relocation type: %d\n", r_type); - } - } - } -} - -static void count_reloc(Elf32_Rel *rel, Elf32_Sym *sym) -{ - reloc_count += 1; -} - -static void collect_reloc(Elf32_Rel *rel, Elf32_Sym *sym) -{ - /* Remember the address that needs to be adjusted. */ - relocs[reloc_idx++] = rel->r_offset; -} - -static int cmp_relocs(const void *va, const void *vb) -{ - const unsigned long *a, *b; - a = va; b = vb; - return (*a == *b)? 0 : (*a > *b)? 1 : -1; -} - -static void emit_relocs(int as_text) -{ - int i; - /* Count how many relocations I have and allocate space for them. */ - reloc_count = 0; - walk_relocs(count_reloc); - relocs = malloc(reloc_count * sizeof(relocs[0])); - if (!relocs) { - die("malloc of %d entries for relocs failed\n", - reloc_count); - } - /* Collect up the relocations */ - reloc_idx = 0; - walk_relocs(collect_reloc); - - /* Order the relocations for more efficient processing */ - qsort(relocs, reloc_count, sizeof(relocs[0]), cmp_relocs); - - /* Print the relocations */ - if (as_text) { - /* Print the relocations in a form suitable that - * gas will like. - */ - printf(".section \".data.reloc\",\"a\"\n"); - printf(".balign 4\n"); - for(i = 0; i < reloc_count; i++) { - printf("\t .long 0x%08lx\n", relocs[i]); - } - printf("\n"); - } - else { - unsigned char buf[4]; - buf[0] = buf[1] = buf[2] = buf[3] = 0; - /* Print a stop */ - printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]); - /* Now print each relocation */ - for(i = 0; i < reloc_count; i++) { - buf[0] = (relocs[i] >> 0) & 0xff; - buf[1] = (relocs[i] >> 8) & 0xff; - buf[2] = (relocs[i] >> 16) & 0xff; - buf[3] = (relocs[i] >> 24) & 0xff; - printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]); - } - } -} - -static void usage(void) -{ - die("relocs [--abs-syms |--abs-relocs | --text] vmlinux\n"); -} - -int main(int argc, char **argv) -{ - int show_absolute_syms, show_absolute_relocs; - int as_text; - const char *fname; - FILE *fp; - int i; - - show_absolute_syms = 0; - show_absolute_relocs = 0; - as_text = 0; - fname = NULL; - for(i = 1; i < argc; i++) { - char *arg = argv[i]; - if (*arg == '-') { - if (strcmp(argv[1], "--abs-syms") == 0) { - show_absolute_syms = 1; - continue; - } - - if (strcmp(argv[1], "--abs-relocs") == 0) { - show_absolute_relocs = 1; - continue; - } - else if (strcmp(argv[1], "--text") == 0) { - as_text = 1; - continue; - } - } - else if (!fname) { - fname = arg; - continue; - } - usage(); - } - if (!fname) { - usage(); - } - fp = fopen(fname, "r"); - if (!fp) { - die("Cannot open %s: %s\n", - fname, strerror(errno)); - } - read_ehdr(fp); - read_shdrs(fp); - read_strtabs(fp); - read_symtabs(fp); - read_relocs(fp); - if (show_absolute_syms) { - print_absolute_symbols(); - return 0; - } - if (show_absolute_relocs) { - print_absolute_relocs(); - return 0; - } - emit_relocs(as_text); - return 0; -} diff --git a/arch/i386/boot/compressed/vmlinux_32.lds b/arch/i386/boot/compressed/vmlinux_32.lds deleted file mode 100644 index cc4854f6c6c..00000000000 --- a/arch/i386/boot/compressed/vmlinux_32.lds +++ /dev/null @@ -1,43 +0,0 @@ -OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") -OUTPUT_ARCH(i386) -ENTRY(startup_32) -SECTIONS -{ - /* Be careful parts of head.S assume startup_32 is at - * address 0. - */ - . = 0 ; - .text.head : { - _head = . ; - *(.text.head) - _ehead = . ; - } - .data.compressed : { - *(.data.compressed) - } - .text : { - _text = .; /* Text */ - *(.text) - *(.text.*) - _etext = . ; - } - .rodata : { - _rodata = . ; - *(.rodata) /* read-only data */ - *(.rodata.*) - _erodata = . ; - } - .data : { - _data = . ; - *(.data) - *(.data.*) - _edata = . ; - } - .bss : { - _bss = . ; - *(.bss) - *(.bss.*) - *(COMMON) - _end = . ; - } -} diff --git a/arch/i386/boot/compressed/vmlinux_32.scr b/arch/i386/boot/compressed/vmlinux_32.scr deleted file mode 100644 index 707a88f7f29..00000000000 --- a/arch/i386/boot/compressed/vmlinux_32.scr +++ /dev/null @@ -1,10 +0,0 @@ -SECTIONS -{ - .data.compressed : { - input_len = .; - LONG(input_data_end - input_data) input_data = .; - *(.data) - output_len = . - 4; - input_data_end = .; - } -} diff --git a/arch/x86/boot/compressed/.gitignore b/arch/x86/boot/compressed/.gitignore new file mode 100644 index 00000000000..be0ed065249 --- /dev/null +++ b/arch/x86/boot/compressed/.gitignore @@ -0,0 +1 @@ +relocs diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile new file mode 100644 index 00000000000..76ab5caddf9 --- /dev/null +++ b/arch/x86/boot/compressed/Makefile @@ -0,0 +1,5 @@ +ifeq ($(CONFIG_X86_32),y) +include ${srctree}/arch/x86/boot/compressed/Makefile_32 +else +include ${srctree}/arch/x86_64/boot/compressed/Makefile_64 +endif diff --git a/arch/x86/boot/compressed/Makefile_32 b/arch/x86/boot/compressed/Makefile_32 new file mode 100644 index 00000000000..22613c652d2 --- /dev/null +++ b/arch/x86/boot/compressed/Makefile_32 @@ -0,0 +1,50 @@ +# +# linux/arch/x86/boot/compressed/Makefile +# +# create a compressed vmlinux image from the original vmlinux +# + +targets := vmlinux vmlinux.bin vmlinux.bin.gz head_32.o misc_32.o piggy.o \ + vmlinux.bin.all vmlinux.relocs +EXTRA_AFLAGS := -traditional + +LDFLAGS_vmlinux := -T +hostprogs-y := relocs + +CFLAGS := -m32 -D__KERNEL__ $(LINUX_INCLUDE) -O2 \ + -fno-strict-aliasing -fPIC \ + $(call cc-option,-ffreestanding) \ + $(call cc-option,-fno-stack-protector) +LDFLAGS := -m elf_i386 + +$(obj)/vmlinux: $(src)/vmlinux_32.lds $(obj)/head_32.o $(obj)/misc_32.o $(obj)/piggy.o FORCE + $(call if_changed,ld) + @: + +$(obj)/vmlinux.bin: vmlinux FORCE + $(call if_changed,objcopy) + +quiet_cmd_relocs = RELOCS $@ + cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $< +$(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE + $(call if_changed,relocs) + +vmlinux.bin.all-y := $(obj)/vmlinux.bin +vmlinux.bin.all-$(CONFIG_RELOCATABLE) += $(obj)/vmlinux.relocs +quiet_cmd_relocbin = BUILD $@ + cmd_relocbin = cat $(filter-out FORCE,$^) > $@ +$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE + $(call if_changed,relocbin) + +ifdef CONFIG_RELOCATABLE +$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE + $(call if_changed,gzip) +else +$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE + $(call if_changed,gzip) +endif + +LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T + +$(obj)/piggy.o: $(src)/vmlinux_32.scr $(obj)/vmlinux.bin.gz FORCE + $(call if_changed,ld) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S new file mode 100644 index 00000000000..f35ea223752 --- /dev/null +++ b/arch/x86/boot/compressed/head_32.S @@ -0,0 +1,180 @@ +/* + * linux/boot/head.S + * + * Copyright (C) 1991, 1992, 1993 Linus Torvalds + */ + +/* + * head.S contains the 32-bit startup code. + * + * NOTE!!! Startup happens at absolute address 0x00001000, which is also where + * the page directory will exist. The startup code will be overwritten by + * the page directory. [According to comments etc elsewhere on a compressed + * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC] + * + * Page 0 is deliberately kept safe, since System Management Mode code in + * laptops may need to access the BIOS data stored there. This is also + * useful for future device drivers that either access the BIOS via VM86 + * mode. + */ + +/* + * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 + */ +.text + +#include +#include +#include +#include + +.section ".text.head","ax",@progbits + .globl startup_32 + +startup_32: + cld + cli + movl $(__BOOT_DS),%eax + movl %eax,%ds + movl %eax,%es + movl %eax,%fs + movl %eax,%gs + movl %eax,%ss + +/* Calculate the delta between where we were compiled to run + * at and where we were actually loaded at. This can only be done + * with a short local call on x86. Nothing else will tell us what + * address we are running at. The reserved chunk of the real-mode + * data at 0x1e4 (defined as a scratch field) are used as the stack + * for this calculation. Only 4 bytes are needed. + */ + leal (0x1e4+4)(%esi), %esp + call 1f +1: popl %ebp + subl $1b, %ebp + +/* %ebp contains the address we are loaded at by the boot loader and %ebx + * contains the address where we should move the kernel image temporarily + * for safe in-place decompression. + */ + +#ifdef CONFIG_RELOCATABLE + movl %ebp, %ebx + addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebx + andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx +#else + movl $LOAD_PHYSICAL_ADDR, %ebx +#endif + + /* Replace the compressed data size with the uncompressed size */ + subl input_len(%ebp), %ebx + movl output_len(%ebp), %eax + addl %eax, %ebx + /* Add 8 bytes for every 32K input block */ + shrl $12, %eax + addl %eax, %ebx + /* Add 32K + 18 bytes of extra slack */ + addl $(32768 + 18), %ebx + /* Align on a 4K boundary */ + addl $4095, %ebx + andl $~4095, %ebx + +/* Copy the compressed kernel to the end of our buffer + * where decompression in place becomes safe. + */ + pushl %esi + leal _end(%ebp), %esi + leal _end(%ebx), %edi + movl $(_end - startup_32), %ecx + std + rep + movsb + cld + popl %esi + +/* Compute the kernel start address. + */ +#ifdef CONFIG_RELOCATABLE + addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebp + andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebp +#else + movl $LOAD_PHYSICAL_ADDR, %ebp +#endif + +/* + * Jump to the relocated address. + */ + leal relocated(%ebx), %eax + jmp *%eax +.section ".text" +relocated: + +/* + * Clear BSS + */ + xorl %eax,%eax + leal _edata(%ebx),%edi + leal _end(%ebx), %ecx + subl %edi,%ecx + cld + rep + stosb + +/* + * Setup the stack for the decompressor + */ + leal stack_end(%ebx), %esp + +/* + * Do the decompression, and jump to the new kernel.. + */ + movl output_len(%ebx), %eax + pushl %eax + pushl %ebp # output address + movl input_len(%ebx), %eax + pushl %eax # input_len + leal input_data(%ebx), %eax + pushl %eax # input_data + leal _end(%ebx), %eax + pushl %eax # end of the image as third argument + pushl %esi # real mode pointer as second arg + call decompress_kernel + addl $20, %esp + popl %ecx + +#if CONFIG_RELOCATABLE +/* Find the address of the relocations. + */ + movl %ebp, %edi + addl %ecx, %edi + +/* Calculate the delta between where vmlinux was compiled to run + * and where it was actually loaded. + */ + movl %ebp, %ebx + subl $LOAD_PHYSICAL_ADDR, %ebx + jz 2f /* Nothing to be done if loaded at compiled addr. */ +/* + * Process relocations. + */ + +1: subl $4, %edi + movl 0(%edi), %ecx + testl %ecx, %ecx + jz 2f + addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) + jmp 1b +2: +#endif + +/* + * Jump to the decompressed kernel. + */ + xorl %ebx,%ebx + jmp *%ebp + +.bss +.balign 4 +stack: + .fill 4096, 1, 0 +stack_end: diff --git a/arch/x86/boot/compressed/misc_32.c b/arch/x86/boot/compressed/misc_32.c new file mode 100644 index 00000000000..b28505c544c --- /dev/null +++ b/arch/x86/boot/compressed/misc_32.c @@ -0,0 +1,379 @@ +/* + * misc.c + * + * This is a collection of several routines from gzip-1.0.3 + * adapted for Linux. + * + * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 + * puts by Nick Holloway 1993, better puts by Martin Mares 1995 + * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 + */ + +#undef CONFIG_PARAVIRT +#include +#include +#include +#include +#include +#include + +/* WARNING!! + * This code is compiled with -fPIC and it is relocated dynamically + * at run time, but no relocation processing is performed. + * This means that it is not safe to place pointers in static structures. + */ + +/* + * Getting to provable safe in place decompression is hard. + * Worst case behaviours need to be analized. + * Background information: + * + * The file layout is: + * magic[2] + * method[1] + * flags[1] + * timestamp[4] + * extraflags[1] + * os[1] + * compressed data blocks[N] + * crc[4] orig_len[4] + * + * resulting in 18 bytes of non compressed data overhead. + * + * Files divided into blocks + * 1 bit (last block flag) + * 2 bits (block type) + * + * 1 block occurs every 32K -1 bytes or when there 50% compression has been achieved. + * The smallest block type encoding is always used. + * + * stored: + * 32 bits length in bytes. + * + * fixed: + * magic fixed tree. + * symbols. + * + * dynamic: + * dynamic tree encoding. + * symbols. + * + * + * The buffer for decompression in place is the length of the + * uncompressed data, plus a small amount extra to keep the algorithm safe. + * The compressed data is placed at the end of the buffer. The output + * pointer is placed at the start of the buffer and the input pointer + * is placed where the compressed data starts. Problems will occur + * when the output pointer overruns the input pointer. + * + * The output pointer can only overrun the input pointer if the input + * pointer is moving faster than the output pointer. A condition only + * triggered by data whose compressed form is larger than the uncompressed + * form. + * + * The worst case at the block level is a growth of the compressed data + * of 5 bytes per 32767 bytes. + * + * The worst case internal to a compressed block is very hard to figure. + * The worst case can at least be boundined by having one bit that represents + * 32764 bytes and then all of the rest of the bytes representing the very + * very last byte. + * + * All of which is enough to compute an amount of extra data that is required + * to be safe. To avoid problems at the block level allocating 5 extra bytes + * per 32767 bytes of data is sufficient. To avoind problems internal to a block + * adding an extra 32767 bytes (the worst case uncompressed block size) is + * sufficient, to ensure that in the worst case the decompressed data for + * block will stop the byte before the compressed data for a block begins. + * To avoid problems with the compressed data's meta information an extra 18 + * bytes are needed. Leading to the formula: + * + * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size. + * + * Adding 8 bytes per 32K is a bit excessive but much easier to calculate. + * Adding 32768 instead of 32767 just makes for round numbers. + * Adding the decompressor_size is necessary as it musht live after all + * of the data as well. Last I measured the decompressor is about 14K. + * 10K of actuall data and 4K of bss. + * + */ + +/* + * gzip declarations + */ + +#define OF(args) args +#define STATIC static + +#undef memset +#undef memcpy +#define memzero(s, n) memset ((s), 0, (n)) + +typedef unsigned char uch; +typedef unsigned short ush; +typedef unsigned long ulg; + +#define WSIZE 0x80000000 /* Window size must be at least 32k, + * and a power of two + * We don't actually have a window just + * a huge output buffer so I report + * a 2G windows size, as that should + * always be larger than our output buffer. + */ + +static uch *inbuf; /* input buffer */ +static uch *window; /* Sliding window buffer, (and final output buffer) */ + +static unsigned insize; /* valid bytes in inbuf */ +static unsigned inptr; /* index of next byte to be processed in inbuf */ +static unsigned outcnt; /* bytes in output buffer */ + +/* gzip flag byte */ +#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */ +#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */ +#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ +#define ORIG_NAME 0x08 /* bit 3 set: original file name present */ +#define COMMENT 0x10 /* bit 4 set: file comment present */ +#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */ +#define RESERVED 0xC0 /* bit 6,7: reserved */ + +#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf()) + +/* Diagnostic functions */ +#ifdef DEBUG +# define Assert(cond,msg) {if(!(cond)) error(msg);} +# define Trace(x) fprintf x +# define Tracev(x) {if (verbose) fprintf x ;} +# define Tracevv(x) {if (verbose>1) fprintf x ;} +# define Tracec(c,x) {if (verbose && (c)) fprintf x ;} +# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;} +#else +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + +static int fill_inbuf(void); +static void flush_window(void); +static void error(char *m); +static void gzip_mark(void **); +static void gzip_release(void **); + +/* + * This is set up by the setup-routine at boot-time + */ +static unsigned char *real_mode; /* Pointer to real-mode data */ + +#define RM_EXT_MEM_K (*(unsigned short *)(real_mode + 0x2)) +#ifndef STANDARD_MEMORY_BIOS_CALL +#define RM_ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0)) +#endif +#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0)) + +extern unsigned char input_data[]; +extern int input_len; + +static long bytes_out = 0; + +static void *malloc(int size); +static void free(void *where); + +static void *memset(void *s, int c, unsigned n); +static void *memcpy(void *dest, const void *src, unsigned n); + +static void putstr(const char *); + +static unsigned long free_mem_ptr; +static unsigned long free_mem_end_ptr; + +#define HEAP_SIZE 0x4000 + +static char *vidmem = (char *)0xb8000; +static int vidport; +static int lines, cols; + +#ifdef CONFIG_X86_NUMAQ +void *xquad_portio; +#endif + +#include "../../../../lib/inflate.c" + +static void *malloc(int size) +{ + void *p; + + if (size <0) error("Malloc error"); + if (free_mem_ptr <= 0) error("Memory error"); + + free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */ + + p = (void *)free_mem_ptr; + free_mem_ptr += size; + + if (free_mem_ptr >= free_mem_end_ptr) + error("Out of memory"); + + return p; +} + +static void free(void *where) +{ /* Don't care */ +} + +static void gzip_mark(void **ptr) +{ + *ptr = (void *) free_mem_ptr; +} + +static void gzip_release(void **ptr) +{ + free_mem_ptr = (unsigned long) *ptr; +} + +static void scroll(void) +{ + int i; + + memcpy ( vidmem, vidmem + cols * 2, ( lines - 1 ) * cols * 2 ); + for ( i = ( lines - 1 ) * cols * 2; i < lines * cols * 2; i += 2 ) + vidmem[i] = ' '; +} + +static void putstr(const char *s) +{ + int x,y,pos; + char c; + + x = RM_SCREEN_INFO.orig_x; + y = RM_SCREEN_INFO.orig_y; + + while ( ( c = *s++ ) != '\0' ) { + if ( c == '\n' ) { + x = 0; + if ( ++y >= lines ) { + scroll(); + y--; + } + } else { + vidmem [ ( x + cols * y ) * 2 ] = c; + if ( ++x >= cols ) { + x = 0; + if ( ++y >= lines ) { + scroll(); + y--; + } + } + } + } + + RM_SCREEN_INFO.orig_x = x; + RM_SCREEN_INFO.orig_y = y; + + pos = (x + cols * y) * 2; /* Update cursor position */ + outb_p(14, vidport); + outb_p(0xff & (pos >> 9), vidport+1); + outb_p(15, vidport); + outb_p(0xff & (pos >> 1), vidport+1); +} + +static void* memset(void* s, int c, unsigned n) +{ + int i; + char *ss = (char*)s; + + for (i=0;i> 8); + } + crc = c; + bytes_out += (ulg)outcnt; + outcnt = 0; +} + +static void error(char *x) +{ + putstr("\n\n"); + putstr(x); + putstr("\n\n -- System halted"); + + while(1); /* Halt */ +} + +asmlinkage void decompress_kernel(void *rmode, unsigned long end, + uch *input_data, unsigned long input_len, uch *output) +{ + real_mode = rmode; + + if (RM_SCREEN_INFO.orig_video_mode == 7) { + vidmem = (char *) 0xb0000; + vidport = 0x3b4; + } else { + vidmem = (char *) 0xb8000; + vidport = 0x3d4; + } + + lines = RM_SCREEN_INFO.orig_video_lines; + cols = RM_SCREEN_INFO.orig_video_cols; + + window = output; /* Output buffer (Normally at 1M) */ + free_mem_ptr = end; /* Heap */ + free_mem_end_ptr = end + HEAP_SIZE; + inbuf = input_data; /* Input buffer */ + insize = input_len; + inptr = 0; + + if ((u32)output & (CONFIG_PHYSICAL_ALIGN -1)) + error("Destination address not CONFIG_PHYSICAL_ALIGN aligned"); + if (end > ((-__PAGE_OFFSET-(512 <<20)-1) & 0x7fffffff)) + error("Destination address too large"); +#ifndef CONFIG_RELOCATABLE + if ((u32)output != LOAD_PHYSICAL_ADDR) + error("Wrong destination address"); +#endif + + makecrc(); + putstr("Uncompressing Linux... "); + gunzip(); + putstr("Ok, booting the kernel.\n"); + return; +} diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c new file mode 100644 index 00000000000..2d77ee728f9 --- /dev/null +++ b/arch/x86/boot/compressed/relocs.c @@ -0,0 +1,631 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define USE_BSD +#include + +#define MAX_SHDRS 100 +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +static Elf32_Ehdr ehdr; +static Elf32_Shdr shdr[MAX_SHDRS]; +static Elf32_Sym *symtab[MAX_SHDRS]; +static Elf32_Rel *reltab[MAX_SHDRS]; +static char *strtab[MAX_SHDRS]; +static unsigned long reloc_count, reloc_idx; +static unsigned long *relocs; + +/* + * Following symbols have been audited. There values are constant and do + * not change if bzImage is loaded at a different physical address than + * the address for which it has been compiled. Don't warn user about + * absolute relocations present w.r.t these symbols. + */ +static const char* safe_abs_relocs[] = { + "__kernel_vsyscall", + "__kernel_rt_sigreturn", + "__kernel_sigreturn", + "SYSENTER_RETURN", + "VDSO_NOTE_MASK", + "xen_irq_disable_direct_reloc", + "xen_save_fl_direct_reloc", +}; + +static int is_safe_abs_reloc(const char* sym_name) +{ + int i, array_size; + + array_size = sizeof(safe_abs_relocs)/sizeof(char*); + + for(i = 0; i < array_size; i++) { + if (!strcmp(sym_name, safe_abs_relocs[i])) + /* Match found */ + return 1; + } + if (strncmp(sym_name, "__crc_", 6) == 0) + return 1; + return 0; +} + +static void die(char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(1); +} + +static const char *sym_type(unsigned type) +{ + static const char *type_name[] = { +#define SYM_TYPE(X) [X] = #X + SYM_TYPE(STT_NOTYPE), + SYM_TYPE(STT_OBJECT), + SYM_TYPE(STT_FUNC), + SYM_TYPE(STT_SECTION), + SYM_TYPE(STT_FILE), + SYM_TYPE(STT_COMMON), + SYM_TYPE(STT_TLS), +#undef SYM_TYPE + }; + const char *name = "unknown sym type name"; + if (type < ARRAY_SIZE(type_name)) { + name = type_name[type]; + } + return name; +} + +static const char *sym_bind(unsigned bind) +{ + static const char *bind_name[] = { +#define SYM_BIND(X) [X] = #X + SYM_BIND(STB_LOCAL), + SYM_BIND(STB_GLOBAL), + SYM_BIND(STB_WEAK), +#undef SYM_BIND + }; + const char *name = "unknown sym bind name"; + if (bind < ARRAY_SIZE(bind_name)) { + name = bind_name[bind]; + } + return name; +} + +static const char *sym_visibility(unsigned visibility) +{ + static const char *visibility_name[] = { +#define SYM_VISIBILITY(X) [X] = #X + SYM_VISIBILITY(STV_DEFAULT), + SYM_VISIBILITY(STV_INTERNAL), + SYM_VISIBILITY(STV_HIDDEN), + SYM_VISIBILITY(STV_PROTECTED), +#undef SYM_VISIBILITY + }; + const char *name = "unknown sym visibility name"; + if (visibility < ARRAY_SIZE(visibility_name)) { + name = visibility_name[visibility]; + } + return name; +} + +static const char *rel_type(unsigned type) +{ + static const char *type_name[] = { +#define REL_TYPE(X) [X] = #X + REL_TYPE(R_386_NONE), + REL_TYPE(R_386_32), + REL_TYPE(R_386_PC32), + REL_TYPE(R_386_GOT32), + REL_TYPE(R_386_PLT32), + REL_TYPE(R_386_COPY), + REL_TYPE(R_386_GLOB_DAT), + REL_TYPE(R_386_JMP_SLOT), + REL_TYPE(R_386_RELATIVE), + REL_TYPE(R_386_GOTOFF), + REL_TYPE(R_386_GOTPC), +#undef REL_TYPE + }; + const char *name = "unknown type rel type name"; + if (type < ARRAY_SIZE(type_name)) { + name = type_name[type]; + } + return name; +} + +static const char *sec_name(unsigned shndx) +{ + const char *sec_strtab; + const char *name; + sec_strtab = strtab[ehdr.e_shstrndx]; + name = ""; + if (shndx < ehdr.e_shnum) { + name = sec_strtab + shdr[shndx].sh_name; + } + else if (shndx == SHN_ABS) { + name = "ABSOLUTE"; + } + else if (shndx == SHN_COMMON) { + name = "COMMON"; + } + return name; +} + +static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym) +{ + const char *name; + name = ""; + if (sym->st_name) { + name = sym_strtab + sym->st_name; + } + else { + name = sec_name(shdr[sym->st_shndx].sh_name); + } + return name; +} + + + +#if BYTE_ORDER == LITTLE_ENDIAN +#define le16_to_cpu(val) (val) +#define le32_to_cpu(val) (val) +#endif +#if BYTE_ORDER == BIG_ENDIAN +#define le16_to_cpu(val) bswap_16(val) +#define le32_to_cpu(val) bswap_32(val) +#endif + +static uint16_t elf16_to_cpu(uint16_t val) +{ + return le16_to_cpu(val); +} + +static uint32_t elf32_to_cpu(uint32_t val) +{ + return le32_to_cpu(val); +} + +static void read_ehdr(FILE *fp) +{ + if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) { + die("Cannot read ELF header: %s\n", + strerror(errno)); + } + if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) { + die("No ELF magic\n"); + } + if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) { + die("Not a 32 bit executable\n"); + } + if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) { + die("Not a LSB ELF executable\n"); + } + if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) { + die("Unknown ELF version\n"); + } + /* Convert the fields to native endian */ + ehdr.e_type = elf16_to_cpu(ehdr.e_type); + ehdr.e_machine = elf16_to_cpu(ehdr.e_machine); + ehdr.e_version = elf32_to_cpu(ehdr.e_version); + ehdr.e_entry = elf32_to_cpu(ehdr.e_entry); + ehdr.e_phoff = elf32_to_cpu(ehdr.e_phoff); + ehdr.e_shoff = elf32_to_cpu(ehdr.e_shoff); + ehdr.e_flags = elf32_to_cpu(ehdr.e_flags); + ehdr.e_ehsize = elf16_to_cpu(ehdr.e_ehsize); + ehdr.e_phentsize = elf16_to_cpu(ehdr.e_phentsize); + ehdr.e_phnum = elf16_to_cpu(ehdr.e_phnum); + ehdr.e_shentsize = elf16_to_cpu(ehdr.e_shentsize); + ehdr.e_shnum = elf16_to_cpu(ehdr.e_shnum); + ehdr.e_shstrndx = elf16_to_cpu(ehdr.e_shstrndx); + + if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) { + die("Unsupported ELF header type\n"); + } + if (ehdr.e_machine != EM_386) { + die("Not for x86\n"); + } + if (ehdr.e_version != EV_CURRENT) { + die("Unknown ELF version\n"); + } + if (ehdr.e_ehsize != sizeof(Elf32_Ehdr)) { + die("Bad Elf header size\n"); + } + if (ehdr.e_phentsize != sizeof(Elf32_Phdr)) { + die("Bad program header entry\n"); + } + if (ehdr.e_shentsize != sizeof(Elf32_Shdr)) { + die("Bad section header entry\n"); + } + if (ehdr.e_shstrndx >= ehdr.e_shnum) { + die("String table index out of bounds\n"); + } +} + +static void read_shdrs(FILE *fp) +{ + int i; + if (ehdr.e_shnum > MAX_SHDRS) { + die("%d section headers supported: %d\n", + ehdr.e_shnum, MAX_SHDRS); + } + if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + ehdr.e_shoff, strerror(errno)); + } + if (fread(&shdr, sizeof(shdr[0]), ehdr.e_shnum, fp) != ehdr.e_shnum) { + die("Cannot read ELF section headers: %s\n", + strerror(errno)); + } + for(i = 0; i < ehdr.e_shnum; i++) { + shdr[i].sh_name = elf32_to_cpu(shdr[i].sh_name); + shdr[i].sh_type = elf32_to_cpu(shdr[i].sh_type); + shdr[i].sh_flags = elf32_to_cpu(shdr[i].sh_flags); + shdr[i].sh_addr = elf32_to_cpu(shdr[i].sh_addr); + shdr[i].sh_offset = elf32_to_cpu(shdr[i].sh_offset); + shdr[i].sh_size = elf32_to_cpu(shdr[i].sh_size); + shdr[i].sh_link = elf32_to_cpu(shdr[i].sh_link); + shdr[i].sh_info = elf32_to_cpu(shdr[i].sh_info); + shdr[i].sh_addralign = elf32_to_cpu(shdr[i].sh_addralign); + shdr[i].sh_entsize = elf32_to_cpu(shdr[i].sh_entsize); + } + +} + +static void read_strtabs(FILE *fp) +{ + int i; + for(i = 0; i < ehdr.e_shnum; i++) { + if (shdr[i].sh_type != SHT_STRTAB) { + continue; + } + strtab[i] = malloc(shdr[i].sh_size); + if (!strtab[i]) { + die("malloc of %d bytes for strtab failed\n", + shdr[i].sh_size); + } + if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + shdr[i].sh_offset, strerror(errno)); + } + if (fread(strtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { + die("Cannot read symbol table: %s\n", + strerror(errno)); + } + } +} + +static void read_symtabs(FILE *fp) +{ + int i,j; + for(i = 0; i < ehdr.e_shnum; i++) { + if (shdr[i].sh_type != SHT_SYMTAB) { + continue; + } + symtab[i] = malloc(shdr[i].sh_size); + if (!symtab[i]) { + die("malloc of %d bytes for symtab failed\n", + shdr[i].sh_size); + } + if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + shdr[i].sh_offset, strerror(errno)); + } + if (fread(symtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { + die("Cannot read symbol table: %s\n", + strerror(errno)); + } + for(j = 0; j < shdr[i].sh_size/sizeof(symtab[i][0]); j++) { + symtab[i][j].st_name = elf32_to_cpu(symtab[i][j].st_name); + symtab[i][j].st_value = elf32_to_cpu(symtab[i][j].st_value); + symtab[i][j].st_size = elf32_to_cpu(symtab[i][j].st_size); + symtab[i][j].st_shndx = elf16_to_cpu(symtab[i][j].st_shndx); + } + } +} + + +static void read_relocs(FILE *fp) +{ + int i,j; + for(i = 0; i < ehdr.e_shnum; i++) { + if (shdr[i].sh_type != SHT_REL) { + continue; + } + reltab[i] = malloc(shdr[i].sh_size); + if (!reltab[i]) { + die("malloc of %d bytes for relocs failed\n", + shdr[i].sh_size); + } + if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + shdr[i].sh_offset, strerror(errno)); + } + if (fread(reltab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { + die("Cannot read symbol table: %s\n", + strerror(errno)); + } + for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { + reltab[i][j].r_offset = elf32_to_cpu(reltab[i][j].r_offset); + reltab[i][j].r_info = elf32_to_cpu(reltab[i][j].r_info); + } + } +} + + +static void print_absolute_symbols(void) +{ + int i; + printf("Absolute symbols\n"); + printf(" Num: Value Size Type Bind Visibility Name\n"); + for(i = 0; i < ehdr.e_shnum; i++) { + char *sym_strtab; + Elf32_Sym *sh_symtab; + int j; + if (shdr[i].sh_type != SHT_SYMTAB) { + continue; + } + sh_symtab = symtab[i]; + sym_strtab = strtab[shdr[i].sh_link]; + for(j = 0; j < shdr[i].sh_size/sizeof(symtab[0][0]); j++) { + Elf32_Sym *sym; + const char *name; + sym = &symtab[i][j]; + name = sym_name(sym_strtab, sym); + if (sym->st_shndx != SHN_ABS) { + continue; + } + printf("%5d %08x %5d %10s %10s %12s %s\n", + j, sym->st_value, sym->st_size, + sym_type(ELF32_ST_TYPE(sym->st_info)), + sym_bind(ELF32_ST_BIND(sym->st_info)), + sym_visibility(ELF32_ST_VISIBILITY(sym->st_other)), + name); + } + } + printf("\n"); +} + +static void print_absolute_relocs(void) +{ + int i, printed = 0; + + for(i = 0; i < ehdr.e_shnum; i++) { + char *sym_strtab; + Elf32_Sym *sh_symtab; + unsigned sec_applies, sec_symtab; + int j; + if (shdr[i].sh_type != SHT_REL) { + continue; + } + sec_symtab = shdr[i].sh_link; + sec_applies = shdr[i].sh_info; + if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) { + continue; + } + sh_symtab = symtab[sec_symtab]; + sym_strtab = strtab[shdr[sec_symtab].sh_link]; + for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { + Elf32_Rel *rel; + Elf32_Sym *sym; + const char *name; + rel = &reltab[i][j]; + sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; + name = sym_name(sym_strtab, sym); + if (sym->st_shndx != SHN_ABS) { + continue; + } + + /* Absolute symbols are not relocated if bzImage is + * loaded at a non-compiled address. Display a warning + * to user at compile time about the absolute + * relocations present. + * + * User need to audit the code to make sure + * some symbols which should have been section + * relative have not become absolute because of some + * linker optimization or wrong programming usage. + * + * Before warning check if this absolute symbol + * relocation is harmless. + */ + if (is_safe_abs_reloc(name)) + continue; + + if (!printed) { + printf("WARNING: Absolute relocations" + " present\n"); + printf("Offset Info Type Sym.Value " + "Sym.Name\n"); + printed = 1; + } + + printf("%08x %08x %10s %08x %s\n", + rel->r_offset, + rel->r_info, + rel_type(ELF32_R_TYPE(rel->r_info)), + sym->st_value, + name); + } + } + + if (printed) + printf("\n"); +} + +static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) +{ + int i; + /* Walk through the relocations */ + for(i = 0; i < ehdr.e_shnum; i++) { + char *sym_strtab; + Elf32_Sym *sh_symtab; + unsigned sec_applies, sec_symtab; + int j; + if (shdr[i].sh_type != SHT_REL) { + continue; + } + sec_symtab = shdr[i].sh_link; + sec_applies = shdr[i].sh_info; + if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) { + continue; + } + sh_symtab = symtab[sec_symtab]; + sym_strtab = strtab[shdr[sec_symtab].sh_link]; + for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { + Elf32_Rel *rel; + Elf32_Sym *sym; + unsigned r_type; + rel = &reltab[i][j]; + sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; + r_type = ELF32_R_TYPE(rel->r_info); + /* Don't visit relocations to absolute symbols */ + if (sym->st_shndx == SHN_ABS) { + continue; + } + if (r_type == R_386_PC32) { + /* PC relative relocations don't need to be adjusted */ + } + else if (r_type == R_386_32) { + /* Visit relocations that need to be adjusted */ + visit(rel, sym); + } + else { + die("Unsupported relocation type: %d\n", r_type); + } + } + } +} + +static void count_reloc(Elf32_Rel *rel, Elf32_Sym *sym) +{ + reloc_count += 1; +} + +static void collect_reloc(Elf32_Rel *rel, Elf32_Sym *sym) +{ + /* Remember the address that needs to be adjusted. */ + relocs[reloc_idx++] = rel->r_offset; +} + +static int cmp_relocs(const void *va, const void *vb) +{ + const unsigned long *a, *b; + a = va; b = vb; + return (*a == *b)? 0 : (*a > *b)? 1 : -1; +} + +static void emit_relocs(int as_text) +{ + int i; + /* Count how many relocations I have and allocate space for them. */ + reloc_count = 0; + walk_relocs(count_reloc); + relocs = malloc(reloc_count * sizeof(relocs[0])); + if (!relocs) { + die("malloc of %d entries for relocs failed\n", + reloc_count); + } + /* Collect up the relocations */ + reloc_idx = 0; + walk_relocs(collect_reloc); + + /* Order the relocations for more efficient processing */ + qsort(relocs, reloc_count, sizeof(relocs[0]), cmp_relocs); + + /* Print the relocations */ + if (as_text) { + /* Print the relocations in a form suitable that + * gas will like. + */ + printf(".section \".data.reloc\",\"a\"\n"); + printf(".balign 4\n"); + for(i = 0; i < reloc_count; i++) { + printf("\t .long 0x%08lx\n", relocs[i]); + } + printf("\n"); + } + else { + unsigned char buf[4]; + buf[0] = buf[1] = buf[2] = buf[3] = 0; + /* Print a stop */ + printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]); + /* Now print each relocation */ + for(i = 0; i < reloc_count; i++) { + buf[0] = (relocs[i] >> 0) & 0xff; + buf[1] = (relocs[i] >> 8) & 0xff; + buf[2] = (relocs[i] >> 16) & 0xff; + buf[3] = (relocs[i] >> 24) & 0xff; + printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]); + } + } +} + +static void usage(void) +{ + die("relocs [--abs-syms |--abs-relocs | --text] vmlinux\n"); +} + +int main(int argc, char **argv) +{ + int show_absolute_syms, show_absolute_relocs; + int as_text; + const char *fname; + FILE *fp; + int i; + + show_absolute_syms = 0; + show_absolute_relocs = 0; + as_text = 0; + fname = NULL; + for(i = 1; i < argc; i++) { + char *arg = argv[i]; + if (*arg == '-') { + if (strcmp(argv[1], "--abs-syms") == 0) { + show_absolute_syms = 1; + continue; + } + + if (strcmp(argv[1], "--abs-relocs") == 0) { + show_absolute_relocs = 1; + continue; + } + else if (strcmp(argv[1], "--text") == 0) { + as_text = 1; + continue; + } + } + else if (!fname) { + fname = arg; + continue; + } + usage(); + } + if (!fname) { + usage(); + } + fp = fopen(fname, "r"); + if (!fp) { + die("Cannot open %s: %s\n", + fname, strerror(errno)); + } + read_ehdr(fp); + read_shdrs(fp); + read_strtabs(fp); + read_symtabs(fp); + read_relocs(fp); + if (show_absolute_syms) { + print_absolute_symbols(); + return 0; + } + if (show_absolute_relocs) { + print_absolute_relocs(); + return 0; + } + emit_relocs(as_text); + return 0; +} diff --git a/arch/x86/boot/compressed/vmlinux_32.lds b/arch/x86/boot/compressed/vmlinux_32.lds new file mode 100644 index 00000000000..cc4854f6c6c --- /dev/null +++ b/arch/x86/boot/compressed/vmlinux_32.lds @@ -0,0 +1,43 @@ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(startup_32) +SECTIONS +{ + /* Be careful parts of head.S assume startup_32 is at + * address 0. + */ + . = 0 ; + .text.head : { + _head = . ; + *(.text.head) + _ehead = . ; + } + .data.compressed : { + *(.data.compressed) + } + .text : { + _text = .; /* Text */ + *(.text) + *(.text.*) + _etext = . ; + } + .rodata : { + _rodata = . ; + *(.rodata) /* read-only data */ + *(.rodata.*) + _erodata = . ; + } + .data : { + _data = . ; + *(.data) + *(.data.*) + _edata = . ; + } + .bss : { + _bss = . ; + *(.bss) + *(.bss.*) + *(COMMON) + _end = . ; + } +} diff --git a/arch/x86/boot/compressed/vmlinux_32.scr b/arch/x86/boot/compressed/vmlinux_32.scr new file mode 100644 index 00000000000..707a88f7f29 --- /dev/null +++ b/arch/x86/boot/compressed/vmlinux_32.scr @@ -0,0 +1,10 @@ +SECTIONS +{ + .data.compressed : { + input_len = .; + LONG(input_data_end - input_data) input_data = .; + *(.data) + output_len = . - 4; + input_data_end = .; + } +} diff --git a/arch/x86_64/boot/compressed/Makefile b/arch/x86_64/boot/compressed/Makefile index 78514f3f626..76ab5caddf9 100644 --- a/arch/x86_64/boot/compressed/Makefile +++ b/arch/x86_64/boot/compressed/Makefile @@ -1,5 +1,5 @@ ifeq ($(CONFIG_X86_32),y) -include ${srctree}/arch/i386/boot/compressed/Makefile_32 +include ${srctree}/arch/x86/boot/compressed/Makefile_32 else include ${srctree}/arch/x86_64/boot/compressed/Makefile_64 endif -- cgit v1.2.3-70-g09d2 From 96ae6ea0be1b902c28b3b463c27da42b41e2b63a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:45 +0200 Subject: i386: move boot Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Makefile | 9 +- arch/i386/boot/.gitignore | 5 - arch/i386/boot/Makefile | 171 ---------------- arch/i386/boot/a20.c | 161 --------------- arch/i386/boot/apm.c | 98 --------- arch/i386/boot/bitops.h | 45 ---- arch/i386/boot/boot.h | 296 -------------------------- arch/i386/boot/cmdline.c | 97 --------- arch/i386/boot/code16gcc.h | 15 -- arch/i386/boot/copy.S | 101 --------- arch/i386/boot/cpu.c | 69 ------- arch/i386/boot/cpucheck.c | 268 ------------------------ arch/i386/boot/edd.c | 167 --------------- arch/i386/boot/header.S | 283 ------------------------- arch/i386/boot/install.sh | 61 ------ arch/i386/boot/main.c | 161 --------------- arch/i386/boot/mca.c | 43 ---- arch/i386/boot/memory.c | 118 ----------- arch/i386/boot/mtools.conf.in | 17 -- arch/i386/boot/pm.c | 174 ---------------- arch/i386/boot/pmjump.S | 54 ----- arch/i386/boot/printf.c | 307 --------------------------- arch/i386/boot/setup.ld | 54 ----- arch/i386/boot/string.c | 52 ----- arch/i386/boot/tty.c | 112 ---------- arch/i386/boot/version.c | 23 --- arch/i386/boot/vesa.h | 79 ------- arch/i386/boot/video-bios.c | 125 ----------- arch/i386/boot/video-vesa.c | 292 -------------------------- arch/i386/boot/video-vga.c | 261 ----------------------- arch/i386/boot/video.c | 467 ------------------------------------------ arch/i386/boot/video.h | 152 -------------- arch/i386/boot/voyager.c | 46 ----- arch/x86/boot/.gitignore | 5 + arch/x86/boot/Makefile | 171 ++++++++++++++++ arch/x86/boot/a20.c | 161 +++++++++++++++ arch/x86/boot/apm.c | 98 +++++++++ arch/x86/boot/bitops.h | 45 ++++ arch/x86/boot/boot.h | 296 ++++++++++++++++++++++++++ arch/x86/boot/cmdline.c | 97 +++++++++ arch/x86/boot/code16gcc.h | 15 ++ arch/x86/boot/copy.S | 101 +++++++++ arch/x86/boot/cpu.c | 69 +++++++ arch/x86/boot/cpucheck.c | 268 ++++++++++++++++++++++++ arch/x86/boot/edd.c | 167 +++++++++++++++ arch/x86/boot/header.S | 283 +++++++++++++++++++++++++ arch/x86/boot/install.sh | 61 ++++++ arch/x86/boot/main.c | 161 +++++++++++++++ arch/x86/boot/mca.c | 43 ++++ arch/x86/boot/memory.c | 118 +++++++++++ arch/x86/boot/mtools.conf.in | 17 ++ arch/x86/boot/pm.c | 174 ++++++++++++++++ arch/x86/boot/pmjump.S | 54 +++++ arch/x86/boot/printf.c | 307 +++++++++++++++++++++++++++ arch/x86/boot/setup.ld | 54 +++++ arch/x86/boot/string.c | 52 +++++ arch/x86/boot/tty.c | 112 ++++++++++ arch/x86/boot/version.c | 23 +++ arch/x86/boot/vesa.h | 79 +++++++ arch/x86/boot/video-bios.c | 125 +++++++++++ arch/x86/boot/video-vesa.c | 292 ++++++++++++++++++++++++++ arch/x86/boot/video-vga.c | 261 +++++++++++++++++++++++ arch/x86/boot/video.c | 467 ++++++++++++++++++++++++++++++++++++++++++ arch/x86/boot/video.h | 152 ++++++++++++++ arch/x86/boot/voyager.c | 46 +++++ arch/x86_64/boot/Makefile | 2 +- 66 files changed, 4381 insertions(+), 4378 deletions(-) delete mode 100644 arch/i386/boot/.gitignore delete mode 100644 arch/i386/boot/Makefile delete mode 100644 arch/i386/boot/a20.c delete mode 100644 arch/i386/boot/apm.c delete mode 100644 arch/i386/boot/bitops.h delete mode 100644 arch/i386/boot/boot.h delete mode 100644 arch/i386/boot/cmdline.c delete mode 100644 arch/i386/boot/code16gcc.h delete mode 100644 arch/i386/boot/copy.S delete mode 100644 arch/i386/boot/cpu.c delete mode 100644 arch/i386/boot/cpucheck.c delete mode 100644 arch/i386/boot/edd.c delete mode 100644 arch/i386/boot/header.S delete mode 100644 arch/i386/boot/install.sh delete mode 100644 arch/i386/boot/main.c delete mode 100644 arch/i386/boot/mca.c delete mode 100644 arch/i386/boot/memory.c delete mode 100644 arch/i386/boot/mtools.conf.in delete mode 100644 arch/i386/boot/pm.c delete mode 100644 arch/i386/boot/pmjump.S delete mode 100644 arch/i386/boot/printf.c delete mode 100644 arch/i386/boot/setup.ld delete mode 100644 arch/i386/boot/string.c delete mode 100644 arch/i386/boot/tty.c delete mode 100644 arch/i386/boot/version.c delete mode 100644 arch/i386/boot/vesa.h delete mode 100644 arch/i386/boot/video-bios.c delete mode 100644 arch/i386/boot/video-vesa.c delete mode 100644 arch/i386/boot/video-vga.c delete mode 100644 arch/i386/boot/video.c delete mode 100644 arch/i386/boot/video.h delete mode 100644 arch/i386/boot/voyager.c create mode 100644 arch/x86/boot/.gitignore create mode 100644 arch/x86/boot/Makefile create mode 100644 arch/x86/boot/a20.c create mode 100644 arch/x86/boot/apm.c create mode 100644 arch/x86/boot/bitops.h create mode 100644 arch/x86/boot/boot.h create mode 100644 arch/x86/boot/cmdline.c create mode 100644 arch/x86/boot/code16gcc.h create mode 100644 arch/x86/boot/copy.S create mode 100644 arch/x86/boot/cpu.c create mode 100644 arch/x86/boot/cpucheck.c create mode 100644 arch/x86/boot/edd.c create mode 100644 arch/x86/boot/header.S create mode 100644 arch/x86/boot/install.sh create mode 100644 arch/x86/boot/main.c create mode 100644 arch/x86/boot/mca.c create mode 100644 arch/x86/boot/memory.c create mode 100644 arch/x86/boot/mtools.conf.in create mode 100644 arch/x86/boot/pm.c create mode 100644 arch/x86/boot/pmjump.S create mode 100644 arch/x86/boot/printf.c create mode 100644 arch/x86/boot/setup.ld create mode 100644 arch/x86/boot/string.c create mode 100644 arch/x86/boot/tty.c create mode 100644 arch/x86/boot/version.c create mode 100644 arch/x86/boot/vesa.h create mode 100644 arch/x86/boot/video-bios.c create mode 100644 arch/x86/boot/video-vesa.c create mode 100644 arch/x86/boot/video-vga.c create mode 100644 arch/x86/boot/video.c create mode 100644 arch/x86/boot/video.h create mode 100644 arch/x86/boot/voyager.c (limited to 'arch/x86') diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 510df782f94..776d8dcf234 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -116,7 +116,7 @@ drivers-$(CONFIG_FB) += arch/i386/video/ CFLAGS += $(mflags-y) AFLAGS += $(mflags-y) -boot := arch/i386/boot +boot := arch/x86/boot PHONY += zImage bzImage compressed zlilo bzlilo \ zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install @@ -125,9 +125,11 @@ all: bzImage # KBUILD_IMAGE specify target image being built KBUILD_IMAGE := $(boot)/bzImage -zImage zlilo zdisk: KBUILD_IMAGE := arch/i386/boot/zImage +zImage zlilo zdisk: KBUILD_IMAGE := arch/x86/boot/zImage zImage bzImage: vmlinux + $(Q)mkdir -p $(objtree)/arch/i386/boot + $(Q)ln -fsn $(objtree)/arch/x86/boot/bzImage $(objtree)/arch/i386/boot/bzImage $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) compressed: zImage @@ -145,7 +147,8 @@ install: $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install archclean: - $(Q)$(MAKE) $(clean)=arch/i386/boot + $(Q)rm -rf $(objtree)/arch/i386/boot + $(Q)$(MAKE) $(clean)=arch/x86/boot define archhelp echo '* bzImage - Compressed kernel image (arch/$(ARCH)/boot/bzImage)' diff --git a/arch/i386/boot/.gitignore b/arch/i386/boot/.gitignore deleted file mode 100644 index 18465143cfa..00000000000 --- a/arch/i386/boot/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -bootsect -bzImage -setup -setup.bin -setup.elf diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile deleted file mode 100644 index 93386a4e40b..00000000000 --- a/arch/i386/boot/Makefile +++ /dev/null @@ -1,171 +0,0 @@ -# -# arch/i386/boot/Makefile -# -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# -# Copyright (C) 1994 by Linus Torvalds -# - -# ROOT_DEV specifies the default root-device when making the image. -# This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case -# the default of FLOPPY is used by 'build'. - -ROOT_DEV := CURRENT - -# If you want to preset the SVGA mode, uncomment the next line and -# set SVGA_MODE to whatever number you want. -# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. -# The number is the same as you would ordinarily press at bootup. - -SVGA_MODE := -DSVGA_MODE=NORMAL_VGA - -# If you want the RAM disk device, define this to be the size in blocks. - -#RAMDISK := -DRAMDISK=512 - -targets := vmlinux.bin setup.bin setup.elf zImage bzImage -subdir- := compressed - -setup-y += a20.o apm.o cmdline.o copy.o cpu.o cpucheck.o edd.o -setup-y += header.o main.o mca.o memory.o pm.o pmjump.o -setup-y += printf.o string.o tty.o video.o version.o voyager.o - -# The link order of the video-*.o modules can matter. In particular, -# video-vga.o *must* be listed first, followed by video-vesa.o. -# Hardware-specific drivers should follow in the order they should be -# probed, and video-bios.o should typically be last. -setup-y += video-vga.o -setup-y += video-vesa.o -setup-y += video-bios.o -targets += $(setup-y) -hostprogs-y := tools/build - -HOSTCFLAGS_build.o := $(LINUXINCLUDE) - -# --------------------------------------------------------------------------- - -# How to compile the 16-bit code. Note we always compile for -march=i386, -# that way we can complain to the user if the CPU is insufficient. -cflags-i386 := -cflags-x86_64 := -m32 -CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \ - $(cflags-$(ARCH)) \ - -Wall -Wstrict-prototypes \ - -march=i386 -mregparm=3 \ - -include $(srctree)/$(src)/code16gcc.h \ - -fno-strict-aliasing -fomit-frame-pointer \ - $(call cc-option, -ffreestanding) \ - $(call cc-option, -fno-toplevel-reorder,\ - $(call cc-option, -fno-unit-at-a-time)) \ - $(call cc-option, -fno-stack-protector) \ - $(call cc-option, -mpreferred-stack-boundary=2) -AFLAGS := $(CFLAGS) -D__ASSEMBLY__ - -$(obj)/zImage: IMAGE_OFFSET := 0x1000 -$(obj)/zImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) -$(obj)/bzImage: IMAGE_OFFSET := 0x100000 -$(obj)/bzImage: EXTRA_CFLAGS := -D__BIG_KERNEL__ -$(obj)/bzImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__ -$(obj)/bzImage: BUILDFLAGS := -b - -quiet_cmd_image = BUILD $@ -cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/setup.bin \ - $(obj)/vmlinux.bin $(ROOT_DEV) > $@ - -$(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \ - $(obj)/vmlinux.bin $(obj)/tools/build FORCE - $(call if_changed,image) - @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' - -$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE - $(call if_changed,objcopy) - -SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) - -LDFLAGS_setup.elf := -T -$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE - $(call if_changed,ld) - -OBJCOPYFLAGS_setup.bin := -O binary - -$(obj)/setup.bin: $(obj)/setup.elf FORCE - $(call if_changed,objcopy) - -$(obj)/compressed/vmlinux: FORCE - $(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@ - -# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel -FDARGS = -# Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel -FDINITRD = - -image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,) - -$(obj)/mtools.conf: $(src)/mtools.conf.in - sed -e 's|@OBJ@|$(obj)|g' < $< > $@ - -# This requires write access to /dev/fd0 -zdisk: $(BOOTIMAGE) $(obj)/mtools.conf - MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync - syslinux /dev/fd0 ; sync - echo '$(image_cmdline)' | \ - MTOOLSRC=$(src)/mtools.conf mcopy - a:syslinux.cfg - if [ -f '$(FDINITRD)' ] ; then \ - MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \ - fi - MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) a:linux ; sync - -# These require being root or having syslinux 2.02 or higher installed -fdimage fdimage144: $(BOOTIMAGE) $(obj)/mtools.conf - dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440 - MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync - syslinux $(obj)/fdimage ; sync - echo '$(image_cmdline)' | \ - MTOOLSRC=$(obj)/mtools.conf mcopy - v:syslinux.cfg - if [ -f '$(FDINITRD)' ] ; then \ - MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \ - fi - MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) v:linux ; sync - -fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf - dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880 - MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync - syslinux $(obj)/fdimage ; sync - echo '$(image_cmdline)' | \ - MTOOLSRC=$(obj)/mtools.conf mcopy - w:syslinux.cfg - if [ -f '$(FDINITRD)' ] ; then \ - MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \ - fi - MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) w:linux ; sync - -isoimage: $(BOOTIMAGE) - -rm -rf $(obj)/isoimage - mkdir $(obj)/isoimage - for i in lib lib64 share end ; do \ - if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \ - cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \ - break ; \ - fi ; \ - if [ $$i = end ] ; then exit 1 ; fi ; \ - done - cp $(BOOTIMAGE) $(obj)/isoimage/linux - echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg - if [ -f '$(FDINITRD)' ] ; then \ - cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \ - fi - mkisofs -J -r -o $(obj)/image.iso -b isolinux.bin -c boot.cat \ - -no-emul-boot -boot-load-size 4 -boot-info-table \ - $(obj)/isoimage - rm -rf $(obj)/isoimage - -zlilo: $(BOOTIMAGE) - if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi - if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi - cat $(BOOTIMAGE) > $(INSTALL_PATH)/vmlinuz - cp System.map $(INSTALL_PATH)/ - if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi - -install: - sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(BOOTIMAGE) System.map "$(INSTALL_PATH)" diff --git a/arch/i386/boot/a20.c b/arch/i386/boot/a20.c deleted file mode 100644 index 31348d054fc..00000000000 --- a/arch/i386/boot/a20.c +++ /dev/null @@ -1,161 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/a20.c - * - * Enable A20 gate (return -1 on failure) - */ - -#include "boot.h" - -#define MAX_8042_LOOPS 100000 - -static int empty_8042(void) -{ - u8 status; - int loops = MAX_8042_LOOPS; - - while (loops--) { - io_delay(); - - status = inb(0x64); - if (status & 1) { - /* Read and discard input data */ - io_delay(); - (void)inb(0x60); - } else if (!(status & 2)) { - /* Buffers empty, finished! */ - return 0; - } - } - - return -1; -} - -/* Returns nonzero if the A20 line is enabled. The memory address - used as a test is the int $0x80 vector, which should be safe. */ - -#define A20_TEST_ADDR (4*0x80) -#define A20_TEST_SHORT 32 -#define A20_TEST_LONG 2097152 /* 2^21 */ - -static int a20_test(int loops) -{ - int ok = 0; - int saved, ctr; - - set_fs(0x0000); - set_gs(0xffff); - - saved = ctr = rdfs32(A20_TEST_ADDR); - - while (loops--) { - wrfs32(++ctr, A20_TEST_ADDR); - io_delay(); /* Serialize and make delay constant */ - ok = rdgs32(A20_TEST_ADDR+0x10) ^ ctr; - if (ok) - break; - } - - wrfs32(saved, A20_TEST_ADDR); - return ok; -} - -/* Quick test to see if A20 is already enabled */ -static int a20_test_short(void) -{ - return a20_test(A20_TEST_SHORT); -} - -/* Longer test that actually waits for A20 to come on line; this - is useful when dealing with the KBC or other slow external circuitry. */ -static int a20_test_long(void) -{ - return a20_test(A20_TEST_LONG); -} - -static void enable_a20_bios(void) -{ - asm volatile("pushfl; int $0x15; popfl" - : : "a" ((u16)0x2401)); -} - -static void enable_a20_kbc(void) -{ - empty_8042(); - - outb(0xd1, 0x64); /* Command write */ - empty_8042(); - - outb(0xdf, 0x60); /* A20 on */ - empty_8042(); -} - -static void enable_a20_fast(void) -{ - u8 port_a; - - port_a = inb(0x92); /* Configuration port A */ - port_a |= 0x02; /* Enable A20 */ - port_a &= ~0x01; /* Do not reset machine */ - outb(port_a, 0x92); -} - -/* - * Actual routine to enable A20; return 0 on ok, -1 on failure - */ - -#define A20_ENABLE_LOOPS 255 /* Number of times to try */ - -int enable_a20(void) -{ - int loops = A20_ENABLE_LOOPS; - -#if defined(CONFIG_X86_ELAN) - /* Elan croaks if we try to touch the KBC */ - enable_a20_fast(); - while (!a20_test_long()) - ; - return 0; -#elif defined(CONFIG_X86_VOYAGER) - /* On Voyager, a20_test() is unsafe? */ - enable_a20_kbc(); - return 0; -#else - while (loops--) { - /* First, check to see if A20 is already enabled - (legacy free, etc.) */ - if (a20_test_short()) - return 0; - - /* Next, try the BIOS (INT 0x15, AX=0x2401) */ - enable_a20_bios(); - if (a20_test_short()) - return 0; - - /* Try enabling A20 through the keyboard controller */ - empty_8042(); - if (a20_test_short()) - return 0; /* BIOS worked, but with delayed reaction */ - - enable_a20_kbc(); - if (a20_test_long()) - return 0; - - /* Finally, try enabling the "fast A20 gate" */ - enable_a20_fast(); - if (a20_test_long()) - return 0; - } - - return -1; -#endif -} diff --git a/arch/i386/boot/apm.c b/arch/i386/boot/apm.c deleted file mode 100644 index eab50c55a3a..00000000000 --- a/arch/i386/boot/apm.c +++ /dev/null @@ -1,98 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * Original APM BIOS checking by Stephen Rothwell, May 1994 - * (sfr@canb.auug.org.au) - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/apm.c - * - * Get APM BIOS information - */ - -#include "boot.h" - -#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE) - -int query_apm_bios(void) -{ - u16 ax, bx, cx, dx, di; - u32 ebx, esi; - u8 err; - - /* APM BIOS installation check */ - ax = 0x5300; - bx = cx = 0; - asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0" - : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx) - : : "esi", "edi"); - - if (err) - return -1; /* No APM BIOS */ - - if (bx != 0x504d) /* "PM" signature */ - return -1; - - if (!(cx & 0x02)) /* 32 bits supported? */ - return -1; - - /* Disconnect first, just in case */ - ax = 0x5304; - bx = 0; - asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp" - : "+a" (ax), "+b" (bx) - : : "ecx", "edx", "esi", "edi"); - - /* Paranoia */ - ebx = esi = 0; - cx = dx = di = 0; - - /* 32-bit connect */ - asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %6" - : "=a" (ax), "+b" (ebx), "+c" (cx), "+d" (dx), - "+S" (esi), "+D" (di), "=m" (err) - : "a" (0x5303)); - - boot_params.apm_bios_info.cseg = ax; - boot_params.apm_bios_info.offset = ebx; - boot_params.apm_bios_info.cseg_16 = cx; - boot_params.apm_bios_info.dseg = dx; - boot_params.apm_bios_info.cseg_len = (u16)esi; - boot_params.apm_bios_info.cseg_16_len = esi >> 16; - boot_params.apm_bios_info.dseg_len = di; - - if (err) - return -1; - - /* Redo the installation check as the 32-bit connect; - some BIOSes return different flags this way... */ - - ax = 0x5300; - bx = cx = 0; - asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0" - : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx) - : : "esi", "edi"); - - if (err || bx != 0x504d) { - /* Failure with 32-bit connect, try to disconect and ignore */ - ax = 0x5304; - bx = 0; - asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp" - : "+a" (ax), "+b" (bx) - : : "ecx", "edx", "esi", "edi"); - return -1; - } - - boot_params.apm_bios_info.version = ax; - boot_params.apm_bios_info.flags = cx; - return 0; -} - -#endif diff --git a/arch/i386/boot/bitops.h b/arch/i386/boot/bitops.h deleted file mode 100644 index 8dcc8dc7db8..00000000000 --- a/arch/i386/boot/bitops.h +++ /dev/null @@ -1,45 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/bitops.h - * - * Very simple bitops for the boot code. - */ - -#ifndef BOOT_BITOPS_H -#define BOOT_BITOPS_H -#define _LINUX_BITOPS_H /* Inhibit inclusion of */ - -static inline int constant_test_bit(int nr, const void *addr) -{ - const u32 *p = (const u32 *)addr; - return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0; -} -static inline int variable_test_bit(int nr, const void *addr) -{ - u8 v; - const u32 *p = (const u32 *)addr; - - asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr)); - return v; -} - -#define test_bit(nr,addr) \ -(__builtin_constant_p(nr) ? \ - constant_test_bit((nr),(addr)) : \ - variable_test_bit((nr),(addr))) - -static inline void set_bit(int nr, void *addr) -{ - asm("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr)); -} - -#endif /* BOOT_BITOPS_H */ diff --git a/arch/i386/boot/boot.h b/arch/i386/boot/boot.h deleted file mode 100644 index 20bab9431ac..00000000000 --- a/arch/i386/boot/boot.h +++ /dev/null @@ -1,296 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/boot.h - * - * Header file for the real-mode kernel code - */ - -#ifndef BOOT_BOOT_H -#define BOOT_BOOT_H - -#ifndef __ASSEMBLY__ - -#include -#include -#include -#include -#include - -/* Useful macros */ -#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) - -extern struct setup_header hdr; -extern struct boot_params boot_params; - -/* Basic port I/O */ -static inline void outb(u8 v, u16 port) -{ - asm volatile("outb %0,%1" : : "a" (v), "dN" (port)); -} -static inline u8 inb(u16 port) -{ - u8 v; - asm volatile("inb %1,%0" : "=a" (v) : "dN" (port)); - return v; -} - -static inline void outw(u16 v, u16 port) -{ - asm volatile("outw %0,%1" : : "a" (v), "dN" (port)); -} -static inline u16 inw(u16 port) -{ - u16 v; - asm volatile("inw %1,%0" : "=a" (v) : "dN" (port)); - return v; -} - -static inline void outl(u32 v, u16 port) -{ - asm volatile("outl %0,%1" : : "a" (v), "dN" (port)); -} -static inline u32 inl(u32 port) -{ - u32 v; - asm volatile("inl %1,%0" : "=a" (v) : "dN" (port)); - return v; -} - -static inline void io_delay(void) -{ - const u16 DELAY_PORT = 0x80; - asm volatile("outb %%al,%0" : : "dN" (DELAY_PORT)); -} - -/* These functions are used to reference data in other segments. */ - -static inline u16 ds(void) -{ - u16 seg; - asm("movw %%ds,%0" : "=rm" (seg)); - return seg; -} - -static inline void set_fs(u16 seg) -{ - asm volatile("movw %0,%%fs" : : "rm" (seg)); -} -static inline u16 fs(void) -{ - u16 seg; - asm volatile("movw %%fs,%0" : "=rm" (seg)); - return seg; -} - -static inline void set_gs(u16 seg) -{ - asm volatile("movw %0,%%gs" : : "rm" (seg)); -} -static inline u16 gs(void) -{ - u16 seg; - asm volatile("movw %%gs,%0" : "=rm" (seg)); - return seg; -} - -typedef unsigned int addr_t; - -static inline u8 rdfs8(addr_t addr) -{ - u8 v; - asm volatile("movb %%fs:%1,%0" : "=r" (v) : "m" (*(u8 *)addr)); - return v; -} -static inline u16 rdfs16(addr_t addr) -{ - u16 v; - asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr)); - return v; -} -static inline u32 rdfs32(addr_t addr) -{ - u32 v; - asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr)); - return v; -} - -static inline void wrfs8(u8 v, addr_t addr) -{ - asm volatile("movb %1,%%fs:%0" : "+m" (*(u8 *)addr) : "r" (v)); -} -static inline void wrfs16(u16 v, addr_t addr) -{ - asm volatile("movw %1,%%fs:%0" : "+m" (*(u16 *)addr) : "r" (v)); -} -static inline void wrfs32(u32 v, addr_t addr) -{ - asm volatile("movl %1,%%fs:%0" : "+m" (*(u32 *)addr) : "r" (v)); -} - -static inline u8 rdgs8(addr_t addr) -{ - u8 v; - asm volatile("movb %%gs:%1,%0" : "=r" (v) : "m" (*(u8 *)addr)); - return v; -} -static inline u16 rdgs16(addr_t addr) -{ - u16 v; - asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr)); - return v; -} -static inline u32 rdgs32(addr_t addr) -{ - u32 v; - asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr)); - return v; -} - -static inline void wrgs8(u8 v, addr_t addr) -{ - asm volatile("movb %1,%%gs:%0" : "+m" (*(u8 *)addr) : "r" (v)); -} -static inline void wrgs16(u16 v, addr_t addr) -{ - asm volatile("movw %1,%%gs:%0" : "+m" (*(u16 *)addr) : "r" (v)); -} -static inline void wrgs32(u32 v, addr_t addr) -{ - asm volatile("movl %1,%%gs:%0" : "+m" (*(u32 *)addr) : "r" (v)); -} - -/* Note: these only return true/false, not a signed return value! */ -static inline int memcmp(const void *s1, const void *s2, size_t len) -{ - u8 diff; - asm("repe; cmpsb; setnz %0" - : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); - return diff; -} - -static inline int memcmp_fs(const void *s1, addr_t s2, size_t len) -{ - u8 diff; - asm volatile("fs; repe; cmpsb; setnz %0" - : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); - return diff; -} -static inline int memcmp_gs(const void *s1, addr_t s2, size_t len) -{ - u8 diff; - asm volatile("gs; repe; cmpsb; setnz %0" - : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); - return diff; -} - -static inline int isdigit(int ch) -{ - return (ch >= '0') && (ch <= '9'); -} - -/* Heap -- available for dynamic lists. */ -#define STACK_SIZE 512 /* Minimum number of bytes for stack */ - -extern char _end[]; -extern char *HEAP; -extern char *heap_end; -#define RESET_HEAP() ((void *)( HEAP = _end )) -static inline char *__get_heap(size_t s, size_t a, size_t n) -{ - char *tmp; - - HEAP = (char *)(((size_t)HEAP+(a-1)) & ~(a-1)); - tmp = HEAP; - HEAP += s*n; - return tmp; -} -#define GET_HEAP(type, n) \ - ((type *)__get_heap(sizeof(type),__alignof__(type),(n))) - -static inline int heap_free(void) -{ - return heap_end-HEAP; -} - -/* copy.S */ - -void copy_to_fs(addr_t dst, void *src, size_t len); -void *copy_from_fs(void *dst, addr_t src, size_t len); -void copy_to_gs(addr_t dst, void *src, size_t len); -void *copy_from_gs(void *dst, addr_t src, size_t len); -void *memcpy(void *dst, void *src, size_t len); -void *memset(void *dst, int c, size_t len); - -#define memcpy(d,s,l) __builtin_memcpy(d,s,l) -#define memset(d,c,l) __builtin_memset(d,c,l) - -/* a20.c */ -int enable_a20(void); - -/* apm.c */ -int query_apm_bios(void); - -/* cmdline.c */ -int cmdline_find_option(const char *option, char *buffer, int bufsize); - -/* cpu.c, cpucheck.c */ -int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); -int validate_cpu(void); - -/* edd.c */ -void query_edd(void); - -/* header.S */ -void __attribute__((noreturn)) die(void); - -/* mca.c */ -int query_mca(void); - -/* memory.c */ -int detect_memory(void); - -/* pm.c */ -void __attribute__((noreturn)) go_to_protected_mode(void); - -/* pmjump.S */ -void __attribute__((noreturn)) - protected_mode_jump(u32 entrypoint, u32 bootparams); - -/* printf.c */ -int sprintf(char *buf, const char *fmt, ...); -int vsprintf(char *buf, const char *fmt, va_list args); -int printf(const char *fmt, ...); - -/* string.c */ -int strcmp(const char *str1, const char *str2); -size_t strnlen(const char *s, size_t maxlen); -unsigned int atou(const char *s); - -/* tty.c */ -void puts(const char *); -void putchar(int); -int getchar(void); -void kbd_flush(void); -int getchar_timeout(void); - -/* video.c */ -void set_video(void); - -/* video-vesa.c */ -void vesa_store_edid(void); - -/* voyager.c */ -int query_voyager(void); - -#endif /* __ASSEMBLY__ */ - -#endif /* BOOT_BOOT_H */ diff --git a/arch/i386/boot/cmdline.c b/arch/i386/boot/cmdline.c deleted file mode 100644 index 34bb778c435..00000000000 --- a/arch/i386/boot/cmdline.c +++ /dev/null @@ -1,97 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/cmdline.c - * - * Simple command-line parser for early boot. - */ - -#include "boot.h" - -static inline int myisspace(u8 c) -{ - return c <= ' '; /* Close enough approximation */ -} - -/* - * Find a non-boolean option, that is, "option=argument". In accordance - * with standard Linux practice, if this option is repeated, this returns - * the last instance on the command line. - * - * Returns the length of the argument (regardless of if it was - * truncated to fit in the buffer), or -1 on not found. - */ -int cmdline_find_option(const char *option, char *buffer, int bufsize) -{ - u32 cmdline_ptr = boot_params.hdr.cmd_line_ptr; - addr_t cptr; - char c; - int len = -1; - const char *opptr = NULL; - char *bufptr = buffer; - enum { - st_wordstart, /* Start of word/after whitespace */ - st_wordcmp, /* Comparing this word */ - st_wordskip, /* Miscompare, skip */ - st_bufcpy /* Copying this to buffer */ - } state = st_wordstart; - - if (!cmdline_ptr || cmdline_ptr >= 0x100000) - return -1; /* No command line, or inaccessible */ - - cptr = cmdline_ptr & 0xf; - set_fs(cmdline_ptr >> 4); - - while (cptr < 0x10000 && (c = rdfs8(cptr++))) { - switch (state) { - case st_wordstart: - if (myisspace(c)) - break; - - /* else */ - state = st_wordcmp; - opptr = option; - /* fall through */ - - case st_wordcmp: - if (c == '=' && !*opptr) { - len = 0; - bufptr = buffer; - state = st_bufcpy; - } else if (myisspace(c)) { - state = st_wordstart; - } else if (c != *opptr++) { - state = st_wordskip; - } - break; - - case st_wordskip: - if (myisspace(c)) - state = st_wordstart; - break; - - case st_bufcpy: - if (myisspace(c)) { - state = st_wordstart; - } else { - if (len < bufsize-1) - *bufptr++ = c; - len++; - } - break; - } - } - - if (bufsize) - *bufptr = '\0'; - - return len; -} diff --git a/arch/i386/boot/code16gcc.h b/arch/i386/boot/code16gcc.h deleted file mode 100644 index d93e48010b6..00000000000 --- a/arch/i386/boot/code16gcc.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * code16gcc.h - * - * This file is -include'd when compiling 16-bit C code. - * Note: this asm() needs to be emitted before gcc emits any code. - * Depending on gcc version, this requires -fno-unit-at-a-time or - * -fno-toplevel-reorder. - * - * Hopefully gcc will eventually have a real -m16 option so we can - * drop this hack long term. - */ - -#ifndef __ASSEMBLY__ -asm(".code16gcc"); -#endif diff --git a/arch/i386/boot/copy.S b/arch/i386/boot/copy.S deleted file mode 100644 index ef127e56a3c..00000000000 --- a/arch/i386/boot/copy.S +++ /dev/null @@ -1,101 +0,0 @@ -/* ----------------------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/copy.S - * - * Memory copy routines - */ - - .code16gcc - .text - - .globl memcpy - .type memcpy, @function -memcpy: - pushw %si - pushw %di - movw %ax, %di - movw %dx, %si - pushw %cx - shrw $2, %cx - rep; movsl - popw %cx - andw $3, %cx - rep; movsb - popw %di - popw %si - ret - .size memcpy, .-memcpy - - .globl memset - .type memset, @function -memset: - pushw %di - movw %ax, %di - movzbl %dl, %eax - imull $0x01010101,%eax - pushw %cx - shrw $2, %cx - rep; stosl - popw %cx - andw $3, %cx - rep; stosb - popw %di - ret - .size memset, .-memset - - .globl copy_from_fs - .type copy_from_fs, @function -copy_from_fs: - pushw %ds - pushw %fs - popw %ds - call memcpy - popw %ds - ret - .size copy_from_fs, .-copy_from_fs - - .globl copy_to_fs - .type copy_to_fs, @function -copy_to_fs: - pushw %es - pushw %fs - popw %es - call memcpy - popw %es - ret - .size copy_to_fs, .-copy_to_fs - -#if 0 /* Not currently used, but can be enabled as needed */ - - .globl copy_from_gs - .type copy_from_gs, @function -copy_from_gs: - pushw %ds - pushw %gs - popw %ds - call memcpy - popw %ds - ret - .size copy_from_gs, .-copy_from_gs - .globl copy_to_gs - - .type copy_to_gs, @function -copy_to_gs: - pushw %es - pushw %gs - popw %es - call memcpy - popw %es - ret - .size copy_to_gs, .-copy_to_gs - -#endif diff --git a/arch/i386/boot/cpu.c b/arch/i386/boot/cpu.c deleted file mode 100644 index 2a5c32da585..00000000000 --- a/arch/i386/boot/cpu.c +++ /dev/null @@ -1,69 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/cpu.c - * - * Check for obligatory CPU features and abort if the features are not - * present. - */ - -#include "boot.h" -#include "bitops.h" -#include - -static char *cpu_name(int level) -{ - static char buf[6]; - - if (level == 64) { - return "x86-64"; - } else { - sprintf(buf, "i%d86", level); - return buf; - } -} - -int validate_cpu(void) -{ - u32 *err_flags; - int cpu_level, req_level; - - check_cpu(&cpu_level, &req_level, &err_flags); - - if (cpu_level < req_level) { - printf("This kernel requires an %s CPU, ", - cpu_name(req_level)); - printf("but only detected an %s CPU.\n", - cpu_name(cpu_level)); - return -1; - } - - if (err_flags) { - int i, j; - puts("This kernel requires the following features " - "not present on the CPU:\n"); - - for (i = 0; i < NCAPINTS; i++) { - u32 e = err_flags[i]; - - for (j = 0; j < 32; j++) { - if (e & 1) - printf("%d:%d ", i, j); - - e >>= 1; - } - } - putchar('\n'); - return -1; - } else { - return 0; - } -} diff --git a/arch/i386/boot/cpucheck.c b/arch/i386/boot/cpucheck.c deleted file mode 100644 index e655a89c551..00000000000 --- a/arch/i386/boot/cpucheck.c +++ /dev/null @@ -1,268 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/cpucheck.c - * - * Check for obligatory CPU features and abort if the features are not - * present. This code should be compilable as 16-, 32- or 64-bit - * code, so be very careful with types and inline assembly. - * - * This code should not contain any messages; that requires an - * additional wrapper. - * - * As written, this code is not safe for inclusion into the kernel - * proper (after FPU initialization, in particular). - */ - -#ifdef _SETUP -# include "boot.h" -# include "bitops.h" -#endif -#include -#include -#include -#include -#include - -struct cpu_features { - int level; /* Family, or 64 for x86-64 */ - int model; - u32 flags[NCAPINTS]; -}; - -static struct cpu_features cpu; -static u32 cpu_vendor[3]; -static u32 err_flags[NCAPINTS]; - -#ifdef CONFIG_X86_64 -static const int req_level = 64; -#elif defined(CONFIG_X86_MINIMUM_CPU_FAMILY) -static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY; -#else -static const int req_level = 3; -#endif - -static const u32 req_flags[NCAPINTS] = -{ - REQUIRED_MASK0, - REQUIRED_MASK1, - REQUIRED_MASK2, - REQUIRED_MASK3, - REQUIRED_MASK4, - REQUIRED_MASK5, - REQUIRED_MASK6, - REQUIRED_MASK7, -}; - -#define A32(a,b,c,d) (((d) << 24)+((c) << 16)+((b) << 8)+(a)) - -static int is_amd(void) -{ - return cpu_vendor[0] == A32('A','u','t','h') && - cpu_vendor[1] == A32('e','n','t','i') && - cpu_vendor[2] == A32('c','A','M','D'); -} - -static int is_centaur(void) -{ - return cpu_vendor[0] == A32('C','e','n','t') && - cpu_vendor[1] == A32('a','u','r','H') && - cpu_vendor[2] == A32('a','u','l','s'); -} - -static int is_transmeta(void) -{ - return cpu_vendor[0] == A32('G','e','n','u') && - cpu_vendor[1] == A32('i','n','e','T') && - cpu_vendor[2] == A32('M','x','8','6'); -} - -static int has_fpu(void) -{ - u16 fcw = -1, fsw = -1; - u32 cr0; - - asm("movl %%cr0,%0" : "=r" (cr0)); - if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { - cr0 &= ~(X86_CR0_EM|X86_CR0_TS); - asm volatile("movl %0,%%cr0" : : "r" (cr0)); - } - - asm volatile("fninit ; fnstsw %0 ; fnstcw %1" - : "+m" (fsw), "+m" (fcw)); - - return fsw == 0 && (fcw & 0x103f) == 0x003f; -} - -static int has_eflag(u32 mask) -{ - u32 f0, f1; - - asm("pushfl ; " - "pushfl ; " - "popl %0 ; " - "movl %0,%1 ; " - "xorl %2,%1 ; " - "pushl %1 ; " - "popfl ; " - "pushfl ; " - "popl %1 ; " - "popfl" - : "=&r" (f0), "=&r" (f1) - : "ri" (mask)); - - return !!((f0^f1) & mask); -} - -static void get_flags(void) -{ - u32 max_intel_level, max_amd_level; - u32 tfms; - - if (has_fpu()) - set_bit(X86_FEATURE_FPU, cpu.flags); - - if (has_eflag(X86_EFLAGS_ID)) { - asm("cpuid" - : "=a" (max_intel_level), - "=b" (cpu_vendor[0]), - "=d" (cpu_vendor[1]), - "=c" (cpu_vendor[2]) - : "a" (0)); - - if (max_intel_level >= 0x00000001 && - max_intel_level <= 0x0000ffff) { - asm("cpuid" - : "=a" (tfms), - "=c" (cpu.flags[4]), - "=d" (cpu.flags[0]) - : "a" (0x00000001) - : "ebx"); - cpu.level = (tfms >> 8) & 15; - cpu.model = (tfms >> 4) & 15; - if (cpu.level >= 6) - cpu.model += ((tfms >> 16) & 0xf) << 4; - } - - asm("cpuid" - : "=a" (max_amd_level) - : "a" (0x80000000) - : "ebx", "ecx", "edx"); - - if (max_amd_level >= 0x80000001 && - max_amd_level <= 0x8000ffff) { - u32 eax = 0x80000001; - asm("cpuid" - : "+a" (eax), - "=c" (cpu.flags[6]), - "=d" (cpu.flags[1]) - : : "ebx"); - } - } -} - -/* Returns a bitmask of which words we have error bits in */ -static int check_flags(void) -{ - u32 err; - int i; - - err = 0; - for (i = 0; i < NCAPINTS; i++) { - err_flags[i] = req_flags[i] & ~cpu.flags[i]; - if (err_flags[i]) - err |= 1 << i; - } - - return err; -} - -/* - * Returns -1 on error. - * - * *cpu_level is set to the current CPU level; *req_level to the required - * level. x86-64 is considered level 64 for this purpose. - * - * *err_flags_ptr is set to the flags error array if there are flags missing. - */ -int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) -{ - int err; - - memset(&cpu.flags, 0, sizeof cpu.flags); - cpu.level = 3; - - if (has_eflag(X86_EFLAGS_AC)) - cpu.level = 4; - - get_flags(); - err = check_flags(); - - if (test_bit(X86_FEATURE_LM, cpu.flags)) - cpu.level = 64; - - if (err == 0x01 && - !(err_flags[0] & - ~((1 << X86_FEATURE_XMM)|(1 << X86_FEATURE_XMM2))) && - is_amd()) { - /* If this is an AMD and we're only missing SSE+SSE2, try to - turn them on */ - - u32 ecx = MSR_K7_HWCR; - u32 eax, edx; - - asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); - eax &= ~(1 << 15); - asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); - - get_flags(); /* Make sure it really did something */ - err = check_flags(); - } else if (err == 0x01 && - !(err_flags[0] & ~(1 << X86_FEATURE_CX8)) && - is_centaur() && cpu.model >= 6) { - /* If this is a VIA C3, we might have to enable CX8 - explicitly */ - - u32 ecx = MSR_VIA_FCR; - u32 eax, edx; - - asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); - eax |= (1<<1)|(1<<7); - asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); - - set_bit(X86_FEATURE_CX8, cpu.flags); - err = check_flags(); - } else if (err == 0x01 && is_transmeta()) { - /* Transmeta might have masked feature bits in word 0 */ - - u32 ecx = 0x80860004; - u32 eax, edx; - u32 level = 1; - - asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); - asm("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx)); - asm("cpuid" - : "+a" (level), "=d" (cpu.flags[0]) - : : "ecx", "ebx"); - asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); - - err = check_flags(); - } - - if (err_flags_ptr) - *err_flags_ptr = err ? err_flags : NULL; - if (cpu_level_ptr) - *cpu_level_ptr = cpu.level; - if (req_level_ptr) - *req_level_ptr = req_level; - - return (cpu.level < req_level || err) ? -1 : 0; -} diff --git a/arch/i386/boot/edd.c b/arch/i386/boot/edd.c deleted file mode 100644 index bd138e442ec..00000000000 --- a/arch/i386/boot/edd.c +++ /dev/null @@ -1,167 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/edd.c - * - * Get EDD BIOS disk information - */ - -#include "boot.h" -#include - -#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) - -/* - * Read the MBR (first sector) from a specific device. - */ -static int read_mbr(u8 devno, void *buf) -{ - u16 ax, bx, cx, dx; - - ax = 0x0201; /* Legacy Read, one sector */ - cx = 0x0001; /* Sector 0-0-1 */ - dx = devno; - bx = (size_t)buf; - asm volatile("pushfl; stc; int $0x13; setc %%al; popfl" - : "+a" (ax), "+c" (cx), "+d" (dx), "+b" (bx) - : : "esi", "edi", "memory"); - - return -(u8)ax; /* 0 or -1 */ -} - -static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig) -{ - int sector_size; - char *mbrbuf_ptr, *mbrbuf_end; - u32 buf_base, mbr_base; - extern char _end[]; - - sector_size = ei->params.bytes_per_sector; - if (!sector_size) - sector_size = 512; /* Best available guess */ - - /* Produce a naturally aligned buffer on the heap */ - buf_base = (ds() << 4) + (u32)&_end; - mbr_base = (buf_base+sector_size-1) & ~(sector_size-1); - mbrbuf_ptr = _end + (mbr_base-buf_base); - mbrbuf_end = mbrbuf_ptr + sector_size; - - /* Make sure we actually have space on the heap... */ - if (!(boot_params.hdr.loadflags & CAN_USE_HEAP)) - return -1; - if (mbrbuf_end > (char *)(size_t)boot_params.hdr.heap_end_ptr) - return -1; - - if (read_mbr(devno, mbrbuf_ptr)) - return -1; - - *mbrsig = *(u32 *)&mbrbuf_ptr[EDD_MBR_SIG_OFFSET]; - return 0; -} - -static int get_edd_info(u8 devno, struct edd_info *ei) -{ - u16 ax, bx, cx, dx, di; - - memset(ei, 0, sizeof *ei); - - /* Check Extensions Present */ - - ax = 0x4100; - bx = EDDMAGIC1; - dx = devno; - asm("pushfl; stc; int $0x13; setc %%al; popfl" - : "+a" (ax), "+b" (bx), "=c" (cx), "+d" (dx) - : : "esi", "edi"); - - if ((u8)ax) - return -1; /* No extended information */ - - if (bx != EDDMAGIC2) - return -1; - - ei->device = devno; - ei->version = ax >> 8; /* EDD version number */ - ei->interface_support = cx; /* EDD functionality subsets */ - - /* Extended Get Device Parameters */ - - ei->params.length = sizeof(ei->params); - ax = 0x4800; - dx = devno; - asm("pushfl; int $0x13; popfl" - : "+a" (ax), "+d" (dx), "=m" (ei->params) - : "S" (&ei->params) - : "ebx", "ecx", "edi"); - - /* Get legacy CHS parameters */ - - /* Ralf Brown recommends setting ES:DI to 0:0 */ - ax = 0x0800; - dx = devno; - di = 0; - asm("pushw %%es; " - "movw %%di,%%es; " - "pushfl; stc; int $0x13; setc %%al; popfl; " - "popw %%es" - : "+a" (ax), "=b" (bx), "=c" (cx), "+d" (dx), "+D" (di) - : : "esi"); - - if ((u8)ax == 0) { - ei->legacy_max_cylinder = (cx >> 8) + ((cx & 0xc0) << 2); - ei->legacy_max_head = dx >> 8; - ei->legacy_sectors_per_track = cx & 0x3f; - } - - return 0; -} - -void query_edd(void) -{ - char eddarg[8]; - int do_mbr = 1; - int do_edd = 1; - int devno; - struct edd_info ei, *edp; - u32 *mbrptr; - - if (cmdline_find_option("edd", eddarg, sizeof eddarg) > 0) { - if (!strcmp(eddarg, "skipmbr") || !strcmp(eddarg, "skip")) - do_mbr = 0; - else if (!strcmp(eddarg, "off")) - do_edd = 0; - } - - edp = boot_params.eddbuf; - mbrptr = boot_params.edd_mbr_sig_buffer; - - if (!do_edd) - return; - - for (devno = 0x80; devno < 0x80+EDD_MBR_SIG_MAX; devno++) { - /* - * Scan the BIOS-supported hard disks and query EDD - * information... - */ - get_edd_info(devno, &ei); - - if (boot_params.eddbuf_entries < EDDMAXNR) { - memcpy(edp, &ei, sizeof ei); - edp++; - boot_params.eddbuf_entries++; - } - - if (do_mbr && !read_mbr_sig(devno, &ei, mbrptr++)) - boot_params.edd_mbr_sig_buf_entries = devno-0x80+1; - } -} - -#endif diff --git a/arch/i386/boot/header.S b/arch/i386/boot/header.S deleted file mode 100644 index f3140e596d4..00000000000 --- a/arch/i386/boot/header.S +++ /dev/null @@ -1,283 +0,0 @@ -/* - * header.S - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Based on bootsect.S and setup.S - * modified by more people than can be counted - * - * Rewritten as a common file by H. Peter Anvin (Apr 2007) - * - * BIG FAT NOTE: We're in real mode using 64k segments. Therefore segment - * addresses must be multiplied by 16 to obtain their respective linear - * addresses. To avoid confusion, linear addresses are written using leading - * hex while segment addresses are written as segment:offset. - * - */ - -#include -#include -#include -#include -#include -#include -#include "boot.h" - -SETUPSECTS = 4 /* default nr of setup-sectors */ -BOOTSEG = 0x07C0 /* original address of boot-sector */ -SYSSEG = DEF_SYSSEG /* system loaded at 0x10000 (65536) */ -SYSSIZE = DEF_SYSSIZE /* system size: # of 16-byte clicks */ - /* to be loaded */ -ROOT_DEV = 0 /* ROOT_DEV is now written by "build" */ -SWAP_DEV = 0 /* SWAP_DEV is now written by "build" */ - -#ifndef SVGA_MODE -#define SVGA_MODE ASK_VGA -#endif - -#ifndef RAMDISK -#define RAMDISK 0 -#endif - -#ifndef ROOT_RDONLY -#define ROOT_RDONLY 1 -#endif - - .code16 - .section ".bstext", "ax" - - .global bootsect_start -bootsect_start: - - # Normalize the start address - ljmp $BOOTSEG, $start2 - -start2: - movw %cs, %ax - movw %ax, %ds - movw %ax, %es - movw %ax, %ss - xorw %sp, %sp - sti - cld - - movw $bugger_off_msg, %si - -msg_loop: - lodsb - andb %al, %al - jz bs_die - movb $0xe, %ah - movw $7, %bx - int $0x10 - jmp msg_loop - -bs_die: - # Allow the user to press a key, then reboot - xorw %ax, %ax - int $0x16 - int $0x19 - - # int 0x19 should never return. In case it does anyway, - # invoke the BIOS reset code... - ljmp $0xf000,$0xfff0 - - .section ".bsdata", "a" -bugger_off_msg: - .ascii "Direct booting from floppy is no longer supported.\r\n" - .ascii "Please use a boot loader program instead.\r\n" - .ascii "\n" - .ascii "Remove disk and press any key to reboot . . .\r\n" - .byte 0 - - - # Kernel attributes; used by setup. This is part 1 of the - # header, from the old boot sector. - - .section ".header", "a" - .globl hdr -hdr: -setup_sects: .byte SETUPSECTS -root_flags: .word ROOT_RDONLY -syssize: .long SYSSIZE -ram_size: .word RAMDISK -vid_mode: .word SVGA_MODE -root_dev: .word ROOT_DEV -boot_flag: .word 0xAA55 - - # offset 512, entry point - - .globl _start -_start: - # Explicitly enter this as bytes, or the assembler - # tries to generate a 3-byte jump here, which causes - # everything else to push off to the wrong offset. - .byte 0xeb # short (2-byte) jump - .byte start_of_setup-1f -1: - - # Part 2 of the header, from the old setup.S - - .ascii "HdrS" # header signature - .word 0x0206 # header version number (>= 0x0105) - # or else old loadlin-1.5 will fail) - .globl realmode_swtch -realmode_swtch: .word 0, 0 # default_switch, SETUPSEG -start_sys_seg: .word SYSSEG - .word kernel_version-512 # pointing to kernel version string - # above section of header is compatible - # with loadlin-1.5 (header v1.5). Don't - # change it. - -type_of_loader: .byte 0 # = 0, old one (LILO, Loadlin, - # Bootlin, SYSLX, bootsect...) - # See Documentation/i386/boot.txt for - # assigned ids - -# flags, unused bits must be zero (RFU) bit within loadflags -loadflags: -LOADED_HIGH = 1 # If set, the kernel is loaded high -CAN_USE_HEAP = 0x80 # If set, the loader also has set - # heap_end_ptr to tell how much - # space behind setup.S can be used for - # heap purposes. - # Only the loader knows what is free -#ifndef __BIG_KERNEL__ - .byte 0 -#else - .byte LOADED_HIGH -#endif - -setup_move_size: .word 0x8000 # size to move, when setup is not - # loaded at 0x90000. We will move setup - # to 0x90000 then just before jumping - # into the kernel. However, only the - # loader knows how much data behind - # us also needs to be loaded. - -code32_start: # here loaders can put a different - # start address for 32-bit code. -#ifndef __BIG_KERNEL__ - .long 0x1000 # 0x1000 = default for zImage -#else - .long 0x100000 # 0x100000 = default for big kernel -#endif - -ramdisk_image: .long 0 # address of loaded ramdisk image - # Here the loader puts the 32-bit - # address where it loaded the image. - # This only will be read by the kernel. - -ramdisk_size: .long 0 # its size in bytes - -bootsect_kludge: - .long 0 # obsolete - -heap_end_ptr: .word _end+1024 # (Header version 0x0201 or later) - # space from here (exclusive) down to - # end of setup code can be used by setup - # for local heap purposes. - -pad1: .word 0 -cmd_line_ptr: .long 0 # (Header version 0x0202 or later) - # If nonzero, a 32-bit pointer - # to the kernel command line. - # The command line should be - # located between the start of - # setup and the end of low - # memory (0xa0000), or it may - # get overwritten before it - # gets read. If this field is - # used, there is no longer - # anything magical about the - # 0x90000 segment; the setup - # can be located anywhere in - # low memory 0x10000 or higher. - -ramdisk_max: .long (-__PAGE_OFFSET-(512 << 20)-1) & 0x7fffffff - # (Header version 0x0203 or later) - # The highest safe address for - # the contents of an initrd - -kernel_alignment: .long CONFIG_PHYSICAL_ALIGN #physical addr alignment - #required for protected mode - #kernel -#ifdef CONFIG_RELOCATABLE -relocatable_kernel: .byte 1 -#else -relocatable_kernel: .byte 0 -#endif -pad2: .byte 0 -pad3: .word 0 - -cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, - #added with boot protocol - #version 2.06 - -# End of setup header ##################################################### - - .section ".inittext", "ax" -start_of_setup: -#ifdef SAFE_RESET_DISK_CONTROLLER -# Reset the disk controller. - movw $0x0000, %ax # Reset disk controller - movb $0x80, %dl # All disks - int $0x13 -#endif - -# We will have entered with %cs = %ds+0x20, normalize %cs so -# it is on par with the other segments. - pushw %ds - pushw $setup2 - lretw - -setup2: -# Force %es = %ds - movw %ds, %ax - movw %ax, %es - cld - -# Stack paranoia: align the stack and make sure it is good -# for both 16- and 32-bit references. In particular, if we -# were meant to have been using the full 16-bit segment, the -# caller might have set %sp to zero, which breaks %esp-based -# references. - andw $~3, %sp # dword align (might as well...) - jnz 1f - movw $0xfffc, %sp # Make sure we're not zero -1: movzwl %sp, %esp # Clear upper half of %esp - sti - -# Check signature at end of setup - cmpl $0x5a5aaa55, setup_sig - jne setup_bad - -# Zero the bss - movw $__bss_start, %di - movw $_end+3, %cx - xorl %eax, %eax - subw %di, %cx - shrw $2, %cx - rep; stosl - -# Jump to C code (should not return) - calll main - -# Setup corrupt somehow... -setup_bad: - movl $setup_corrupt, %eax - calll puts - # Fall through... - - .globl die - .type die, @function -die: - hlt - jmp die - - .size die, .-die - - .section ".initdata", "a" -setup_corrupt: - .byte 7 - .string "No setup signature found...\n" diff --git a/arch/i386/boot/install.sh b/arch/i386/boot/install.sh deleted file mode 100644 index 88d77761d01..00000000000 --- a/arch/i386/boot/install.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/sh -# -# arch/i386/boot/install.sh -# -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# -# Copyright (C) 1995 by Linus Torvalds -# -# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin -# -# "make install" script for i386 architecture -# -# Arguments: -# $1 - kernel version -# $2 - kernel image file -# $3 - kernel map file -# $4 - default install path (blank if root directory) -# - -verify () { - if [ ! -f "$1" ]; then - echo "" 1>&2 - echo " *** Missing file: $1" 1>&2 - echo ' *** You need to run "make" before "make install".' 1>&2 - echo "" 1>&2 - exit 1 - fi -} - -# Make sure the files actually exist -verify "$2" -verify "$3" - -# User may have a custom install script - -if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi -if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi - -# Default install - same as make zlilo - -if [ -f $4/vmlinuz ]; then - mv $4/vmlinuz $4/vmlinuz.old -fi - -if [ -f $4/System.map ]; then - mv $4/System.map $4/System.old -fi - -cat $2 > $4/vmlinuz -cp $3 $4/System.map - -if [ -x /sbin/lilo ]; then - /sbin/lilo -elif [ -x /etc/lilo/install ]; then - /etc/lilo/install -else - sync - echo "Cannot find LILO." -fi diff --git a/arch/i386/boot/main.c b/arch/i386/boot/main.c deleted file mode 100644 index 0eeef3989a1..00000000000 --- a/arch/i386/boot/main.c +++ /dev/null @@ -1,161 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/main.c - * - * Main module for the real-mode kernel code - */ - -#include "boot.h" - -struct boot_params boot_params __attribute__((aligned(16))); - -char *HEAP = _end; -char *heap_end = _end; /* Default end of heap = no heap */ - -/* - * Copy the header into the boot parameter block. Since this - * screws up the old-style command line protocol, adjust by - * filling in the new-style command line pointer instead. - */ -#define OLD_CL_MAGIC 0xA33F -#define OLD_CL_ADDRESS 0x20 - -static void copy_boot_params(void) -{ - struct old_cmdline { - u16 cl_magic; - u16 cl_offset; - }; - const struct old_cmdline * const oldcmd = - (const struct old_cmdline *)OLD_CL_ADDRESS; - - BUILD_BUG_ON(sizeof boot_params != 4096); - memcpy(&boot_params.hdr, &hdr, sizeof hdr); - - if (!boot_params.hdr.cmd_line_ptr && - oldcmd->cl_magic == OLD_CL_MAGIC) { - /* Old-style command line protocol. */ - u16 cmdline_seg; - - /* Figure out if the command line falls in the region - of memory that an old kernel would have copied up - to 0x90000... */ - if (oldcmd->cl_offset < boot_params.hdr.setup_move_size) - cmdline_seg = ds(); - else - cmdline_seg = 0x9000; - - boot_params.hdr.cmd_line_ptr = - (cmdline_seg << 4) + oldcmd->cl_offset; - } -} - -/* - * Set the keyboard repeat rate to maximum. Unclear why this - * is done here; this might be possible to kill off as stale code. - */ -static void keyboard_set_repeat(void) -{ - u16 ax = 0x0305; - u16 bx = 0; - asm volatile("int $0x16" - : "+a" (ax), "+b" (bx) - : : "ecx", "edx", "esi", "edi"); -} - -/* - * Get Intel SpeedStep (IST) information. - */ -static void query_ist(void) -{ - asm("int $0x15" - : "=a" (boot_params.ist_info.signature), - "=b" (boot_params.ist_info.command), - "=c" (boot_params.ist_info.event), - "=d" (boot_params.ist_info.perf_level) - : "a" (0x0000e980), /* IST Support */ - "d" (0x47534943)); /* Request value */ -} - -/* - * Tell the BIOS what CPU mode we intend to run in. - */ -static void set_bios_mode(void) -{ -#ifdef CONFIG_X86_64 - u32 eax, ebx; - - eax = 0xec00; - ebx = 2; - asm volatile("int $0x15" - : "+a" (eax), "+b" (ebx) - : : "ecx", "edx", "esi", "edi"); -#endif -} - -void main(void) -{ - /* First, copy the boot header into the "zeropage" */ - copy_boot_params(); - - /* End of heap check */ - if (boot_params.hdr.loadflags & CAN_USE_HEAP) { - heap_end = (char *)(boot_params.hdr.heap_end_ptr - +0x200-STACK_SIZE); - } else { - /* Boot protocol 2.00 only, no heap available */ - puts("WARNING: Ancient bootloader, some functionality " - "may be limited!\n"); - } - - /* Make sure we have all the proper CPU support */ - if (validate_cpu()) { - puts("Unable to boot - please use a kernel appropriate " - "for your CPU.\n"); - die(); - } - - /* Tell the BIOS what CPU mode we intend to run in. */ - set_bios_mode(); - - /* Detect memory layout */ - detect_memory(); - - /* Set keyboard repeat rate (why?) */ - keyboard_set_repeat(); - - /* Set the video mode */ - set_video(); - - /* Query MCA information */ - query_mca(); - - /* Voyager */ -#ifdef CONFIG_X86_VOYAGER - query_voyager(); -#endif - - /* Query Intel SpeedStep (IST) information */ - query_ist(); - - /* Query APM information */ -#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE) - query_apm_bios(); -#endif - - /* Query EDD information */ -#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) - query_edd(); -#endif - /* Do the last things and invoke protected mode */ - go_to_protected_mode(); -} diff --git a/arch/i386/boot/mca.c b/arch/i386/boot/mca.c deleted file mode 100644 index 68222f2d4b6..00000000000 --- a/arch/i386/boot/mca.c +++ /dev/null @@ -1,43 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/mca.c - * - * Get the MCA system description table - */ - -#include "boot.h" - -int query_mca(void) -{ - u8 err; - u16 es, bx, len; - - asm("pushw %%es ; " - "int $0x15 ; " - "setc %0 ; " - "movw %%es, %1 ; " - "popw %%es" - : "=acd" (err), "=acdSD" (es), "=b" (bx) - : "a" (0xc000)); - - if (err) - return -1; /* No MCA present */ - - set_fs(es); - len = rdfs16(bx); - - if (len > sizeof(boot_params.sys_desc_table)) - len = sizeof(boot_params.sys_desc_table); - - copy_from_fs(&boot_params.sys_desc_table, bx, len); - return 0; -} diff --git a/arch/i386/boot/memory.c b/arch/i386/boot/memory.c deleted file mode 100644 index 378353956b5..00000000000 --- a/arch/i386/boot/memory.c +++ /dev/null @@ -1,118 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/memory.c - * - * Memory detection code - */ - -#include "boot.h" - -#define SMAP 0x534d4150 /* ASCII "SMAP" */ - -static int detect_memory_e820(void) -{ - int count = 0; - u32 next = 0; - u32 size, id; - u8 err; - struct e820entry *desc = boot_params.e820_map; - - do { - size = sizeof(struct e820entry); - - /* Important: %edx is clobbered by some BIOSes, - so it must be either used for the error output - or explicitly marked clobbered. */ - asm("int $0x15; setc %0" - : "=d" (err), "+b" (next), "=a" (id), "+c" (size), - "=m" (*desc) - : "D" (desc), "d" (SMAP), "a" (0xe820)); - - /* Some BIOSes stop returning SMAP in the middle of - the search loop. We don't know exactly how the BIOS - screwed up the map at that point, we might have a - partial map, the full map, or complete garbage, so - just return failure. */ - if (id != SMAP) { - count = 0; - break; - } - - if (err) - break; - - count++; - desc++; - } while (next && count < E820MAX); - - return boot_params.e820_entries = count; -} - -static int detect_memory_e801(void) -{ - u16 ax, bx, cx, dx; - u8 err; - - bx = cx = dx = 0; - ax = 0xe801; - asm("stc; int $0x15; setc %0" - : "=m" (err), "+a" (ax), "+b" (bx), "+c" (cx), "+d" (dx)); - - if (err) - return -1; - - /* Do we really need to do this? */ - if (cx || dx) { - ax = cx; - bx = dx; - } - - if (ax > 15*1024) - return -1; /* Bogus! */ - - /* This ignores memory above 16MB if we have a memory hole - there. If someone actually finds a machine with a memory - hole at 16MB and no support for 0E820h they should probably - generate a fake e820 map. */ - boot_params.alt_mem_k = (ax == 15*1024) ? (dx << 6)+ax : ax; - - return 0; -} - -static int detect_memory_88(void) -{ - u16 ax; - u8 err; - - ax = 0x8800; - asm("stc; int $0x15; setc %0" : "=bcdm" (err), "+a" (ax)); - - boot_params.screen_info.ext_mem_k = ax; - - return -err; -} - -int detect_memory(void) -{ - int err = -1; - - if (detect_memory_e820() > 0) - err = 0; - - if (!detect_memory_e801()) - err = 0; - - if (!detect_memory_88()) - err = 0; - - return err; -} diff --git a/arch/i386/boot/mtools.conf.in b/arch/i386/boot/mtools.conf.in deleted file mode 100644 index efd6d2490c1..00000000000 --- a/arch/i386/boot/mtools.conf.in +++ /dev/null @@ -1,17 +0,0 @@ -# -# mtools configuration file for "make (b)zdisk" -# - -# Actual floppy drive -drive a: - file="/dev/fd0" - -# 1.44 MB floppy disk image -drive v: - file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=18 filter - -# 2.88 MB floppy disk image (mostly for virtual uses) -drive w: - file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=36 filter - - diff --git a/arch/i386/boot/pm.c b/arch/i386/boot/pm.c deleted file mode 100644 index 09fb342cc62..00000000000 --- a/arch/i386/boot/pm.c +++ /dev/null @@ -1,174 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/pm.c - * - * Prepare the machine for transition to protected mode. - */ - -#include "boot.h" -#include - -/* - * Invoke the realmode switch hook if present; otherwise - * disable all interrupts. - */ -static void realmode_switch_hook(void) -{ - if (boot_params.hdr.realmode_swtch) { - asm volatile("lcallw *%0" - : : "m" (boot_params.hdr.realmode_swtch) - : "eax", "ebx", "ecx", "edx"); - } else { - asm volatile("cli"); - outb(0x80, 0x70); /* Disable NMI */ - io_delay(); - } -} - -/* - * A zImage kernel is loaded at 0x10000 but wants to run at 0x1000. - * A bzImage kernel is loaded and runs at 0x100000. - */ -static void move_kernel_around(void) -{ - /* Note: rely on the compile-time option here rather than - the LOADED_HIGH flag. The Qemu kernel loader unconditionally - sets the loadflags to zero. */ -#ifndef __BIG_KERNEL__ - u16 dst_seg, src_seg; - u32 syssize; - - dst_seg = 0x1000 >> 4; - src_seg = 0x10000 >> 4; - syssize = boot_params.hdr.syssize; /* Size in 16-byte paragraphs */ - - while (syssize) { - int paras = (syssize >= 0x1000) ? 0x1000 : syssize; - int dwords = paras << 2; - - asm volatile("pushw %%es ; " - "pushw %%ds ; " - "movw %1,%%es ; " - "movw %2,%%ds ; " - "xorw %%di,%%di ; " - "xorw %%si,%%si ; " - "rep;movsl ; " - "popw %%ds ; " - "popw %%es" - : "+c" (dwords) - : "r" (dst_seg), "r" (src_seg) - : "esi", "edi"); - - syssize -= paras; - dst_seg += paras; - src_seg += paras; - } -#endif -} - -/* - * Disable all interrupts at the legacy PIC. - */ -static void mask_all_interrupts(void) -{ - outb(0xff, 0xa1); /* Mask all interrupts on the secondary PIC */ - io_delay(); - outb(0xfb, 0x21); /* Mask all but cascade on the primary PIC */ - io_delay(); -} - -/* - * Reset IGNNE# if asserted in the FPU. - */ -static void reset_coprocessor(void) -{ - outb(0, 0xf0); - io_delay(); - outb(0, 0xf1); - io_delay(); -} - -/* - * Set up the GDT - */ -#define GDT_ENTRY(flags,base,limit) \ - (((u64)(base & 0xff000000) << 32) | \ - ((u64)flags << 40) | \ - ((u64)(limit & 0x00ff0000) << 32) | \ - ((u64)(base & 0x00ffff00) << 16) | \ - ((u64)(limit & 0x0000ffff))) - -struct gdt_ptr { - u16 len; - u32 ptr; -} __attribute__((packed)); - -static void setup_gdt(void) -{ - /* There are machines which are known to not boot with the GDT - being 8-byte unaligned. Intel recommends 16 byte alignment. */ - static const u64 boot_gdt[] __attribute__((aligned(16))) = { - /* CS: code, read/execute, 4 GB, base 0 */ - [GDT_ENTRY_BOOT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff), - /* DS: data, read/write, 4 GB, base 0 */ - [GDT_ENTRY_BOOT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff), - }; - /* Xen HVM incorrectly stores a pointer to the gdt_ptr, instead - of the gdt_ptr contents. Thus, make it static so it will - stay in memory, at least long enough that we switch to the - proper kernel GDT. */ - static struct gdt_ptr gdt; - - gdt.len = sizeof(boot_gdt)-1; - gdt.ptr = (u32)&boot_gdt + (ds() << 4); - - asm volatile("lgdtl %0" : : "m" (gdt)); -} - -/* - * Set up the IDT - */ -static void setup_idt(void) -{ - static const struct gdt_ptr null_idt = {0, 0}; - asm volatile("lidtl %0" : : "m" (null_idt)); -} - -/* - * Actual invocation sequence - */ -void go_to_protected_mode(void) -{ - /* Hook before leaving real mode, also disables interrupts */ - realmode_switch_hook(); - - /* Move the kernel/setup to their final resting places */ - move_kernel_around(); - - /* Enable the A20 gate */ - if (enable_a20()) { - puts("A20 gate not responding, unable to boot...\n"); - die(); - } - - /* Reset coprocessor (IGNNE#) */ - reset_coprocessor(); - - /* Mask all interrupts in the PIC */ - mask_all_interrupts(); - - /* Actual transition to protected mode... */ - setup_idt(); - setup_gdt(); - protected_mode_jump(boot_params.hdr.code32_start, - (u32)&boot_params + (ds() << 4)); -} diff --git a/arch/i386/boot/pmjump.S b/arch/i386/boot/pmjump.S deleted file mode 100644 index 2e559233725..00000000000 --- a/arch/i386/boot/pmjump.S +++ /dev/null @@ -1,54 +0,0 @@ -/* ----------------------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/pmjump.S - * - * The actual transition into protected mode - */ - -#include -#include - - .text - - .globl protected_mode_jump - .type protected_mode_jump, @function - - .code16 - -/* - * void protected_mode_jump(u32 entrypoint, u32 bootparams); - */ -protected_mode_jump: - xorl %ebx, %ebx # Flag to indicate this is a boot - movl %edx, %esi # Pointer to boot_params table - movl %eax, 2f # Patch ljmpl instruction - jmp 1f # Short jump to flush instruction q. - -1: - movw $__BOOT_DS, %cx - - movl %cr0, %edx - orb $1, %dl # Protected mode (PE) bit - movl %edx, %cr0 - - movw %cx, %ds - movw %cx, %es - movw %cx, %fs - movw %cx, %gs - movw %cx, %ss - - # Jump to the 32-bit entrypoint - .byte 0x66, 0xea # ljmpl opcode -2: .long 0 # offset - .word __BOOT_CS # segment - - .size protected_mode_jump, .-protected_mode_jump diff --git a/arch/i386/boot/printf.c b/arch/i386/boot/printf.c deleted file mode 100644 index 1a09f9309d3..00000000000 --- a/arch/i386/boot/printf.c +++ /dev/null @@ -1,307 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/printf.c - * - * Oh, it's a waste of space, but oh-so-yummy for debugging. This - * version of printf() does not include 64-bit support. "Live with - * it." - * - */ - -#include "boot.h" - -static int skip_atoi(const char **s) -{ - int i = 0; - - while (isdigit(**s)) - i = i * 10 + *((*s)++) - '0'; - return i; -} - -#define ZEROPAD 1 /* pad with zero */ -#define SIGN 2 /* unsigned/signed long */ -#define PLUS 4 /* show plus */ -#define SPACE 8 /* space if plus */ -#define LEFT 16 /* left justified */ -#define SPECIAL 32 /* 0x */ -#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ - -#define do_div(n,base) ({ \ -int __res; \ -__res = ((unsigned long) n) % (unsigned) base; \ -n = ((unsigned long) n) / (unsigned) base; \ -__res; }) - -static char *number(char *str, long num, int base, int size, int precision, - int type) -{ - char c, sign, tmp[66]; - const char *digits = "0123456789abcdefghijklmnopqrstuvwxyz"; - int i; - - if (type & LARGE) - digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - if (type & LEFT) - type &= ~ZEROPAD; - if (base < 2 || base > 36) - return 0; - c = (type & ZEROPAD) ? '0' : ' '; - sign = 0; - if (type & SIGN) { - if (num < 0) { - sign = '-'; - num = -num; - size--; - } else if (type & PLUS) { - sign = '+'; - size--; - } else if (type & SPACE) { - sign = ' '; - size--; - } - } - if (type & SPECIAL) { - if (base == 16) - size -= 2; - else if (base == 8) - size--; - } - i = 0; - if (num == 0) - tmp[i++] = '0'; - else - while (num != 0) - tmp[i++] = digits[do_div(num, base)]; - if (i > precision) - precision = i; - size -= precision; - if (!(type & (ZEROPAD + LEFT))) - while (size-- > 0) - *str++ = ' '; - if (sign) - *str++ = sign; - if (type & SPECIAL) { - if (base == 8) - *str++ = '0'; - else if (base == 16) { - *str++ = '0'; - *str++ = digits[33]; - } - } - if (!(type & LEFT)) - while (size-- > 0) - *str++ = c; - while (i < precision--) - *str++ = '0'; - while (i-- > 0) - *str++ = tmp[i]; - while (size-- > 0) - *str++ = ' '; - return str; -} - -int vsprintf(char *buf, const char *fmt, va_list args) -{ - int len; - unsigned long num; - int i, base; - char *str; - const char *s; - - int flags; /* flags to number() */ - - int field_width; /* width of output field */ - int precision; /* min. # of digits for integers; max - number of chars for from string */ - int qualifier; /* 'h', 'l', or 'L' for integer fields */ - - for (str = buf; *fmt; ++fmt) { - if (*fmt != '%') { - *str++ = *fmt; - continue; - } - - /* process flags */ - flags = 0; - repeat: - ++fmt; /* this also skips first '%' */ - switch (*fmt) { - case '-': - flags |= LEFT; - goto repeat; - case '+': - flags |= PLUS; - goto repeat; - case ' ': - flags |= SPACE; - goto repeat; - case '#': - flags |= SPECIAL; - goto repeat; - case '0': - flags |= ZEROPAD; - goto repeat; - } - - /* get field width */ - field_width = -1; - if (isdigit(*fmt)) - field_width = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - field_width = va_arg(args, int); - if (field_width < 0) { - field_width = -field_width; - flags |= LEFT; - } - } - - /* get the precision */ - precision = -1; - if (*fmt == '.') { - ++fmt; - if (isdigit(*fmt)) - precision = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - precision = va_arg(args, int); - } - if (precision < 0) - precision = 0; - } - - /* get the conversion qualifier */ - qualifier = -1; - if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') { - qualifier = *fmt; - ++fmt; - } - - /* default base */ - base = 10; - - switch (*fmt) { - case 'c': - if (!(flags & LEFT)) - while (--field_width > 0) - *str++ = ' '; - *str++ = (unsigned char)va_arg(args, int); - while (--field_width > 0) - *str++ = ' '; - continue; - - case 's': - s = va_arg(args, char *); - len = strnlen(s, precision); - - if (!(flags & LEFT)) - while (len < field_width--) - *str++ = ' '; - for (i = 0; i < len; ++i) - *str++ = *s++; - while (len < field_width--) - *str++ = ' '; - continue; - - case 'p': - if (field_width == -1) { - field_width = 2 * sizeof(void *); - flags |= ZEROPAD; - } - str = number(str, - (unsigned long)va_arg(args, void *), 16, - field_width, precision, flags); - continue; - - case 'n': - if (qualifier == 'l') { - long *ip = va_arg(args, long *); - *ip = (str - buf); - } else { - int *ip = va_arg(args, int *); - *ip = (str - buf); - } - continue; - - case '%': - *str++ = '%'; - continue; - - /* integer number formats - set up the flags and "break" */ - case 'o': - base = 8; - break; - - case 'X': - flags |= LARGE; - case 'x': - base = 16; - break; - - case 'd': - case 'i': - flags |= SIGN; - case 'u': - break; - - default: - *str++ = '%'; - if (*fmt) - *str++ = *fmt; - else - --fmt; - continue; - } - if (qualifier == 'l') - num = va_arg(args, unsigned long); - else if (qualifier == 'h') { - num = (unsigned short)va_arg(args, int); - if (flags & SIGN) - num = (short)num; - } else if (flags & SIGN) - num = va_arg(args, int); - else - num = va_arg(args, unsigned int); - str = number(str, num, base, field_width, precision, flags); - } - *str = '\0'; - return str - buf; -} - -int sprintf(char *buf, const char *fmt, ...) -{ - va_list args; - int i; - - va_start(args, fmt); - i = vsprintf(buf, fmt, args); - va_end(args); - return i; -} - -int printf(const char *fmt, ...) -{ - char printf_buf[1024]; - va_list args; - int printed; - - va_start(args, fmt); - printed = vsprintf(printf_buf, fmt, args); - va_end(args); - - puts(printf_buf); - - return printed; -} diff --git a/arch/i386/boot/setup.ld b/arch/i386/boot/setup.ld deleted file mode 100644 index df9234b3a5e..00000000000 --- a/arch/i386/boot/setup.ld +++ /dev/null @@ -1,54 +0,0 @@ -/* - * setup.ld - * - * Linker script for the i386 setup code - */ -OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") -OUTPUT_ARCH(i386) -ENTRY(_start) - -SECTIONS -{ - . = 0; - .bstext : { *(.bstext) } - .bsdata : { *(.bsdata) } - - . = 497; - .header : { *(.header) } - .inittext : { *(.inittext) } - .initdata : { *(.initdata) } - .text : { *(.text*) } - - . = ALIGN(16); - .rodata : { *(.rodata*) } - - .videocards : { - video_cards = .; - *(.videocards) - video_cards_end = .; - } - - . = ALIGN(16); - .data : { *(.data*) } - - .signature : { - setup_sig = .; - LONG(0x5a5aaa55) - } - - - . = ALIGN(16); - .bss : - { - __bss_start = .; - *(.bss) - __bss_end = .; - } - . = ALIGN(16); - _end = .; - - /DISCARD/ : { *(.note*) } - - . = ASSERT(_end <= 0x8000, "Setup too big!"); - . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!"); -} diff --git a/arch/i386/boot/string.c b/arch/i386/boot/string.c deleted file mode 100644 index 481a2209778..00000000000 --- a/arch/i386/boot/string.c +++ /dev/null @@ -1,52 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/string.c - * - * Very basic string functions - */ - -#include "boot.h" - -int strcmp(const char *str1, const char *str2) -{ - const unsigned char *s1 = (const unsigned char *)str1; - const unsigned char *s2 = (const unsigned char *)str2; - int delta = 0; - - while (*s1 || *s2) { - delta = *s2 - *s1; - if (delta) - return delta; - s1++; - s2++; - } - return 0; -} - -size_t strnlen(const char *s, size_t maxlen) -{ - const char *es = s; - while (*es && maxlen) { - es++; - maxlen--; - } - - return (es - s); -} - -unsigned int atou(const char *s) -{ - unsigned int i = 0; - while (isdigit(*s)) - i = i * 10 + (*s++ - '0'); - return i; -} diff --git a/arch/i386/boot/tty.c b/arch/i386/boot/tty.c deleted file mode 100644 index f3f14bd2637..00000000000 --- a/arch/i386/boot/tty.c +++ /dev/null @@ -1,112 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/tty.c - * - * Very simple screen I/O - * XXX: Probably should add very simple serial I/O? - */ - -#include "boot.h" - -/* - * These functions are in .inittext so they can be used to signal - * error during initialization. - */ - -void __attribute__((section(".inittext"))) putchar(int ch) -{ - unsigned char c = ch; - - if (c == '\n') - putchar('\r'); /* \n -> \r\n */ - - /* int $0x10 is known to have bugs involving touching registers - it shouldn't. Be extra conservative... */ - asm volatile("pushal; pushw %%ds; int $0x10; popw %%ds; popal" - : : "b" (0x0007), "c" (0x0001), "a" (0x0e00|ch)); -} - -void __attribute__((section(".inittext"))) puts(const char *str) -{ - int n = 0; - while (*str) { - putchar(*str++); - n++; - } -} - -/* - * Read the CMOS clock through the BIOS, and return the - * seconds in BCD. - */ - -static u8 gettime(void) -{ - u16 ax = 0x0200; - u16 cx, dx; - - asm volatile("int $0x1a" - : "+a" (ax), "=c" (cx), "=d" (dx) - : : "ebx", "esi", "edi"); - - return dx >> 8; -} - -/* - * Read from the keyboard - */ -int getchar(void) -{ - u16 ax = 0; - asm volatile("int $0x16" : "+a" (ax)); - - return ax & 0xff; -} - -static int kbd_pending(void) -{ - u8 pending; - asm volatile("int $0x16; setnz %0" - : "=rm" (pending) - : "a" (0x0100)); - return pending; -} - -void kbd_flush(void) -{ - for (;;) { - if (!kbd_pending()) - break; - getchar(); - } -} - -int getchar_timeout(void) -{ - int cnt = 30; - int t0, t1; - - t0 = gettime(); - - while (cnt) { - if (kbd_pending()) - return getchar(); - - t1 = gettime(); - if (t0 != t1) { - cnt--; - t0 = t1; - } - } - - return 0; /* Timeout! */ -} diff --git a/arch/i386/boot/version.c b/arch/i386/boot/version.c deleted file mode 100644 index c61462f7d9a..00000000000 --- a/arch/i386/boot/version.c +++ /dev/null @@ -1,23 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/version.c - * - * Kernel version string - */ - -#include "boot.h" -#include -#include - -const char kernel_version[] = - UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") " - UTS_VERSION; diff --git a/arch/i386/boot/vesa.h b/arch/i386/boot/vesa.h deleted file mode 100644 index ff5b73cd406..00000000000 --- a/arch/i386/boot/vesa.h +++ /dev/null @@ -1,79 +0,0 @@ -/* ----------------------------------------------------------------------- * - * - * Copyright 1999-2007 H. Peter Anvin - All Rights Reserved - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, Inc., 53 Temple Place Ste 330, - * Boston MA 02111-1307, USA; either version 2 of the License, or - * (at your option) any later version; incorporated herein by reference. - * - * ----------------------------------------------------------------------- */ - -#ifndef BOOT_VESA_H -#define BOOT_VESA_H - -typedef struct { - u16 off, seg; -} far_ptr; - -/* VESA General Information table */ -struct vesa_general_info { - u32 signature; /* 0 Magic number = "VESA" */ - u16 version; /* 4 */ - far_ptr vendor_string; /* 6 */ - u32 capabilities; /* 10 */ - far_ptr video_mode_ptr; /* 14 */ - u16 total_memory; /* 18 */ - - u16 oem_software_rev; /* 20 */ - far_ptr oem_vendor_name_ptr; /* 22 */ - far_ptr oem_product_name_ptr; /* 26 */ - far_ptr oem_product_rev_ptr; /* 30 */ - - u8 reserved[222]; /* 34 */ - u8 oem_data[256]; /* 256 */ -} __attribute__ ((packed)); - -#define VESA_MAGIC ('V' + ('E' << 8) + ('S' << 16) + ('A' << 24)) -#define VBE2_MAGIC ('V' + ('B' << 8) + ('E' << 16) + ('2' << 24)) - -struct vesa_mode_info { - u16 mode_attr; /* 0 */ - u8 win_attr[2]; /* 2 */ - u16 win_grain; /* 4 */ - u16 win_size; /* 6 */ - u16 win_seg[2]; /* 8 */ - far_ptr win_scheme; /* 12 */ - u16 logical_scan; /* 16 */ - - u16 h_res; /* 18 */ - u16 v_res; /* 20 */ - u8 char_width; /* 22 */ - u8 char_height; /* 23 */ - u8 memory_planes; /* 24 */ - u8 bpp; /* 25 */ - u8 banks; /* 26 */ - u8 memory_layout; /* 27 */ - u8 bank_size; /* 28 */ - u8 image_planes; /* 29 */ - u8 page_function; /* 30 */ - - u8 rmask; /* 31 */ - u8 rpos; /* 32 */ - u8 gmask; /* 33 */ - u8 gpos; /* 34 */ - u8 bmask; /* 35 */ - u8 bpos; /* 36 */ - u8 resv_mask; /* 37 */ - u8 resv_pos; /* 38 */ - u8 dcm_info; /* 39 */ - - u32 lfb_ptr; /* 40 Linear frame buffer address */ - u32 offscreen_ptr; /* 44 Offscreen memory address */ - u16 offscreen_size; /* 48 */ - - u8 reserved[206]; /* 50 */ -} __attribute__ ((packed)); - -#endif /* LIB_SYS_VESA_H */ diff --git a/arch/i386/boot/video-bios.c b/arch/i386/boot/video-bios.c deleted file mode 100644 index 68e65d95cdf..00000000000 --- a/arch/i386/boot/video-bios.c +++ /dev/null @@ -1,125 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/video-bios.c - * - * Standard video BIOS modes - * - * We have two options for this; silent and scanned. - */ - -#include "boot.h" -#include "video.h" - -__videocard video_bios; - -/* Set a conventional BIOS mode */ -static int set_bios_mode(u8 mode); - -static int bios_set_mode(struct mode_info *mi) -{ - return set_bios_mode(mi->mode - VIDEO_FIRST_BIOS); -} - -static int set_bios_mode(u8 mode) -{ - u16 ax; - u8 new_mode; - - ax = mode; /* AH=0x00 Set Video Mode */ - asm volatile(INT10 - : "+a" (ax) - : : "ebx", "ecx", "edx", "esi", "edi"); - - ax = 0x0f00; /* Get Current Video Mode */ - asm volatile(INT10 - : "+a" (ax) - : : "ebx", "ecx", "edx", "esi", "edi"); - - do_restore = 1; /* Assume video contents were lost */ - new_mode = ax & 0x7f; /* Not all BIOSes are clean with the top bit */ - - if (new_mode == mode) - return 0; /* Mode change OK */ - - if (new_mode != boot_params.screen_info.orig_video_mode) { - /* Mode setting failed, but we didn't end up where we - started. That's bad. Try to revert to the original - video mode. */ - ax = boot_params.screen_info.orig_video_mode; - asm volatile(INT10 - : "+a" (ax) - : : "ebx", "ecx", "edx", "esi", "edi"); - } - return -1; -} - -static int bios_probe(void) -{ - u8 mode; - u8 saved_mode = boot_params.screen_info.orig_video_mode; - u16 crtc; - struct mode_info *mi; - int nmodes = 0; - - if (adapter != ADAPTER_EGA && adapter != ADAPTER_VGA) - return 0; - - set_fs(0); - crtc = vga_crtc(); - - video_bios.modes = GET_HEAP(struct mode_info, 0); - - for (mode = 0x14; mode <= 0x7f; mode++) { - if (heap_free() < sizeof(struct mode_info)) - break; - - if (mode_defined(VIDEO_FIRST_BIOS+mode)) - continue; - - if (set_bios_mode(mode)) - continue; - - /* Try to verify that it's a text mode. */ - - /* Attribute Controller: make graphics controller disabled */ - if (in_idx(0x3c0, 0x10) & 0x01) - continue; - - /* Graphics Controller: verify Alpha addressing enabled */ - if (in_idx(0x3ce, 0x06) & 0x01) - continue; - - /* CRTC cursor location low should be zero(?) */ - if (in_idx(crtc, 0x0f)) - continue; - - mi = GET_HEAP(struct mode_info, 1); - mi->mode = VIDEO_FIRST_BIOS+mode; - mi->x = rdfs16(0x44a); - mi->y = rdfs8(0x484)+1; - nmodes++; - } - - set_bios_mode(saved_mode); - - return nmodes; -} - -__videocard video_bios = -{ - .card_name = "BIOS (scanned)", - .probe = bios_probe, - .set_mode = bios_set_mode, - .unsafe = 1, - .xmode_first = VIDEO_FIRST_BIOS, - .xmode_n = 0x80, -}; diff --git a/arch/i386/boot/video-vesa.c b/arch/i386/boot/video-vesa.c deleted file mode 100644 index 19219071071..00000000000 --- a/arch/i386/boot/video-vesa.c +++ /dev/null @@ -1,292 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/video-vesa.c - * - * VESA text modes - */ - -#include "boot.h" -#include "video.h" -#include "vesa.h" - -/* VESA information */ -static struct vesa_general_info vginfo; -static struct vesa_mode_info vminfo; - -__videocard video_vesa; - -static void vesa_store_mode_params_graphics(void); - -static int vesa_probe(void) -{ -#if defined(CONFIG_VIDEO_VESA) || defined(CONFIG_FIRMWARE_EDID) - u16 ax, cx, di; - u16 mode; - addr_t mode_ptr; - struct mode_info *mi; - int nmodes = 0; - - video_vesa.modes = GET_HEAP(struct mode_info, 0); - - vginfo.signature = VBE2_MAGIC; - - ax = 0x4f00; - di = (size_t)&vginfo; - asm(INT10 - : "+a" (ax), "+D" (di), "=m" (vginfo) - : : "ebx", "ecx", "edx", "esi"); - - if (ax != 0x004f || - vginfo.signature != VESA_MAGIC || - vginfo.version < 0x0102) - return 0; /* Not present */ -#endif /* CONFIG_VIDEO_VESA || CONFIG_FIRMWARE_EDID */ -#ifdef CONFIG_VIDEO_VESA - set_fs(vginfo.video_mode_ptr.seg); - mode_ptr = vginfo.video_mode_ptr.off; - - while ((mode = rdfs16(mode_ptr)) != 0xffff) { - mode_ptr += 2; - - if (heap_free() < sizeof(struct mode_info)) - break; /* Heap full, can't save mode info */ - - if (mode & ~0x1ff) - continue; - - memset(&vminfo, 0, sizeof vminfo); /* Just in case... */ - - ax = 0x4f01; - cx = mode; - di = (size_t)&vminfo; - asm(INT10 - : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo) - : : "ebx", "edx", "esi"); - - if (ax != 0x004f) - continue; - - if ((vminfo.mode_attr & 0x15) == 0x05) { - /* Text Mode, TTY BIOS supported, - supported by hardware */ - mi = GET_HEAP(struct mode_info, 1); - mi->mode = mode + VIDEO_FIRST_VESA; - mi->x = vminfo.h_res; - mi->y = vminfo.v_res; - nmodes++; - } else if ((vminfo.mode_attr & 0x99) == 0x99) { -#ifdef CONFIG_FB - /* Graphics mode, color, linear frame buffer - supported -- register the mode but hide from - the menu. Only do this if framebuffer is - configured, however, otherwise the user will - be left without a screen. */ - mi = GET_HEAP(struct mode_info, 1); - mi->mode = mode + VIDEO_FIRST_VESA; - mi->x = mi->y = 0; - nmodes++; -#endif - } - } - - return nmodes; -#else - return 0; -#endif /* CONFIG_VIDEO_VESA */ -} - -static int vesa_set_mode(struct mode_info *mode) -{ - u16 ax, bx, cx, di; - int is_graphic; - u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA; - - memset(&vminfo, 0, sizeof vminfo); /* Just in case... */ - - ax = 0x4f01; - cx = vesa_mode; - di = (size_t)&vminfo; - asm(INT10 - : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo) - : : "ebx", "edx", "esi"); - - if (ax != 0x004f) - return -1; - - if ((vminfo.mode_attr & 0x15) == 0x05) { - /* It's a supported text mode */ - is_graphic = 0; - } else if ((vminfo.mode_attr & 0x99) == 0x99) { - /* It's a graphics mode with linear frame buffer */ - is_graphic = 1; - vesa_mode |= 0x4000; /* Request linear frame buffer */ - } else { - return -1; /* Invalid mode */ - } - - - ax = 0x4f02; - bx = vesa_mode; - di = 0; - asm volatile(INT10 - : "+a" (ax), "+b" (bx), "+D" (di) - : : "ecx", "edx", "esi"); - - if (ax != 0x004f) - return -1; - - graphic_mode = is_graphic; - if (!is_graphic) { - /* Text mode */ - force_x = mode->x; - force_y = mode->y; - do_restore = 1; - } else { - /* Graphics mode */ - vesa_store_mode_params_graphics(); - } - - return 0; -} - - -/* Switch DAC to 8-bit mode */ -static void vesa_dac_set_8bits(void) -{ - u8 dac_size = 6; - - /* If possible, switch the DAC to 8-bit mode */ - if (vginfo.capabilities & 1) { - u16 ax, bx; - - ax = 0x4f08; - bx = 0x0800; - asm volatile(INT10 - : "+a" (ax), "+b" (bx) - : : "ecx", "edx", "esi", "edi"); - - if (ax == 0x004f) - dac_size = bx >> 8; - } - - /* Set the color sizes to the DAC size, and offsets to 0 */ - boot_params.screen_info.red_size = dac_size; - boot_params.screen_info.green_size = dac_size; - boot_params.screen_info.blue_size = dac_size; - boot_params.screen_info.rsvd_size = dac_size; - - boot_params.screen_info.red_pos = 0; - boot_params.screen_info.green_pos = 0; - boot_params.screen_info.blue_pos = 0; - boot_params.screen_info.rsvd_pos = 0; -} - -/* Save the VESA protected mode info */ -static void vesa_store_pm_info(void) -{ - u16 ax, bx, di, es; - - ax = 0x4f0a; - bx = di = 0; - asm("pushw %%es; "INT10"; movw %%es,%0; popw %%es" - : "=d" (es), "+a" (ax), "+b" (bx), "+D" (di) - : : "ecx", "esi"); - - if (ax != 0x004f) - return; - - boot_params.screen_info.vesapm_seg = es; - boot_params.screen_info.vesapm_off = di; -} - -/* - * Save video mode parameters for graphics mode - */ -static void vesa_store_mode_params_graphics(void) -{ - /* Tell the kernel we're in VESA graphics mode */ - boot_params.screen_info.orig_video_isVGA = 0x23; - - /* Mode parameters */ - boot_params.screen_info.vesa_attributes = vminfo.mode_attr; - boot_params.screen_info.lfb_linelength = vminfo.logical_scan; - boot_params.screen_info.lfb_width = vminfo.h_res; - boot_params.screen_info.lfb_height = vminfo.v_res; - boot_params.screen_info.lfb_depth = vminfo.bpp; - boot_params.screen_info.pages = vminfo.image_planes; - boot_params.screen_info.lfb_base = vminfo.lfb_ptr; - memcpy(&boot_params.screen_info.red_size, - &vminfo.rmask, 8); - - /* General parameters */ - boot_params.screen_info.lfb_size = vginfo.total_memory; - - if (vminfo.bpp <= 8) - vesa_dac_set_8bits(); - - vesa_store_pm_info(); -} - -/* - * Save EDID information for the kernel; this is invoked, separately, - * after mode-setting. - */ -void vesa_store_edid(void) -{ -#ifdef CONFIG_FIRMWARE_EDID - u16 ax, bx, cx, dx, di; - - /* Apparently used as a nonsense token... */ - memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info); - - if (vginfo.version < 0x0200) - return; /* EDID requires VBE 2.0+ */ - - ax = 0x4f15; /* VBE DDC */ - bx = 0x0000; /* Report DDC capabilities */ - cx = 0; /* Controller 0 */ - di = 0; /* ES:DI must be 0 by spec */ - - /* Note: The VBE DDC spec is different from the main VESA spec; - we genuinely have to assume all registers are destroyed here. */ - - asm("pushw %%es; movw %2,%%es; "INT10"; popw %%es" - : "+a" (ax), "+b" (bx) - : "c" (cx), "D" (di) - : "esi"); - - if (ax != 0x004f) - return; /* No EDID */ - - /* BH = time in seconds to transfer EDD information */ - /* BL = DDC level supported */ - - ax = 0x4f15; /* VBE DDC */ - bx = 0x0001; /* Read EDID */ - cx = 0; /* Controller 0 */ - dx = 0; /* EDID block number */ - di =(size_t) &boot_params.edid_info; /* (ES:)Pointer to block */ - asm(INT10 - : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info) - : "c" (cx), "D" (di) - : "esi"); -#endif /* CONFIG_FIRMWARE_EDID */ -} - -__videocard video_vesa = -{ - .card_name = "VESA", - .probe = vesa_probe, - .set_mode = vesa_set_mode, - .xmode_first = VIDEO_FIRST_VESA, - .xmode_n = 0x200, -}; diff --git a/arch/i386/boot/video-vga.c b/arch/i386/boot/video-vga.c deleted file mode 100644 index aef02f9ec0c..00000000000 --- a/arch/i386/boot/video-vga.c +++ /dev/null @@ -1,261 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/video-vga.c - * - * Common all-VGA modes - */ - -#include "boot.h" -#include "video.h" - -static struct mode_info vga_modes[] = { - { VIDEO_80x25, 80, 25 }, - { VIDEO_8POINT, 80, 50 }, - { VIDEO_80x43, 80, 43 }, - { VIDEO_80x28, 80, 28 }, - { VIDEO_80x30, 80, 30 }, - { VIDEO_80x34, 80, 34 }, - { VIDEO_80x60, 80, 60 }, -}; - -static struct mode_info ega_modes[] = { - { VIDEO_80x25, 80, 25 }, - { VIDEO_8POINT, 80, 43 }, -}; - -static struct mode_info cga_modes[] = { - { VIDEO_80x25, 80, 25 }, -}; - -__videocard video_vga; - -/* Set basic 80x25 mode */ -static u8 vga_set_basic_mode(void) -{ - u16 ax; - u8 rows; - u8 mode; - -#ifdef CONFIG_VIDEO_400_HACK - if (adapter >= ADAPTER_VGA) { - asm volatile(INT10 - : : "a" (0x1202), "b" (0x0030) - : "ecx", "edx", "esi", "edi"); - } -#endif - - ax = 0x0f00; - asm volatile(INT10 - : "+a" (ax) - : : "ebx", "ecx", "edx", "esi", "edi"); - - mode = (u8)ax; - - set_fs(0); - rows = rdfs8(0x484); /* rows minus one */ - -#ifndef CONFIG_VIDEO_400_HACK - if ((ax == 0x5003 || ax == 0x5007) && - (rows == 0 || rows == 24)) - return mode; -#endif - - if (mode != 3 && mode != 7) - mode = 3; - - /* Set the mode */ - ax = mode; - asm volatile(INT10 - : "+a" (ax) - : : "ebx", "ecx", "edx", "esi", "edi"); - do_restore = 1; - return mode; -} - -static void vga_set_8font(void) -{ - /* Set 8x8 font - 80x43 on EGA, 80x50 on VGA */ - - /* Set 8x8 font */ - asm volatile(INT10 : : "a" (0x1112), "b" (0)); - - /* Use alternate print screen */ - asm volatile(INT10 : : "a" (0x1200), "b" (0x20)); - - /* Turn off cursor emulation */ - asm volatile(INT10 : : "a" (0x1201), "b" (0x34)); - - /* Cursor is scan lines 6-7 */ - asm volatile(INT10 : : "a" (0x0100), "c" (0x0607)); -} - -static void vga_set_14font(void) -{ - /* Set 9x14 font - 80x28 on VGA */ - - /* Set 9x14 font */ - asm volatile(INT10 : : "a" (0x1111), "b" (0)); - - /* Turn off cursor emulation */ - asm volatile(INT10 : : "a" (0x1201), "b" (0x34)); - - /* Cursor is scan lines 11-12 */ - asm volatile(INT10 : : "a" (0x0100), "c" (0x0b0c)); -} - -static void vga_set_80x43(void) -{ - /* Set 80x43 mode on VGA (not EGA) */ - - /* Set 350 scans */ - asm volatile(INT10 : : "a" (0x1201), "b" (0x30)); - - /* Reset video mode */ - asm volatile(INT10 : : "a" (0x0003)); - - vga_set_8font(); -} - -/* I/O address of the VGA CRTC */ -u16 vga_crtc(void) -{ - return (inb(0x3cc) & 1) ? 0x3d4 : 0x3b4; -} - -static void vga_set_480_scanlines(int end) -{ - u16 crtc; - u8 csel; - - crtc = vga_crtc(); - - out_idx(0x0c, crtc, 0x11); /* Vertical sync end, unlock CR0-7 */ - out_idx(0x0b, crtc, 0x06); /* Vertical total */ - out_idx(0x3e, crtc, 0x07); /* Vertical overflow */ - out_idx(0xea, crtc, 0x10); /* Vertical sync start */ - out_idx(end, crtc, 0x12); /* Vertical display end */ - out_idx(0xe7, crtc, 0x15); /* Vertical blank start */ - out_idx(0x04, crtc, 0x16); /* Vertical blank end */ - csel = inb(0x3cc); - csel &= 0x0d; - csel |= 0xe2; - outb(csel, 0x3cc); -} - -static void vga_set_80x30(void) -{ - vga_set_480_scanlines(0xdf); -} - -static void vga_set_80x34(void) -{ - vga_set_14font(); - vga_set_480_scanlines(0xdb); -} - -static void vga_set_80x60(void) -{ - vga_set_8font(); - vga_set_480_scanlines(0xdf); -} - -static int vga_set_mode(struct mode_info *mode) -{ - /* Set the basic mode */ - vga_set_basic_mode(); - - /* Override a possibly broken BIOS */ - force_x = mode->x; - force_y = mode->y; - - switch (mode->mode) { - case VIDEO_80x25: - break; - case VIDEO_8POINT: - vga_set_8font(); - break; - case VIDEO_80x43: - vga_set_80x43(); - break; - case VIDEO_80x28: - vga_set_14font(); - break; - case VIDEO_80x30: - vga_set_80x30(); - break; - case VIDEO_80x34: - vga_set_80x34(); - break; - case VIDEO_80x60: - vga_set_80x60(); - break; - } - - return 0; -} - -/* - * Note: this probe includes basic information required by all - * systems. It should be executed first, by making sure - * video-vga.c is listed first in the Makefile. - */ -static int vga_probe(void) -{ - static const char *card_name[] = { - "CGA/MDA/HGC", "EGA", "VGA" - }; - static struct mode_info *mode_lists[] = { - cga_modes, - ega_modes, - vga_modes, - }; - static int mode_count[] = { - sizeof(cga_modes)/sizeof(struct mode_info), - sizeof(ega_modes)/sizeof(struct mode_info), - sizeof(vga_modes)/sizeof(struct mode_info), - }; - u8 vga_flag; - - asm(INT10 - : "=b" (boot_params.screen_info.orig_video_ega_bx) - : "a" (0x1200), "b" (0x10) /* Check EGA/VGA */ - : "ecx", "edx", "esi", "edi"); - - /* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */ - if ((u8)boot_params.screen_info.orig_video_ega_bx != 0x10) { - /* EGA/VGA */ - asm(INT10 - : "=a" (vga_flag) - : "a" (0x1a00) - : "ebx", "ecx", "edx", "esi", "edi"); - - if (vga_flag == 0x1a) { - adapter = ADAPTER_VGA; - boot_params.screen_info.orig_video_isVGA = 1; - } else { - adapter = ADAPTER_EGA; - } - } else { - adapter = ADAPTER_CGA; - } - - video_vga.modes = mode_lists[adapter]; - video_vga.card_name = card_name[adapter]; - return mode_count[adapter]; -} - -__videocard video_vga = -{ - .card_name = "VGA", - .probe = vga_probe, - .set_mode = vga_set_mode, -}; diff --git a/arch/i386/boot/video.c b/arch/i386/boot/video.c deleted file mode 100644 index e4ba897bf9a..00000000000 --- a/arch/i386/boot/video.c +++ /dev/null @@ -1,467 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/video.c - * - * Select video mode - */ - -#include "boot.h" -#include "video.h" -#include "vesa.h" - -/* - * Mode list variables - */ -static struct card_info cards[]; /* List of cards to probe for */ - -/* - * Common variables - */ -int adapter; /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */ -u16 video_segment; -int force_x, force_y; /* Don't query the BIOS for cols/rows */ - -int do_restore = 0; /* Screen contents changed during mode flip */ -int graphic_mode; /* Graphic mode with linear frame buffer */ - -static void store_cursor_position(void) -{ - u16 curpos; - u16 ax, bx; - - ax = 0x0300; - bx = 0; - asm(INT10 - : "=d" (curpos), "+a" (ax), "+b" (bx) - : : "ecx", "esi", "edi"); - - boot_params.screen_info.orig_x = curpos; - boot_params.screen_info.orig_y = curpos >> 8; -} - -static void store_video_mode(void) -{ - u16 ax, page; - - /* N.B.: the saving of the video page here is a bit silly, - since we pretty much assume page 0 everywhere. */ - ax = 0x0f00; - asm(INT10 - : "+a" (ax), "=b" (page) - : : "ecx", "edx", "esi", "edi"); - - /* Not all BIOSes are clean with respect to the top bit */ - boot_params.screen_info.orig_video_mode = ax & 0x7f; - boot_params.screen_info.orig_video_page = page >> 8; -} - -/* - * Store the video mode parameters for later usage by the kernel. - * This is done by asking the BIOS except for the rows/columns - * parameters in the default 80x25 mode -- these are set directly, - * because some very obscure BIOSes supply insane values. - */ -static void store_mode_params(void) -{ - u16 font_size; - int x, y; - - /* For graphics mode, it is up to the mode-setting driver - (currently only video-vesa.c) to store the parameters */ - if (graphic_mode) - return; - - store_cursor_position(); - store_video_mode(); - - if (boot_params.screen_info.orig_video_mode == 0x07) { - /* MDA, HGC, or VGA in monochrome mode */ - video_segment = 0xb000; - } else { - /* CGA, EGA, VGA and so forth */ - video_segment = 0xb800; - } - - set_fs(0); - font_size = rdfs16(0x485); /* Font size, BIOS area */ - boot_params.screen_info.orig_video_points = font_size; - - x = rdfs16(0x44a); - y = (adapter == ADAPTER_CGA) ? 25 : rdfs8(0x484)+1; - - if (force_x) - x = force_x; - if (force_y) - y = force_y; - - boot_params.screen_info.orig_video_cols = x; - boot_params.screen_info.orig_video_lines = y; -} - -/* Probe the video drivers and have them generate their mode lists. */ -static void probe_cards(int unsafe) -{ - struct card_info *card; - static u8 probed[2]; - - if (probed[unsafe]) - return; - - probed[unsafe] = 1; - - for (card = video_cards; card < video_cards_end; card++) { - if (card->unsafe == unsafe) { - if (card->probe) - card->nmodes = card->probe(); - else - card->nmodes = 0; - } - } -} - -/* Test if a mode is defined */ -int mode_defined(u16 mode) -{ - struct card_info *card; - struct mode_info *mi; - int i; - - for (card = video_cards; card < video_cards_end; card++) { - mi = card->modes; - for (i = 0; i < card->nmodes; i++, mi++) { - if (mi->mode == mode) - return 1; - } - } - - return 0; -} - -/* Set mode (without recalc) */ -static int raw_set_mode(u16 mode, u16 *real_mode) -{ - int nmode, i; - struct card_info *card; - struct mode_info *mi; - - /* Drop the recalc bit if set */ - mode &= ~VIDEO_RECALC; - - /* Scan for mode based on fixed ID, position, or resolution */ - nmode = 0; - for (card = video_cards; card < video_cards_end; card++) { - mi = card->modes; - for (i = 0; i < card->nmodes; i++, mi++) { - int visible = mi->x || mi->y; - - if ((mode == nmode && visible) || - mode == mi->mode || - mode == (mi->y << 8)+mi->x) { - *real_mode = mi->mode; - return card->set_mode(mi); - } - - if (visible) - nmode++; - } - } - - /* Nothing found? Is it an "exceptional" (unprobed) mode? */ - for (card = video_cards; card < video_cards_end; card++) { - if (mode >= card->xmode_first && - mode < card->xmode_first+card->xmode_n) { - struct mode_info mix; - *real_mode = mix.mode = mode; - mix.x = mix.y = 0; - return card->set_mode(&mix); - } - } - - /* Otherwise, failure... */ - return -1; -} - -/* - * Recalculate the vertical video cutoff (hack!) - */ -static void vga_recalc_vertical(void) -{ - unsigned int font_size, rows; - u16 crtc; - u8 pt, ov; - - set_fs(0); - font_size = rdfs8(0x485); /* BIOS: font size (pixels) */ - rows = force_y ? force_y : rdfs8(0x484)+1; /* Text rows */ - - rows *= font_size; /* Visible scan lines */ - rows--; /* ... minus one */ - - crtc = vga_crtc(); - - pt = in_idx(crtc, 0x11); - pt &= ~0x80; /* Unlock CR0-7 */ - out_idx(pt, crtc, 0x11); - - out_idx((u8)rows, crtc, 0x12); /* Lower height register */ - - ov = in_idx(crtc, 0x07); /* Overflow register */ - ov &= 0xbd; - ov |= (rows >> (8-1)) & 0x02; - ov |= (rows >> (9-6)) & 0x40; - out_idx(ov, crtc, 0x07); -} - -/* Set mode (with recalc if specified) */ -static int set_mode(u16 mode) -{ - int rv; - u16 real_mode; - - /* Very special mode numbers... */ - if (mode == VIDEO_CURRENT_MODE) - return 0; /* Nothing to do... */ - else if (mode == NORMAL_VGA) - mode = VIDEO_80x25; - else if (mode == EXTENDED_VGA) - mode = VIDEO_8POINT; - - rv = raw_set_mode(mode, &real_mode); - if (rv) - return rv; - - if (mode & VIDEO_RECALC) - vga_recalc_vertical(); - - /* Save the canonical mode number for the kernel, not - an alias, size specification or menu position */ - boot_params.hdr.vid_mode = real_mode; - return 0; -} - -static unsigned int get_entry(void) -{ - char entry_buf[4]; - int i, len = 0; - int key; - unsigned int v; - - do { - key = getchar(); - - if (key == '\b') { - if (len > 0) { - puts("\b \b"); - len--; - } - } else if ((key >= '0' && key <= '9') || - (key >= 'A' && key <= 'Z') || - (key >= 'a' && key <= 'z')) { - if (len < sizeof entry_buf) { - entry_buf[len++] = key; - putchar(key); - } - } - } while (key != '\r'); - putchar('\n'); - - if (len == 0) - return VIDEO_CURRENT_MODE; /* Default */ - - v = 0; - for (i = 0; i < len; i++) { - v <<= 4; - key = entry_buf[i] | 0x20; - v += (key > '9') ? key-'a'+10 : key-'0'; - } - - return v; -} - -static void display_menu(void) -{ - struct card_info *card; - struct mode_info *mi; - char ch; - int i; - - puts("Mode: COLSxROWS:\n"); - - ch = '0'; - for (card = video_cards; card < video_cards_end; card++) { - mi = card->modes; - for (i = 0; i < card->nmodes; i++, mi++) { - int visible = mi->x && mi->y; - u16 mode_id = mi->mode ? mi->mode : - (mi->y << 8)+mi->x; - - if (!visible) - continue; /* Hidden mode */ - - printf("%c %04X %3dx%-3d %s\n", - ch, mode_id, mi->x, mi->y, card->card_name); - - if (ch == '9') - ch = 'a'; - else if (ch == 'z' || ch == ' ') - ch = ' '; /* Out of keys... */ - else - ch++; - } - } -} - -#define H(x) ((x)-'a'+10) -#define SCAN ((H('s')<<12)+(H('c')<<8)+(H('a')<<4)+H('n')) - -static unsigned int mode_menu(void) -{ - int key; - unsigned int sel; - - puts("Press to see video modes available, " - " to continue, or wait 30 sec\n"); - - kbd_flush(); - while (1) { - key = getchar_timeout(); - if (key == ' ' || key == 0) - return VIDEO_CURRENT_MODE; /* Default */ - if (key == '\r') - break; - putchar('\a'); /* Beep! */ - } - - - for (;;) { - display_menu(); - - puts("Enter a video mode or \"scan\" to scan for " - "additional modes: "); - sel = get_entry(); - if (sel != SCAN) - return sel; - - probe_cards(1); - } -} - -#ifdef CONFIG_VIDEO_RETAIN -/* Save screen content to the heap */ -struct saved_screen { - int x, y; - int curx, cury; - u16 *data; -} saved; - -static void save_screen(void) -{ - /* Should be called after store_mode_params() */ - saved.x = boot_params.screen_info.orig_video_cols; - saved.y = boot_params.screen_info.orig_video_lines; - saved.curx = boot_params.screen_info.orig_x; - saved.cury = boot_params.screen_info.orig_y; - - if (heap_free() < saved.x*saved.y*sizeof(u16)+512) - return; /* Not enough heap to save the screen */ - - saved.data = GET_HEAP(u16, saved.x*saved.y); - - set_fs(video_segment); - copy_from_fs(saved.data, 0, saved.x*saved.y*sizeof(u16)); -} - -static void restore_screen(void) -{ - /* Should be called after store_mode_params() */ - int xs = boot_params.screen_info.orig_video_cols; - int ys = boot_params.screen_info.orig_video_lines; - int y; - addr_t dst = 0; - u16 *src = saved.data; - u16 ax, bx, dx; - - if (graphic_mode) - return; /* Can't restore onto a graphic mode */ - - if (!src) - return; /* No saved screen contents */ - - /* Restore screen contents */ - - set_fs(video_segment); - for (y = 0; y < ys; y++) { - int npad; - - if (y < saved.y) { - int copy = (xs < saved.x) ? xs : saved.x; - copy_to_fs(dst, src, copy*sizeof(u16)); - dst += copy*sizeof(u16); - src += saved.x; - npad = (xs < saved.x) ? 0 : xs-saved.x; - } else { - npad = xs; - } - - /* Writes "npad" blank characters to - video_segment:dst and advances dst */ - asm volatile("pushw %%es ; " - "movw %2,%%es ; " - "shrw %%cx ; " - "jnc 1f ; " - "stosw \n\t" - "1: rep;stosl ; " - "popw %%es" - : "+D" (dst), "+c" (npad) - : "bdS" (video_segment), - "a" (0x07200720)); - } - - /* Restore cursor position */ - ax = 0x0200; /* Set cursor position */ - bx = 0; /* Page number (<< 8) */ - dx = (saved.cury << 8)+saved.curx; - asm volatile(INT10 - : "+a" (ax), "+b" (bx), "+d" (dx) - : : "ecx", "esi", "edi"); -} -#else -#define save_screen() ((void)0) -#define restore_screen() ((void)0) -#endif - -void set_video(void) -{ - u16 mode = boot_params.hdr.vid_mode; - - RESET_HEAP(); - - store_mode_params(); - save_screen(); - probe_cards(0); - - for (;;) { - if (mode == ASK_VGA) - mode = mode_menu(); - - if (!set_mode(mode)) - break; - - printf("Undefined video mode number: %x\n", mode); - mode = ASK_VGA; - } - vesa_store_edid(); - store_mode_params(); - - if (do_restore) - restore_screen(); -} diff --git a/arch/i386/boot/video.h b/arch/i386/boot/video.h deleted file mode 100644 index b92447d5121..00000000000 --- a/arch/i386/boot/video.h +++ /dev/null @@ -1,152 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/video.h - * - * Header file for the real-mode video probing code - */ - -#ifndef BOOT_VIDEO_H -#define BOOT_VIDEO_H - -#include - -/* Enable autodetection of SVGA adapters and modes. */ -#undef CONFIG_VIDEO_SVGA - -/* Enable autodetection of VESA modes */ -#define CONFIG_VIDEO_VESA - -/* Retain screen contents when switching modes */ -#define CONFIG_VIDEO_RETAIN - -/* Force 400 scan lines for standard modes (hack to fix bad BIOS behaviour */ -#undef CONFIG_VIDEO_400_HACK - -/* This code uses an extended set of video mode numbers. These include: - * Aliases for standard modes - * NORMAL_VGA (-1) - * EXTENDED_VGA (-2) - * ASK_VGA (-3) - * Video modes numbered by menu position -- NOT RECOMMENDED because of lack - * of compatibility when extending the table. These are between 0x00 and 0xff. - */ -#define VIDEO_FIRST_MENU 0x0000 - -/* Standard BIOS video modes (BIOS number + 0x0100) */ -#define VIDEO_FIRST_BIOS 0x0100 - -/* VESA BIOS video modes (VESA number + 0x0200) */ -#define VIDEO_FIRST_VESA 0x0200 - -/* Video7 special modes (BIOS number + 0x0900) */ -#define VIDEO_FIRST_V7 0x0900 - -/* Special video modes */ -#define VIDEO_FIRST_SPECIAL 0x0f00 -#define VIDEO_80x25 0x0f00 -#define VIDEO_8POINT 0x0f01 -#define VIDEO_80x43 0x0f02 -#define VIDEO_80x28 0x0f03 -#define VIDEO_CURRENT_MODE 0x0f04 -#define VIDEO_80x30 0x0f05 -#define VIDEO_80x34 0x0f06 -#define VIDEO_80x60 0x0f07 -#define VIDEO_GFX_HACK 0x0f08 -#define VIDEO_LAST_SPECIAL 0x0f09 - -/* Video modes given by resolution */ -#define VIDEO_FIRST_RESOLUTION 0x1000 - -/* The "recalculate timings" flag */ -#define VIDEO_RECALC 0x8000 - -/* Define DO_STORE according to CONFIG_VIDEO_RETAIN */ -#ifdef CONFIG_VIDEO_RETAIN -void store_screen(void); -#define DO_STORE() store_screen() -#else -#define DO_STORE() ((void)0) -#endif /* CONFIG_VIDEO_RETAIN */ - -/* - * Mode table structures - */ - -struct mode_info { - u16 mode; /* Mode number (vga= style) */ - u8 x, y; /* Width, height */ -}; - -struct card_info { - const char *card_name; - int (*set_mode)(struct mode_info *mode); - int (*probe)(void); - struct mode_info *modes; - int nmodes; /* Number of probed modes so far */ - int unsafe; /* Probing is unsafe, only do after "scan" */ - u16 xmode_first; /* Unprobed modes to try to call anyway */ - u16 xmode_n; /* Size of unprobed mode range */ -}; - -#define __videocard struct card_info __attribute__((section(".videocards"))) -extern struct card_info video_cards[], video_cards_end[]; - -int mode_defined(u16 mode); /* video.c */ - -/* Basic video information */ -#define ADAPTER_CGA 0 /* CGA/MDA/HGC */ -#define ADAPTER_EGA 1 -#define ADAPTER_VGA 2 - -extern int adapter; -extern u16 video_segment; -extern int force_x, force_y; /* Don't query the BIOS for cols/rows */ -extern int do_restore; /* Restore screen contents */ -extern int graphic_mode; /* Graphics mode with linear frame buffer */ - -/* - * int $0x10 is notorious for touching registers it shouldn't. - * gcc doesn't like %ebp being clobbered, so define it as a push/pop - * sequence here. - * - * A number of systems, including the original PC can clobber %bp in - * certain circumstances, like when scrolling. There exists at least - * one Trident video card which could clobber DS under a set of - * circumstances that we are unlikely to encounter (scrolling when - * using an extended graphics mode of more than 800x600 pixels), but - * it's cheap insurance to deal with that here. - */ -#define INT10 "pushl %%ebp; pushw %%ds; int $0x10; popw %%ds; popl %%ebp" - -/* Accessing VGA indexed registers */ -static inline u8 in_idx(u16 port, u8 index) -{ - outb(index, port); - return inb(port+1); -} - -static inline void out_idx(u8 v, u16 port, u8 index) -{ - outw(index+(v << 8), port); -} - -/* Writes a value to an indexed port and then reads the port again */ -static inline u8 tst_idx(u8 v, u16 port, u8 index) -{ - out_idx(port, index, v); - return in_idx(port, index); -} - -/* Get the I/O port of the VGA CRTC */ -u16 vga_crtc(void); /* video-vga.c */ - -#endif /* BOOT_VIDEO_H */ diff --git a/arch/i386/boot/voyager.c b/arch/i386/boot/voyager.c deleted file mode 100644 index 61c8fe0453b..00000000000 --- a/arch/i386/boot/voyager.c +++ /dev/null @@ -1,46 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * arch/i386/boot/voyager.c - * - * Get the Voyager config information - */ - -#include "boot.h" - -#ifdef CONFIG_X86_VOYAGER - -int query_voyager(void) -{ - u8 err; - u16 es, di; - /* Abuse the apm_bios_info area for this */ - u8 *data_ptr = (u8 *)&boot_params.apm_bios_info; - - data_ptr[0] = 0xff; /* Flag on config not found(?) */ - - asm("pushw %%es ; " - "int $0x15 ; " - "setc %0 ; " - "movw %%es, %1 ; " - "popw %%es" - : "=q" (err), "=r" (es), "=D" (di) - : "a" (0xffc0)); - - if (err) - return -1; /* Not Voyager */ - - set_fs(es); - copy_from_fs(data_ptr, di, 7); /* Table is 7 bytes apparently */ - return 0; -} - -#endif /* CONFIG_X86_VOYAGER */ diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore new file mode 100644 index 00000000000..18465143cfa --- /dev/null +++ b/arch/x86/boot/.gitignore @@ -0,0 +1,5 @@ +bootsect +bzImage +setup +setup.bin +setup.elf diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile new file mode 100644 index 00000000000..cb1035f2b7e --- /dev/null +++ b/arch/x86/boot/Makefile @@ -0,0 +1,171 @@ +# +# arch/x86/boot/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1994 by Linus Torvalds +# + +# ROOT_DEV specifies the default root-device when making the image. +# This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case +# the default of FLOPPY is used by 'build'. + +ROOT_DEV := CURRENT + +# If you want to preset the SVGA mode, uncomment the next line and +# set SVGA_MODE to whatever number you want. +# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. +# The number is the same as you would ordinarily press at bootup. + +SVGA_MODE := -DSVGA_MODE=NORMAL_VGA + +# If you want the RAM disk device, define this to be the size in blocks. + +#RAMDISK := -DRAMDISK=512 + +targets := vmlinux.bin setup.bin setup.elf zImage bzImage +subdir- := compressed + +setup-y += a20.o apm.o cmdline.o copy.o cpu.o cpucheck.o edd.o +setup-y += header.o main.o mca.o memory.o pm.o pmjump.o +setup-y += printf.o string.o tty.o video.o version.o voyager.o + +# The link order of the video-*.o modules can matter. In particular, +# video-vga.o *must* be listed first, followed by video-vesa.o. +# Hardware-specific drivers should follow in the order they should be +# probed, and video-bios.o should typically be last. +setup-y += video-vga.o +setup-y += video-vesa.o +setup-y += video-bios.o +targets += $(setup-y) +hostprogs-y := tools/build + +HOSTCFLAGS_build.o := $(LINUXINCLUDE) + +# --------------------------------------------------------------------------- + +# How to compile the 16-bit code. Note we always compile for -march=i386, +# that way we can complain to the user if the CPU is insufficient. +cflags-i386 := +cflags-x86_64 := -m32 +CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \ + $(cflags-$(ARCH)) \ + -Wall -Wstrict-prototypes \ + -march=i386 -mregparm=3 \ + -include $(srctree)/$(src)/code16gcc.h \ + -fno-strict-aliasing -fomit-frame-pointer \ + $(call cc-option, -ffreestanding) \ + $(call cc-option, -fno-toplevel-reorder,\ + $(call cc-option, -fno-unit-at-a-time)) \ + $(call cc-option, -fno-stack-protector) \ + $(call cc-option, -mpreferred-stack-boundary=2) +AFLAGS := $(CFLAGS) -D__ASSEMBLY__ + +$(obj)/zImage: IMAGE_OFFSET := 0x1000 +$(obj)/zImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) +$(obj)/bzImage: IMAGE_OFFSET := 0x100000 +$(obj)/bzImage: EXTRA_CFLAGS := -D__BIG_KERNEL__ +$(obj)/bzImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__ +$(obj)/bzImage: BUILDFLAGS := -b + +quiet_cmd_image = BUILD $@ +cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/setup.bin \ + $(obj)/vmlinux.bin $(ROOT_DEV) > $@ + +$(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \ + $(obj)/vmlinux.bin $(obj)/tools/build FORCE + $(call if_changed,image) + @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' + +$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE + $(call if_changed,objcopy) + +SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) + +LDFLAGS_setup.elf := -T +$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE + $(call if_changed,ld) + +OBJCOPYFLAGS_setup.bin := -O binary + +$(obj)/setup.bin: $(obj)/setup.elf FORCE + $(call if_changed,objcopy) + +$(obj)/compressed/vmlinux: FORCE + $(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@ + +# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel +FDARGS = +# Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel +FDINITRD = + +image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,) + +$(obj)/mtools.conf: $(src)/mtools.conf.in + sed -e 's|@OBJ@|$(obj)|g' < $< > $@ + +# This requires write access to /dev/fd0 +zdisk: $(BOOTIMAGE) $(obj)/mtools.conf + MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync + syslinux /dev/fd0 ; sync + echo '$(image_cmdline)' | \ + MTOOLSRC=$(src)/mtools.conf mcopy - a:syslinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \ + fi + MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) a:linux ; sync + +# These require being root or having syslinux 2.02 or higher installed +fdimage fdimage144: $(BOOTIMAGE) $(obj)/mtools.conf + dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440 + MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync + syslinux $(obj)/fdimage ; sync + echo '$(image_cmdline)' | \ + MTOOLSRC=$(obj)/mtools.conf mcopy - v:syslinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \ + fi + MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) v:linux ; sync + +fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf + dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880 + MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync + syslinux $(obj)/fdimage ; sync + echo '$(image_cmdline)' | \ + MTOOLSRC=$(obj)/mtools.conf mcopy - w:syslinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \ + fi + MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) w:linux ; sync + +isoimage: $(BOOTIMAGE) + -rm -rf $(obj)/isoimage + mkdir $(obj)/isoimage + for i in lib lib64 share end ; do \ + if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \ + cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \ + break ; \ + fi ; \ + if [ $$i = end ] ; then exit 1 ; fi ; \ + done + cp $(BOOTIMAGE) $(obj)/isoimage/linux + echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \ + fi + mkisofs -J -r -o $(obj)/image.iso -b isolinux.bin -c boot.cat \ + -no-emul-boot -boot-load-size 4 -boot-info-table \ + $(obj)/isoimage + rm -rf $(obj)/isoimage + +zlilo: $(BOOTIMAGE) + if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi + if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi + cat $(BOOTIMAGE) > $(INSTALL_PATH)/vmlinuz + cp System.map $(INSTALL_PATH)/ + if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi + +install: + sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(BOOTIMAGE) System.map "$(INSTALL_PATH)" diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c new file mode 100644 index 00000000000..31348d054fc --- /dev/null +++ b/arch/x86/boot/a20.c @@ -0,0 +1,161 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/a20.c + * + * Enable A20 gate (return -1 on failure) + */ + +#include "boot.h" + +#define MAX_8042_LOOPS 100000 + +static int empty_8042(void) +{ + u8 status; + int loops = MAX_8042_LOOPS; + + while (loops--) { + io_delay(); + + status = inb(0x64); + if (status & 1) { + /* Read and discard input data */ + io_delay(); + (void)inb(0x60); + } else if (!(status & 2)) { + /* Buffers empty, finished! */ + return 0; + } + } + + return -1; +} + +/* Returns nonzero if the A20 line is enabled. The memory address + used as a test is the int $0x80 vector, which should be safe. */ + +#define A20_TEST_ADDR (4*0x80) +#define A20_TEST_SHORT 32 +#define A20_TEST_LONG 2097152 /* 2^21 */ + +static int a20_test(int loops) +{ + int ok = 0; + int saved, ctr; + + set_fs(0x0000); + set_gs(0xffff); + + saved = ctr = rdfs32(A20_TEST_ADDR); + + while (loops--) { + wrfs32(++ctr, A20_TEST_ADDR); + io_delay(); /* Serialize and make delay constant */ + ok = rdgs32(A20_TEST_ADDR+0x10) ^ ctr; + if (ok) + break; + } + + wrfs32(saved, A20_TEST_ADDR); + return ok; +} + +/* Quick test to see if A20 is already enabled */ +static int a20_test_short(void) +{ + return a20_test(A20_TEST_SHORT); +} + +/* Longer test that actually waits for A20 to come on line; this + is useful when dealing with the KBC or other slow external circuitry. */ +static int a20_test_long(void) +{ + return a20_test(A20_TEST_LONG); +} + +static void enable_a20_bios(void) +{ + asm volatile("pushfl; int $0x15; popfl" + : : "a" ((u16)0x2401)); +} + +static void enable_a20_kbc(void) +{ + empty_8042(); + + outb(0xd1, 0x64); /* Command write */ + empty_8042(); + + outb(0xdf, 0x60); /* A20 on */ + empty_8042(); +} + +static void enable_a20_fast(void) +{ + u8 port_a; + + port_a = inb(0x92); /* Configuration port A */ + port_a |= 0x02; /* Enable A20 */ + port_a &= ~0x01; /* Do not reset machine */ + outb(port_a, 0x92); +} + +/* + * Actual routine to enable A20; return 0 on ok, -1 on failure + */ + +#define A20_ENABLE_LOOPS 255 /* Number of times to try */ + +int enable_a20(void) +{ + int loops = A20_ENABLE_LOOPS; + +#if defined(CONFIG_X86_ELAN) + /* Elan croaks if we try to touch the KBC */ + enable_a20_fast(); + while (!a20_test_long()) + ; + return 0; +#elif defined(CONFIG_X86_VOYAGER) + /* On Voyager, a20_test() is unsafe? */ + enable_a20_kbc(); + return 0; +#else + while (loops--) { + /* First, check to see if A20 is already enabled + (legacy free, etc.) */ + if (a20_test_short()) + return 0; + + /* Next, try the BIOS (INT 0x15, AX=0x2401) */ + enable_a20_bios(); + if (a20_test_short()) + return 0; + + /* Try enabling A20 through the keyboard controller */ + empty_8042(); + if (a20_test_short()) + return 0; /* BIOS worked, but with delayed reaction */ + + enable_a20_kbc(); + if (a20_test_long()) + return 0; + + /* Finally, try enabling the "fast A20 gate" */ + enable_a20_fast(); + if (a20_test_long()) + return 0; + } + + return -1; +#endif +} diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c new file mode 100644 index 00000000000..eab50c55a3a --- /dev/null +++ b/arch/x86/boot/apm.c @@ -0,0 +1,98 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * Original APM BIOS checking by Stephen Rothwell, May 1994 + * (sfr@canb.auug.org.au) + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/apm.c + * + * Get APM BIOS information + */ + +#include "boot.h" + +#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE) + +int query_apm_bios(void) +{ + u16 ax, bx, cx, dx, di; + u32 ebx, esi; + u8 err; + + /* APM BIOS installation check */ + ax = 0x5300; + bx = cx = 0; + asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0" + : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx) + : : "esi", "edi"); + + if (err) + return -1; /* No APM BIOS */ + + if (bx != 0x504d) /* "PM" signature */ + return -1; + + if (!(cx & 0x02)) /* 32 bits supported? */ + return -1; + + /* Disconnect first, just in case */ + ax = 0x5304; + bx = 0; + asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp" + : "+a" (ax), "+b" (bx) + : : "ecx", "edx", "esi", "edi"); + + /* Paranoia */ + ebx = esi = 0; + cx = dx = di = 0; + + /* 32-bit connect */ + asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %6" + : "=a" (ax), "+b" (ebx), "+c" (cx), "+d" (dx), + "+S" (esi), "+D" (di), "=m" (err) + : "a" (0x5303)); + + boot_params.apm_bios_info.cseg = ax; + boot_params.apm_bios_info.offset = ebx; + boot_params.apm_bios_info.cseg_16 = cx; + boot_params.apm_bios_info.dseg = dx; + boot_params.apm_bios_info.cseg_len = (u16)esi; + boot_params.apm_bios_info.cseg_16_len = esi >> 16; + boot_params.apm_bios_info.dseg_len = di; + + if (err) + return -1; + + /* Redo the installation check as the 32-bit connect; + some BIOSes return different flags this way... */ + + ax = 0x5300; + bx = cx = 0; + asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0" + : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx) + : : "esi", "edi"); + + if (err || bx != 0x504d) { + /* Failure with 32-bit connect, try to disconect and ignore */ + ax = 0x5304; + bx = 0; + asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp" + : "+a" (ax), "+b" (bx) + : : "ecx", "edx", "esi", "edi"); + return -1; + } + + boot_params.apm_bios_info.version = ax; + boot_params.apm_bios_info.flags = cx; + return 0; +} + +#endif diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h new file mode 100644 index 00000000000..8dcc8dc7db8 --- /dev/null +++ b/arch/x86/boot/bitops.h @@ -0,0 +1,45 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/bitops.h + * + * Very simple bitops for the boot code. + */ + +#ifndef BOOT_BITOPS_H +#define BOOT_BITOPS_H +#define _LINUX_BITOPS_H /* Inhibit inclusion of */ + +static inline int constant_test_bit(int nr, const void *addr) +{ + const u32 *p = (const u32 *)addr; + return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0; +} +static inline int variable_test_bit(int nr, const void *addr) +{ + u8 v; + const u32 *p = (const u32 *)addr; + + asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr)); + return v; +} + +#define test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? \ + constant_test_bit((nr),(addr)) : \ + variable_test_bit((nr),(addr))) + +static inline void set_bit(int nr, void *addr) +{ + asm("btsl %1,%0" : "+m" (*(u32 *)addr) : "Ir" (nr)); +} + +#endif /* BOOT_BITOPS_H */ diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h new file mode 100644 index 00000000000..20bab9431ac --- /dev/null +++ b/arch/x86/boot/boot.h @@ -0,0 +1,296 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/boot.h + * + * Header file for the real-mode kernel code + */ + +#ifndef BOOT_BOOT_H +#define BOOT_BOOT_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include +#include + +/* Useful macros */ +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) + +extern struct setup_header hdr; +extern struct boot_params boot_params; + +/* Basic port I/O */ +static inline void outb(u8 v, u16 port) +{ + asm volatile("outb %0,%1" : : "a" (v), "dN" (port)); +} +static inline u8 inb(u16 port) +{ + u8 v; + asm volatile("inb %1,%0" : "=a" (v) : "dN" (port)); + return v; +} + +static inline void outw(u16 v, u16 port) +{ + asm volatile("outw %0,%1" : : "a" (v), "dN" (port)); +} +static inline u16 inw(u16 port) +{ + u16 v; + asm volatile("inw %1,%0" : "=a" (v) : "dN" (port)); + return v; +} + +static inline void outl(u32 v, u16 port) +{ + asm volatile("outl %0,%1" : : "a" (v), "dN" (port)); +} +static inline u32 inl(u32 port) +{ + u32 v; + asm volatile("inl %1,%0" : "=a" (v) : "dN" (port)); + return v; +} + +static inline void io_delay(void) +{ + const u16 DELAY_PORT = 0x80; + asm volatile("outb %%al,%0" : : "dN" (DELAY_PORT)); +} + +/* These functions are used to reference data in other segments. */ + +static inline u16 ds(void) +{ + u16 seg; + asm("movw %%ds,%0" : "=rm" (seg)); + return seg; +} + +static inline void set_fs(u16 seg) +{ + asm volatile("movw %0,%%fs" : : "rm" (seg)); +} +static inline u16 fs(void) +{ + u16 seg; + asm volatile("movw %%fs,%0" : "=rm" (seg)); + return seg; +} + +static inline void set_gs(u16 seg) +{ + asm volatile("movw %0,%%gs" : : "rm" (seg)); +} +static inline u16 gs(void) +{ + u16 seg; + asm volatile("movw %%gs,%0" : "=rm" (seg)); + return seg; +} + +typedef unsigned int addr_t; + +static inline u8 rdfs8(addr_t addr) +{ + u8 v; + asm volatile("movb %%fs:%1,%0" : "=r" (v) : "m" (*(u8 *)addr)); + return v; +} +static inline u16 rdfs16(addr_t addr) +{ + u16 v; + asm volatile("movw %%fs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr)); + return v; +} +static inline u32 rdfs32(addr_t addr) +{ + u32 v; + asm volatile("movl %%fs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr)); + return v; +} + +static inline void wrfs8(u8 v, addr_t addr) +{ + asm volatile("movb %1,%%fs:%0" : "+m" (*(u8 *)addr) : "r" (v)); +} +static inline void wrfs16(u16 v, addr_t addr) +{ + asm volatile("movw %1,%%fs:%0" : "+m" (*(u16 *)addr) : "r" (v)); +} +static inline void wrfs32(u32 v, addr_t addr) +{ + asm volatile("movl %1,%%fs:%0" : "+m" (*(u32 *)addr) : "r" (v)); +} + +static inline u8 rdgs8(addr_t addr) +{ + u8 v; + asm volatile("movb %%gs:%1,%0" : "=r" (v) : "m" (*(u8 *)addr)); + return v; +} +static inline u16 rdgs16(addr_t addr) +{ + u16 v; + asm volatile("movw %%gs:%1,%0" : "=r" (v) : "m" (*(u16 *)addr)); + return v; +} +static inline u32 rdgs32(addr_t addr) +{ + u32 v; + asm volatile("movl %%gs:%1,%0" : "=r" (v) : "m" (*(u32 *)addr)); + return v; +} + +static inline void wrgs8(u8 v, addr_t addr) +{ + asm volatile("movb %1,%%gs:%0" : "+m" (*(u8 *)addr) : "r" (v)); +} +static inline void wrgs16(u16 v, addr_t addr) +{ + asm volatile("movw %1,%%gs:%0" : "+m" (*(u16 *)addr) : "r" (v)); +} +static inline void wrgs32(u32 v, addr_t addr) +{ + asm volatile("movl %1,%%gs:%0" : "+m" (*(u32 *)addr) : "r" (v)); +} + +/* Note: these only return true/false, not a signed return value! */ +static inline int memcmp(const void *s1, const void *s2, size_t len) +{ + u8 diff; + asm("repe; cmpsb; setnz %0" + : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + return diff; +} + +static inline int memcmp_fs(const void *s1, addr_t s2, size_t len) +{ + u8 diff; + asm volatile("fs; repe; cmpsb; setnz %0" + : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + return diff; +} +static inline int memcmp_gs(const void *s1, addr_t s2, size_t len) +{ + u8 diff; + asm volatile("gs; repe; cmpsb; setnz %0" + : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + return diff; +} + +static inline int isdigit(int ch) +{ + return (ch >= '0') && (ch <= '9'); +} + +/* Heap -- available for dynamic lists. */ +#define STACK_SIZE 512 /* Minimum number of bytes for stack */ + +extern char _end[]; +extern char *HEAP; +extern char *heap_end; +#define RESET_HEAP() ((void *)( HEAP = _end )) +static inline char *__get_heap(size_t s, size_t a, size_t n) +{ + char *tmp; + + HEAP = (char *)(((size_t)HEAP+(a-1)) & ~(a-1)); + tmp = HEAP; + HEAP += s*n; + return tmp; +} +#define GET_HEAP(type, n) \ + ((type *)__get_heap(sizeof(type),__alignof__(type),(n))) + +static inline int heap_free(void) +{ + return heap_end-HEAP; +} + +/* copy.S */ + +void copy_to_fs(addr_t dst, void *src, size_t len); +void *copy_from_fs(void *dst, addr_t src, size_t len); +void copy_to_gs(addr_t dst, void *src, size_t len); +void *copy_from_gs(void *dst, addr_t src, size_t len); +void *memcpy(void *dst, void *src, size_t len); +void *memset(void *dst, int c, size_t len); + +#define memcpy(d,s,l) __builtin_memcpy(d,s,l) +#define memset(d,c,l) __builtin_memset(d,c,l) + +/* a20.c */ +int enable_a20(void); + +/* apm.c */ +int query_apm_bios(void); + +/* cmdline.c */ +int cmdline_find_option(const char *option, char *buffer, int bufsize); + +/* cpu.c, cpucheck.c */ +int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); +int validate_cpu(void); + +/* edd.c */ +void query_edd(void); + +/* header.S */ +void __attribute__((noreturn)) die(void); + +/* mca.c */ +int query_mca(void); + +/* memory.c */ +int detect_memory(void); + +/* pm.c */ +void __attribute__((noreturn)) go_to_protected_mode(void); + +/* pmjump.S */ +void __attribute__((noreturn)) + protected_mode_jump(u32 entrypoint, u32 bootparams); + +/* printf.c */ +int sprintf(char *buf, const char *fmt, ...); +int vsprintf(char *buf, const char *fmt, va_list args); +int printf(const char *fmt, ...); + +/* string.c */ +int strcmp(const char *str1, const char *str2); +size_t strnlen(const char *s, size_t maxlen); +unsigned int atou(const char *s); + +/* tty.c */ +void puts(const char *); +void putchar(int); +int getchar(void); +void kbd_flush(void); +int getchar_timeout(void); + +/* video.c */ +void set_video(void); + +/* video-vesa.c */ +void vesa_store_edid(void); + +/* voyager.c */ +int query_voyager(void); + +#endif /* __ASSEMBLY__ */ + +#endif /* BOOT_BOOT_H */ diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c new file mode 100644 index 00000000000..34bb778c435 --- /dev/null +++ b/arch/x86/boot/cmdline.c @@ -0,0 +1,97 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/cmdline.c + * + * Simple command-line parser for early boot. + */ + +#include "boot.h" + +static inline int myisspace(u8 c) +{ + return c <= ' '; /* Close enough approximation */ +} + +/* + * Find a non-boolean option, that is, "option=argument". In accordance + * with standard Linux practice, if this option is repeated, this returns + * the last instance on the command line. + * + * Returns the length of the argument (regardless of if it was + * truncated to fit in the buffer), or -1 on not found. + */ +int cmdline_find_option(const char *option, char *buffer, int bufsize) +{ + u32 cmdline_ptr = boot_params.hdr.cmd_line_ptr; + addr_t cptr; + char c; + int len = -1; + const char *opptr = NULL; + char *bufptr = buffer; + enum { + st_wordstart, /* Start of word/after whitespace */ + st_wordcmp, /* Comparing this word */ + st_wordskip, /* Miscompare, skip */ + st_bufcpy /* Copying this to buffer */ + } state = st_wordstart; + + if (!cmdline_ptr || cmdline_ptr >= 0x100000) + return -1; /* No command line, or inaccessible */ + + cptr = cmdline_ptr & 0xf; + set_fs(cmdline_ptr >> 4); + + while (cptr < 0x10000 && (c = rdfs8(cptr++))) { + switch (state) { + case st_wordstart: + if (myisspace(c)) + break; + + /* else */ + state = st_wordcmp; + opptr = option; + /* fall through */ + + case st_wordcmp: + if (c == '=' && !*opptr) { + len = 0; + bufptr = buffer; + state = st_bufcpy; + } else if (myisspace(c)) { + state = st_wordstart; + } else if (c != *opptr++) { + state = st_wordskip; + } + break; + + case st_wordskip: + if (myisspace(c)) + state = st_wordstart; + break; + + case st_bufcpy: + if (myisspace(c)) { + state = st_wordstart; + } else { + if (len < bufsize-1) + *bufptr++ = c; + len++; + } + break; + } + } + + if (bufsize) + *bufptr = '\0'; + + return len; +} diff --git a/arch/x86/boot/code16gcc.h b/arch/x86/boot/code16gcc.h new file mode 100644 index 00000000000..d93e48010b6 --- /dev/null +++ b/arch/x86/boot/code16gcc.h @@ -0,0 +1,15 @@ +/* + * code16gcc.h + * + * This file is -include'd when compiling 16-bit C code. + * Note: this asm() needs to be emitted before gcc emits any code. + * Depending on gcc version, this requires -fno-unit-at-a-time or + * -fno-toplevel-reorder. + * + * Hopefully gcc will eventually have a real -m16 option so we can + * drop this hack long term. + */ + +#ifndef __ASSEMBLY__ +asm(".code16gcc"); +#endif diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S new file mode 100644 index 00000000000..ef127e56a3c --- /dev/null +++ b/arch/x86/boot/copy.S @@ -0,0 +1,101 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/copy.S + * + * Memory copy routines + */ + + .code16gcc + .text + + .globl memcpy + .type memcpy, @function +memcpy: + pushw %si + pushw %di + movw %ax, %di + movw %dx, %si + pushw %cx + shrw $2, %cx + rep; movsl + popw %cx + andw $3, %cx + rep; movsb + popw %di + popw %si + ret + .size memcpy, .-memcpy + + .globl memset + .type memset, @function +memset: + pushw %di + movw %ax, %di + movzbl %dl, %eax + imull $0x01010101,%eax + pushw %cx + shrw $2, %cx + rep; stosl + popw %cx + andw $3, %cx + rep; stosb + popw %di + ret + .size memset, .-memset + + .globl copy_from_fs + .type copy_from_fs, @function +copy_from_fs: + pushw %ds + pushw %fs + popw %ds + call memcpy + popw %ds + ret + .size copy_from_fs, .-copy_from_fs + + .globl copy_to_fs + .type copy_to_fs, @function +copy_to_fs: + pushw %es + pushw %fs + popw %es + call memcpy + popw %es + ret + .size copy_to_fs, .-copy_to_fs + +#if 0 /* Not currently used, but can be enabled as needed */ + + .globl copy_from_gs + .type copy_from_gs, @function +copy_from_gs: + pushw %ds + pushw %gs + popw %ds + call memcpy + popw %ds + ret + .size copy_from_gs, .-copy_from_gs + .globl copy_to_gs + + .type copy_to_gs, @function +copy_to_gs: + pushw %es + pushw %gs + popw %es + call memcpy + popw %es + ret + .size copy_to_gs, .-copy_to_gs + +#endif diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c new file mode 100644 index 00000000000..2a5c32da585 --- /dev/null +++ b/arch/x86/boot/cpu.c @@ -0,0 +1,69 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/cpu.c + * + * Check for obligatory CPU features and abort if the features are not + * present. + */ + +#include "boot.h" +#include "bitops.h" +#include + +static char *cpu_name(int level) +{ + static char buf[6]; + + if (level == 64) { + return "x86-64"; + } else { + sprintf(buf, "i%d86", level); + return buf; + } +} + +int validate_cpu(void) +{ + u32 *err_flags; + int cpu_level, req_level; + + check_cpu(&cpu_level, &req_level, &err_flags); + + if (cpu_level < req_level) { + printf("This kernel requires an %s CPU, ", + cpu_name(req_level)); + printf("but only detected an %s CPU.\n", + cpu_name(cpu_level)); + return -1; + } + + if (err_flags) { + int i, j; + puts("This kernel requires the following features " + "not present on the CPU:\n"); + + for (i = 0; i < NCAPINTS; i++) { + u32 e = err_flags[i]; + + for (j = 0; j < 32; j++) { + if (e & 1) + printf("%d:%d ", i, j); + + e >>= 1; + } + } + putchar('\n'); + return -1; + } else { + return 0; + } +} diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c new file mode 100644 index 00000000000..e655a89c551 --- /dev/null +++ b/arch/x86/boot/cpucheck.c @@ -0,0 +1,268 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/cpucheck.c + * + * Check for obligatory CPU features and abort if the features are not + * present. This code should be compilable as 16-, 32- or 64-bit + * code, so be very careful with types and inline assembly. + * + * This code should not contain any messages; that requires an + * additional wrapper. + * + * As written, this code is not safe for inclusion into the kernel + * proper (after FPU initialization, in particular). + */ + +#ifdef _SETUP +# include "boot.h" +# include "bitops.h" +#endif +#include +#include +#include +#include +#include + +struct cpu_features { + int level; /* Family, or 64 for x86-64 */ + int model; + u32 flags[NCAPINTS]; +}; + +static struct cpu_features cpu; +static u32 cpu_vendor[3]; +static u32 err_flags[NCAPINTS]; + +#ifdef CONFIG_X86_64 +static const int req_level = 64; +#elif defined(CONFIG_X86_MINIMUM_CPU_FAMILY) +static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY; +#else +static const int req_level = 3; +#endif + +static const u32 req_flags[NCAPINTS] = +{ + REQUIRED_MASK0, + REQUIRED_MASK1, + REQUIRED_MASK2, + REQUIRED_MASK3, + REQUIRED_MASK4, + REQUIRED_MASK5, + REQUIRED_MASK6, + REQUIRED_MASK7, +}; + +#define A32(a,b,c,d) (((d) << 24)+((c) << 16)+((b) << 8)+(a)) + +static int is_amd(void) +{ + return cpu_vendor[0] == A32('A','u','t','h') && + cpu_vendor[1] == A32('e','n','t','i') && + cpu_vendor[2] == A32('c','A','M','D'); +} + +static int is_centaur(void) +{ + return cpu_vendor[0] == A32('C','e','n','t') && + cpu_vendor[1] == A32('a','u','r','H') && + cpu_vendor[2] == A32('a','u','l','s'); +} + +static int is_transmeta(void) +{ + return cpu_vendor[0] == A32('G','e','n','u') && + cpu_vendor[1] == A32('i','n','e','T') && + cpu_vendor[2] == A32('M','x','8','6'); +} + +static int has_fpu(void) +{ + u16 fcw = -1, fsw = -1; + u32 cr0; + + asm("movl %%cr0,%0" : "=r" (cr0)); + if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { + cr0 &= ~(X86_CR0_EM|X86_CR0_TS); + asm volatile("movl %0,%%cr0" : : "r" (cr0)); + } + + asm volatile("fninit ; fnstsw %0 ; fnstcw %1" + : "+m" (fsw), "+m" (fcw)); + + return fsw == 0 && (fcw & 0x103f) == 0x003f; +} + +static int has_eflag(u32 mask) +{ + u32 f0, f1; + + asm("pushfl ; " + "pushfl ; " + "popl %0 ; " + "movl %0,%1 ; " + "xorl %2,%1 ; " + "pushl %1 ; " + "popfl ; " + "pushfl ; " + "popl %1 ; " + "popfl" + : "=&r" (f0), "=&r" (f1) + : "ri" (mask)); + + return !!((f0^f1) & mask); +} + +static void get_flags(void) +{ + u32 max_intel_level, max_amd_level; + u32 tfms; + + if (has_fpu()) + set_bit(X86_FEATURE_FPU, cpu.flags); + + if (has_eflag(X86_EFLAGS_ID)) { + asm("cpuid" + : "=a" (max_intel_level), + "=b" (cpu_vendor[0]), + "=d" (cpu_vendor[1]), + "=c" (cpu_vendor[2]) + : "a" (0)); + + if (max_intel_level >= 0x00000001 && + max_intel_level <= 0x0000ffff) { + asm("cpuid" + : "=a" (tfms), + "=c" (cpu.flags[4]), + "=d" (cpu.flags[0]) + : "a" (0x00000001) + : "ebx"); + cpu.level = (tfms >> 8) & 15; + cpu.model = (tfms >> 4) & 15; + if (cpu.level >= 6) + cpu.model += ((tfms >> 16) & 0xf) << 4; + } + + asm("cpuid" + : "=a" (max_amd_level) + : "a" (0x80000000) + : "ebx", "ecx", "edx"); + + if (max_amd_level >= 0x80000001 && + max_amd_level <= 0x8000ffff) { + u32 eax = 0x80000001; + asm("cpuid" + : "+a" (eax), + "=c" (cpu.flags[6]), + "=d" (cpu.flags[1]) + : : "ebx"); + } + } +} + +/* Returns a bitmask of which words we have error bits in */ +static int check_flags(void) +{ + u32 err; + int i; + + err = 0; + for (i = 0; i < NCAPINTS; i++) { + err_flags[i] = req_flags[i] & ~cpu.flags[i]; + if (err_flags[i]) + err |= 1 << i; + } + + return err; +} + +/* + * Returns -1 on error. + * + * *cpu_level is set to the current CPU level; *req_level to the required + * level. x86-64 is considered level 64 for this purpose. + * + * *err_flags_ptr is set to the flags error array if there are flags missing. + */ +int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) +{ + int err; + + memset(&cpu.flags, 0, sizeof cpu.flags); + cpu.level = 3; + + if (has_eflag(X86_EFLAGS_AC)) + cpu.level = 4; + + get_flags(); + err = check_flags(); + + if (test_bit(X86_FEATURE_LM, cpu.flags)) + cpu.level = 64; + + if (err == 0x01 && + !(err_flags[0] & + ~((1 << X86_FEATURE_XMM)|(1 << X86_FEATURE_XMM2))) && + is_amd()) { + /* If this is an AMD and we're only missing SSE+SSE2, try to + turn them on */ + + u32 ecx = MSR_K7_HWCR; + u32 eax, edx; + + asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); + eax &= ~(1 << 15); + asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); + + get_flags(); /* Make sure it really did something */ + err = check_flags(); + } else if (err == 0x01 && + !(err_flags[0] & ~(1 << X86_FEATURE_CX8)) && + is_centaur() && cpu.model >= 6) { + /* If this is a VIA C3, we might have to enable CX8 + explicitly */ + + u32 ecx = MSR_VIA_FCR; + u32 eax, edx; + + asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); + eax |= (1<<1)|(1<<7); + asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); + + set_bit(X86_FEATURE_CX8, cpu.flags); + err = check_flags(); + } else if (err == 0x01 && is_transmeta()) { + /* Transmeta might have masked feature bits in word 0 */ + + u32 ecx = 0x80860004; + u32 eax, edx; + u32 level = 1; + + asm("rdmsr" : "=a" (eax), "=d" (edx) : "c" (ecx)); + asm("wrmsr" : : "a" (~0), "d" (edx), "c" (ecx)); + asm("cpuid" + : "+a" (level), "=d" (cpu.flags[0]) + : : "ecx", "ebx"); + asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); + + err = check_flags(); + } + + if (err_flags_ptr) + *err_flags_ptr = err ? err_flags : NULL; + if (cpu_level_ptr) + *cpu_level_ptr = cpu.level; + if (req_level_ptr) + *req_level_ptr = req_level; + + return (cpu.level < req_level || err) ? -1 : 0; +} diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c new file mode 100644 index 00000000000..bd138e442ec --- /dev/null +++ b/arch/x86/boot/edd.c @@ -0,0 +1,167 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/edd.c + * + * Get EDD BIOS disk information + */ + +#include "boot.h" +#include + +#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) + +/* + * Read the MBR (first sector) from a specific device. + */ +static int read_mbr(u8 devno, void *buf) +{ + u16 ax, bx, cx, dx; + + ax = 0x0201; /* Legacy Read, one sector */ + cx = 0x0001; /* Sector 0-0-1 */ + dx = devno; + bx = (size_t)buf; + asm volatile("pushfl; stc; int $0x13; setc %%al; popfl" + : "+a" (ax), "+c" (cx), "+d" (dx), "+b" (bx) + : : "esi", "edi", "memory"); + + return -(u8)ax; /* 0 or -1 */ +} + +static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig) +{ + int sector_size; + char *mbrbuf_ptr, *mbrbuf_end; + u32 buf_base, mbr_base; + extern char _end[]; + + sector_size = ei->params.bytes_per_sector; + if (!sector_size) + sector_size = 512; /* Best available guess */ + + /* Produce a naturally aligned buffer on the heap */ + buf_base = (ds() << 4) + (u32)&_end; + mbr_base = (buf_base+sector_size-1) & ~(sector_size-1); + mbrbuf_ptr = _end + (mbr_base-buf_base); + mbrbuf_end = mbrbuf_ptr + sector_size; + + /* Make sure we actually have space on the heap... */ + if (!(boot_params.hdr.loadflags & CAN_USE_HEAP)) + return -1; + if (mbrbuf_end > (char *)(size_t)boot_params.hdr.heap_end_ptr) + return -1; + + if (read_mbr(devno, mbrbuf_ptr)) + return -1; + + *mbrsig = *(u32 *)&mbrbuf_ptr[EDD_MBR_SIG_OFFSET]; + return 0; +} + +static int get_edd_info(u8 devno, struct edd_info *ei) +{ + u16 ax, bx, cx, dx, di; + + memset(ei, 0, sizeof *ei); + + /* Check Extensions Present */ + + ax = 0x4100; + bx = EDDMAGIC1; + dx = devno; + asm("pushfl; stc; int $0x13; setc %%al; popfl" + : "+a" (ax), "+b" (bx), "=c" (cx), "+d" (dx) + : : "esi", "edi"); + + if ((u8)ax) + return -1; /* No extended information */ + + if (bx != EDDMAGIC2) + return -1; + + ei->device = devno; + ei->version = ax >> 8; /* EDD version number */ + ei->interface_support = cx; /* EDD functionality subsets */ + + /* Extended Get Device Parameters */ + + ei->params.length = sizeof(ei->params); + ax = 0x4800; + dx = devno; + asm("pushfl; int $0x13; popfl" + : "+a" (ax), "+d" (dx), "=m" (ei->params) + : "S" (&ei->params) + : "ebx", "ecx", "edi"); + + /* Get legacy CHS parameters */ + + /* Ralf Brown recommends setting ES:DI to 0:0 */ + ax = 0x0800; + dx = devno; + di = 0; + asm("pushw %%es; " + "movw %%di,%%es; " + "pushfl; stc; int $0x13; setc %%al; popfl; " + "popw %%es" + : "+a" (ax), "=b" (bx), "=c" (cx), "+d" (dx), "+D" (di) + : : "esi"); + + if ((u8)ax == 0) { + ei->legacy_max_cylinder = (cx >> 8) + ((cx & 0xc0) << 2); + ei->legacy_max_head = dx >> 8; + ei->legacy_sectors_per_track = cx & 0x3f; + } + + return 0; +} + +void query_edd(void) +{ + char eddarg[8]; + int do_mbr = 1; + int do_edd = 1; + int devno; + struct edd_info ei, *edp; + u32 *mbrptr; + + if (cmdline_find_option("edd", eddarg, sizeof eddarg) > 0) { + if (!strcmp(eddarg, "skipmbr") || !strcmp(eddarg, "skip")) + do_mbr = 0; + else if (!strcmp(eddarg, "off")) + do_edd = 0; + } + + edp = boot_params.eddbuf; + mbrptr = boot_params.edd_mbr_sig_buffer; + + if (!do_edd) + return; + + for (devno = 0x80; devno < 0x80+EDD_MBR_SIG_MAX; devno++) { + /* + * Scan the BIOS-supported hard disks and query EDD + * information... + */ + get_edd_info(devno, &ei); + + if (boot_params.eddbuf_entries < EDDMAXNR) { + memcpy(edp, &ei, sizeof ei); + edp++; + boot_params.eddbuf_entries++; + } + + if (do_mbr && !read_mbr_sig(devno, &ei, mbrptr++)) + boot_params.edd_mbr_sig_buf_entries = devno-0x80+1; + } +} + +#endif diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S new file mode 100644 index 00000000000..f3140e596d4 --- /dev/null +++ b/arch/x86/boot/header.S @@ -0,0 +1,283 @@ +/* + * header.S + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Based on bootsect.S and setup.S + * modified by more people than can be counted + * + * Rewritten as a common file by H. Peter Anvin (Apr 2007) + * + * BIG FAT NOTE: We're in real mode using 64k segments. Therefore segment + * addresses must be multiplied by 16 to obtain their respective linear + * addresses. To avoid confusion, linear addresses are written using leading + * hex while segment addresses are written as segment:offset. + * + */ + +#include +#include +#include +#include +#include +#include +#include "boot.h" + +SETUPSECTS = 4 /* default nr of setup-sectors */ +BOOTSEG = 0x07C0 /* original address of boot-sector */ +SYSSEG = DEF_SYSSEG /* system loaded at 0x10000 (65536) */ +SYSSIZE = DEF_SYSSIZE /* system size: # of 16-byte clicks */ + /* to be loaded */ +ROOT_DEV = 0 /* ROOT_DEV is now written by "build" */ +SWAP_DEV = 0 /* SWAP_DEV is now written by "build" */ + +#ifndef SVGA_MODE +#define SVGA_MODE ASK_VGA +#endif + +#ifndef RAMDISK +#define RAMDISK 0 +#endif + +#ifndef ROOT_RDONLY +#define ROOT_RDONLY 1 +#endif + + .code16 + .section ".bstext", "ax" + + .global bootsect_start +bootsect_start: + + # Normalize the start address + ljmp $BOOTSEG, $start2 + +start2: + movw %cs, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + xorw %sp, %sp + sti + cld + + movw $bugger_off_msg, %si + +msg_loop: + lodsb + andb %al, %al + jz bs_die + movb $0xe, %ah + movw $7, %bx + int $0x10 + jmp msg_loop + +bs_die: + # Allow the user to press a key, then reboot + xorw %ax, %ax + int $0x16 + int $0x19 + + # int 0x19 should never return. In case it does anyway, + # invoke the BIOS reset code... + ljmp $0xf000,$0xfff0 + + .section ".bsdata", "a" +bugger_off_msg: + .ascii "Direct booting from floppy is no longer supported.\r\n" + .ascii "Please use a boot loader program instead.\r\n" + .ascii "\n" + .ascii "Remove disk and press any key to reboot . . .\r\n" + .byte 0 + + + # Kernel attributes; used by setup. This is part 1 of the + # header, from the old boot sector. + + .section ".header", "a" + .globl hdr +hdr: +setup_sects: .byte SETUPSECTS +root_flags: .word ROOT_RDONLY +syssize: .long SYSSIZE +ram_size: .word RAMDISK +vid_mode: .word SVGA_MODE +root_dev: .word ROOT_DEV +boot_flag: .word 0xAA55 + + # offset 512, entry point + + .globl _start +_start: + # Explicitly enter this as bytes, or the assembler + # tries to generate a 3-byte jump here, which causes + # everything else to push off to the wrong offset. + .byte 0xeb # short (2-byte) jump + .byte start_of_setup-1f +1: + + # Part 2 of the header, from the old setup.S + + .ascii "HdrS" # header signature + .word 0x0206 # header version number (>= 0x0105) + # or else old loadlin-1.5 will fail) + .globl realmode_swtch +realmode_swtch: .word 0, 0 # default_switch, SETUPSEG +start_sys_seg: .word SYSSEG + .word kernel_version-512 # pointing to kernel version string + # above section of header is compatible + # with loadlin-1.5 (header v1.5). Don't + # change it. + +type_of_loader: .byte 0 # = 0, old one (LILO, Loadlin, + # Bootlin, SYSLX, bootsect...) + # See Documentation/i386/boot.txt for + # assigned ids + +# flags, unused bits must be zero (RFU) bit within loadflags +loadflags: +LOADED_HIGH = 1 # If set, the kernel is loaded high +CAN_USE_HEAP = 0x80 # If set, the loader also has set + # heap_end_ptr to tell how much + # space behind setup.S can be used for + # heap purposes. + # Only the loader knows what is free +#ifndef __BIG_KERNEL__ + .byte 0 +#else + .byte LOADED_HIGH +#endif + +setup_move_size: .word 0x8000 # size to move, when setup is not + # loaded at 0x90000. We will move setup + # to 0x90000 then just before jumping + # into the kernel. However, only the + # loader knows how much data behind + # us also needs to be loaded. + +code32_start: # here loaders can put a different + # start address for 32-bit code. +#ifndef __BIG_KERNEL__ + .long 0x1000 # 0x1000 = default for zImage +#else + .long 0x100000 # 0x100000 = default for big kernel +#endif + +ramdisk_image: .long 0 # address of loaded ramdisk image + # Here the loader puts the 32-bit + # address where it loaded the image. + # This only will be read by the kernel. + +ramdisk_size: .long 0 # its size in bytes + +bootsect_kludge: + .long 0 # obsolete + +heap_end_ptr: .word _end+1024 # (Header version 0x0201 or later) + # space from here (exclusive) down to + # end of setup code can be used by setup + # for local heap purposes. + +pad1: .word 0 +cmd_line_ptr: .long 0 # (Header version 0x0202 or later) + # If nonzero, a 32-bit pointer + # to the kernel command line. + # The command line should be + # located between the start of + # setup and the end of low + # memory (0xa0000), or it may + # get overwritten before it + # gets read. If this field is + # used, there is no longer + # anything magical about the + # 0x90000 segment; the setup + # can be located anywhere in + # low memory 0x10000 or higher. + +ramdisk_max: .long (-__PAGE_OFFSET-(512 << 20)-1) & 0x7fffffff + # (Header version 0x0203 or later) + # The highest safe address for + # the contents of an initrd + +kernel_alignment: .long CONFIG_PHYSICAL_ALIGN #physical addr alignment + #required for protected mode + #kernel +#ifdef CONFIG_RELOCATABLE +relocatable_kernel: .byte 1 +#else +relocatable_kernel: .byte 0 +#endif +pad2: .byte 0 +pad3: .word 0 + +cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, + #added with boot protocol + #version 2.06 + +# End of setup header ##################################################### + + .section ".inittext", "ax" +start_of_setup: +#ifdef SAFE_RESET_DISK_CONTROLLER +# Reset the disk controller. + movw $0x0000, %ax # Reset disk controller + movb $0x80, %dl # All disks + int $0x13 +#endif + +# We will have entered with %cs = %ds+0x20, normalize %cs so +# it is on par with the other segments. + pushw %ds + pushw $setup2 + lretw + +setup2: +# Force %es = %ds + movw %ds, %ax + movw %ax, %es + cld + +# Stack paranoia: align the stack and make sure it is good +# for both 16- and 32-bit references. In particular, if we +# were meant to have been using the full 16-bit segment, the +# caller might have set %sp to zero, which breaks %esp-based +# references. + andw $~3, %sp # dword align (might as well...) + jnz 1f + movw $0xfffc, %sp # Make sure we're not zero +1: movzwl %sp, %esp # Clear upper half of %esp + sti + +# Check signature at end of setup + cmpl $0x5a5aaa55, setup_sig + jne setup_bad + +# Zero the bss + movw $__bss_start, %di + movw $_end+3, %cx + xorl %eax, %eax + subw %di, %cx + shrw $2, %cx + rep; stosl + +# Jump to C code (should not return) + calll main + +# Setup corrupt somehow... +setup_bad: + movl $setup_corrupt, %eax + calll puts + # Fall through... + + .globl die + .type die, @function +die: + hlt + jmp die + + .size die, .-die + + .section ".initdata", "a" +setup_corrupt: + .byte 7 + .string "No setup signature found...\n" diff --git a/arch/x86/boot/install.sh b/arch/x86/boot/install.sh new file mode 100644 index 00000000000..88d77761d01 --- /dev/null +++ b/arch/x86/boot/install.sh @@ -0,0 +1,61 @@ +#!/bin/sh +# +# arch/i386/boot/install.sh +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1995 by Linus Torvalds +# +# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin +# +# "make install" script for i386 architecture +# +# Arguments: +# $1 - kernel version +# $2 - kernel image file +# $3 - kernel map file +# $4 - default install path (blank if root directory) +# + +verify () { + if [ ! -f "$1" ]; then + echo "" 1>&2 + echo " *** Missing file: $1" 1>&2 + echo ' *** You need to run "make" before "make install".' 1>&2 + echo "" 1>&2 + exit 1 + fi +} + +# Make sure the files actually exist +verify "$2" +verify "$3" + +# User may have a custom install script + +if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi + +# Default install - same as make zlilo + +if [ -f $4/vmlinuz ]; then + mv $4/vmlinuz $4/vmlinuz.old +fi + +if [ -f $4/System.map ]; then + mv $4/System.map $4/System.old +fi + +cat $2 > $4/vmlinuz +cp $3 $4/System.map + +if [ -x /sbin/lilo ]; then + /sbin/lilo +elif [ -x /etc/lilo/install ]; then + /etc/lilo/install +else + sync + echo "Cannot find LILO." +fi diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c new file mode 100644 index 00000000000..0eeef3989a1 --- /dev/null +++ b/arch/x86/boot/main.c @@ -0,0 +1,161 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/main.c + * + * Main module for the real-mode kernel code + */ + +#include "boot.h" + +struct boot_params boot_params __attribute__((aligned(16))); + +char *HEAP = _end; +char *heap_end = _end; /* Default end of heap = no heap */ + +/* + * Copy the header into the boot parameter block. Since this + * screws up the old-style command line protocol, adjust by + * filling in the new-style command line pointer instead. + */ +#define OLD_CL_MAGIC 0xA33F +#define OLD_CL_ADDRESS 0x20 + +static void copy_boot_params(void) +{ + struct old_cmdline { + u16 cl_magic; + u16 cl_offset; + }; + const struct old_cmdline * const oldcmd = + (const struct old_cmdline *)OLD_CL_ADDRESS; + + BUILD_BUG_ON(sizeof boot_params != 4096); + memcpy(&boot_params.hdr, &hdr, sizeof hdr); + + if (!boot_params.hdr.cmd_line_ptr && + oldcmd->cl_magic == OLD_CL_MAGIC) { + /* Old-style command line protocol. */ + u16 cmdline_seg; + + /* Figure out if the command line falls in the region + of memory that an old kernel would have copied up + to 0x90000... */ + if (oldcmd->cl_offset < boot_params.hdr.setup_move_size) + cmdline_seg = ds(); + else + cmdline_seg = 0x9000; + + boot_params.hdr.cmd_line_ptr = + (cmdline_seg << 4) + oldcmd->cl_offset; + } +} + +/* + * Set the keyboard repeat rate to maximum. Unclear why this + * is done here; this might be possible to kill off as stale code. + */ +static void keyboard_set_repeat(void) +{ + u16 ax = 0x0305; + u16 bx = 0; + asm volatile("int $0x16" + : "+a" (ax), "+b" (bx) + : : "ecx", "edx", "esi", "edi"); +} + +/* + * Get Intel SpeedStep (IST) information. + */ +static void query_ist(void) +{ + asm("int $0x15" + : "=a" (boot_params.ist_info.signature), + "=b" (boot_params.ist_info.command), + "=c" (boot_params.ist_info.event), + "=d" (boot_params.ist_info.perf_level) + : "a" (0x0000e980), /* IST Support */ + "d" (0x47534943)); /* Request value */ +} + +/* + * Tell the BIOS what CPU mode we intend to run in. + */ +static void set_bios_mode(void) +{ +#ifdef CONFIG_X86_64 + u32 eax, ebx; + + eax = 0xec00; + ebx = 2; + asm volatile("int $0x15" + : "+a" (eax), "+b" (ebx) + : : "ecx", "edx", "esi", "edi"); +#endif +} + +void main(void) +{ + /* First, copy the boot header into the "zeropage" */ + copy_boot_params(); + + /* End of heap check */ + if (boot_params.hdr.loadflags & CAN_USE_HEAP) { + heap_end = (char *)(boot_params.hdr.heap_end_ptr + +0x200-STACK_SIZE); + } else { + /* Boot protocol 2.00 only, no heap available */ + puts("WARNING: Ancient bootloader, some functionality " + "may be limited!\n"); + } + + /* Make sure we have all the proper CPU support */ + if (validate_cpu()) { + puts("Unable to boot - please use a kernel appropriate " + "for your CPU.\n"); + die(); + } + + /* Tell the BIOS what CPU mode we intend to run in. */ + set_bios_mode(); + + /* Detect memory layout */ + detect_memory(); + + /* Set keyboard repeat rate (why?) */ + keyboard_set_repeat(); + + /* Set the video mode */ + set_video(); + + /* Query MCA information */ + query_mca(); + + /* Voyager */ +#ifdef CONFIG_X86_VOYAGER + query_voyager(); +#endif + + /* Query Intel SpeedStep (IST) information */ + query_ist(); + + /* Query APM information */ +#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE) + query_apm_bios(); +#endif + + /* Query EDD information */ +#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) + query_edd(); +#endif + /* Do the last things and invoke protected mode */ + go_to_protected_mode(); +} diff --git a/arch/x86/boot/mca.c b/arch/x86/boot/mca.c new file mode 100644 index 00000000000..68222f2d4b6 --- /dev/null +++ b/arch/x86/boot/mca.c @@ -0,0 +1,43 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/mca.c + * + * Get the MCA system description table + */ + +#include "boot.h" + +int query_mca(void) +{ + u8 err; + u16 es, bx, len; + + asm("pushw %%es ; " + "int $0x15 ; " + "setc %0 ; " + "movw %%es, %1 ; " + "popw %%es" + : "=acd" (err), "=acdSD" (es), "=b" (bx) + : "a" (0xc000)); + + if (err) + return -1; /* No MCA present */ + + set_fs(es); + len = rdfs16(bx); + + if (len > sizeof(boot_params.sys_desc_table)) + len = sizeof(boot_params.sys_desc_table); + + copy_from_fs(&boot_params.sys_desc_table, bx, len); + return 0; +} diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c new file mode 100644 index 00000000000..378353956b5 --- /dev/null +++ b/arch/x86/boot/memory.c @@ -0,0 +1,118 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/memory.c + * + * Memory detection code + */ + +#include "boot.h" + +#define SMAP 0x534d4150 /* ASCII "SMAP" */ + +static int detect_memory_e820(void) +{ + int count = 0; + u32 next = 0; + u32 size, id; + u8 err; + struct e820entry *desc = boot_params.e820_map; + + do { + size = sizeof(struct e820entry); + + /* Important: %edx is clobbered by some BIOSes, + so it must be either used for the error output + or explicitly marked clobbered. */ + asm("int $0x15; setc %0" + : "=d" (err), "+b" (next), "=a" (id), "+c" (size), + "=m" (*desc) + : "D" (desc), "d" (SMAP), "a" (0xe820)); + + /* Some BIOSes stop returning SMAP in the middle of + the search loop. We don't know exactly how the BIOS + screwed up the map at that point, we might have a + partial map, the full map, or complete garbage, so + just return failure. */ + if (id != SMAP) { + count = 0; + break; + } + + if (err) + break; + + count++; + desc++; + } while (next && count < E820MAX); + + return boot_params.e820_entries = count; +} + +static int detect_memory_e801(void) +{ + u16 ax, bx, cx, dx; + u8 err; + + bx = cx = dx = 0; + ax = 0xe801; + asm("stc; int $0x15; setc %0" + : "=m" (err), "+a" (ax), "+b" (bx), "+c" (cx), "+d" (dx)); + + if (err) + return -1; + + /* Do we really need to do this? */ + if (cx || dx) { + ax = cx; + bx = dx; + } + + if (ax > 15*1024) + return -1; /* Bogus! */ + + /* This ignores memory above 16MB if we have a memory hole + there. If someone actually finds a machine with a memory + hole at 16MB and no support for 0E820h they should probably + generate a fake e820 map. */ + boot_params.alt_mem_k = (ax == 15*1024) ? (dx << 6)+ax : ax; + + return 0; +} + +static int detect_memory_88(void) +{ + u16 ax; + u8 err; + + ax = 0x8800; + asm("stc; int $0x15; setc %0" : "=bcdm" (err), "+a" (ax)); + + boot_params.screen_info.ext_mem_k = ax; + + return -err; +} + +int detect_memory(void) +{ + int err = -1; + + if (detect_memory_e820() > 0) + err = 0; + + if (!detect_memory_e801()) + err = 0; + + if (!detect_memory_88()) + err = 0; + + return err; +} diff --git a/arch/x86/boot/mtools.conf.in b/arch/x86/boot/mtools.conf.in new file mode 100644 index 00000000000..efd6d2490c1 --- /dev/null +++ b/arch/x86/boot/mtools.conf.in @@ -0,0 +1,17 @@ +# +# mtools configuration file for "make (b)zdisk" +# + +# Actual floppy drive +drive a: + file="/dev/fd0" + +# 1.44 MB floppy disk image +drive v: + file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=18 filter + +# 2.88 MB floppy disk image (mostly for virtual uses) +drive w: + file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=36 filter + + diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c new file mode 100644 index 00000000000..09fb342cc62 --- /dev/null +++ b/arch/x86/boot/pm.c @@ -0,0 +1,174 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/pm.c + * + * Prepare the machine for transition to protected mode. + */ + +#include "boot.h" +#include + +/* + * Invoke the realmode switch hook if present; otherwise + * disable all interrupts. + */ +static void realmode_switch_hook(void) +{ + if (boot_params.hdr.realmode_swtch) { + asm volatile("lcallw *%0" + : : "m" (boot_params.hdr.realmode_swtch) + : "eax", "ebx", "ecx", "edx"); + } else { + asm volatile("cli"); + outb(0x80, 0x70); /* Disable NMI */ + io_delay(); + } +} + +/* + * A zImage kernel is loaded at 0x10000 but wants to run at 0x1000. + * A bzImage kernel is loaded and runs at 0x100000. + */ +static void move_kernel_around(void) +{ + /* Note: rely on the compile-time option here rather than + the LOADED_HIGH flag. The Qemu kernel loader unconditionally + sets the loadflags to zero. */ +#ifndef __BIG_KERNEL__ + u16 dst_seg, src_seg; + u32 syssize; + + dst_seg = 0x1000 >> 4; + src_seg = 0x10000 >> 4; + syssize = boot_params.hdr.syssize; /* Size in 16-byte paragraphs */ + + while (syssize) { + int paras = (syssize >= 0x1000) ? 0x1000 : syssize; + int dwords = paras << 2; + + asm volatile("pushw %%es ; " + "pushw %%ds ; " + "movw %1,%%es ; " + "movw %2,%%ds ; " + "xorw %%di,%%di ; " + "xorw %%si,%%si ; " + "rep;movsl ; " + "popw %%ds ; " + "popw %%es" + : "+c" (dwords) + : "r" (dst_seg), "r" (src_seg) + : "esi", "edi"); + + syssize -= paras; + dst_seg += paras; + src_seg += paras; + } +#endif +} + +/* + * Disable all interrupts at the legacy PIC. + */ +static void mask_all_interrupts(void) +{ + outb(0xff, 0xa1); /* Mask all interrupts on the secondary PIC */ + io_delay(); + outb(0xfb, 0x21); /* Mask all but cascade on the primary PIC */ + io_delay(); +} + +/* + * Reset IGNNE# if asserted in the FPU. + */ +static void reset_coprocessor(void) +{ + outb(0, 0xf0); + io_delay(); + outb(0, 0xf1); + io_delay(); +} + +/* + * Set up the GDT + */ +#define GDT_ENTRY(flags,base,limit) \ + (((u64)(base & 0xff000000) << 32) | \ + ((u64)flags << 40) | \ + ((u64)(limit & 0x00ff0000) << 32) | \ + ((u64)(base & 0x00ffff00) << 16) | \ + ((u64)(limit & 0x0000ffff))) + +struct gdt_ptr { + u16 len; + u32 ptr; +} __attribute__((packed)); + +static void setup_gdt(void) +{ + /* There are machines which are known to not boot with the GDT + being 8-byte unaligned. Intel recommends 16 byte alignment. */ + static const u64 boot_gdt[] __attribute__((aligned(16))) = { + /* CS: code, read/execute, 4 GB, base 0 */ + [GDT_ENTRY_BOOT_CS] = GDT_ENTRY(0xc09b, 0, 0xfffff), + /* DS: data, read/write, 4 GB, base 0 */ + [GDT_ENTRY_BOOT_DS] = GDT_ENTRY(0xc093, 0, 0xfffff), + }; + /* Xen HVM incorrectly stores a pointer to the gdt_ptr, instead + of the gdt_ptr contents. Thus, make it static so it will + stay in memory, at least long enough that we switch to the + proper kernel GDT. */ + static struct gdt_ptr gdt; + + gdt.len = sizeof(boot_gdt)-1; + gdt.ptr = (u32)&boot_gdt + (ds() << 4); + + asm volatile("lgdtl %0" : : "m" (gdt)); +} + +/* + * Set up the IDT + */ +static void setup_idt(void) +{ + static const struct gdt_ptr null_idt = {0, 0}; + asm volatile("lidtl %0" : : "m" (null_idt)); +} + +/* + * Actual invocation sequence + */ +void go_to_protected_mode(void) +{ + /* Hook before leaving real mode, also disables interrupts */ + realmode_switch_hook(); + + /* Move the kernel/setup to their final resting places */ + move_kernel_around(); + + /* Enable the A20 gate */ + if (enable_a20()) { + puts("A20 gate not responding, unable to boot...\n"); + die(); + } + + /* Reset coprocessor (IGNNE#) */ + reset_coprocessor(); + + /* Mask all interrupts in the PIC */ + mask_all_interrupts(); + + /* Actual transition to protected mode... */ + setup_idt(); + setup_gdt(); + protected_mode_jump(boot_params.hdr.code32_start, + (u32)&boot_params + (ds() << 4)); +} diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S new file mode 100644 index 00000000000..2e559233725 --- /dev/null +++ b/arch/x86/boot/pmjump.S @@ -0,0 +1,54 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/pmjump.S + * + * The actual transition into protected mode + */ + +#include +#include + + .text + + .globl protected_mode_jump + .type protected_mode_jump, @function + + .code16 + +/* + * void protected_mode_jump(u32 entrypoint, u32 bootparams); + */ +protected_mode_jump: + xorl %ebx, %ebx # Flag to indicate this is a boot + movl %edx, %esi # Pointer to boot_params table + movl %eax, 2f # Patch ljmpl instruction + jmp 1f # Short jump to flush instruction q. + +1: + movw $__BOOT_DS, %cx + + movl %cr0, %edx + orb $1, %dl # Protected mode (PE) bit + movl %edx, %cr0 + + movw %cx, %ds + movw %cx, %es + movw %cx, %fs + movw %cx, %gs + movw %cx, %ss + + # Jump to the 32-bit entrypoint + .byte 0x66, 0xea # ljmpl opcode +2: .long 0 # offset + .word __BOOT_CS # segment + + .size protected_mode_jump, .-protected_mode_jump diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c new file mode 100644 index 00000000000..1a09f9309d3 --- /dev/null +++ b/arch/x86/boot/printf.c @@ -0,0 +1,307 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/printf.c + * + * Oh, it's a waste of space, but oh-so-yummy for debugging. This + * version of printf() does not include 64-bit support. "Live with + * it." + * + */ + +#include "boot.h" + +static int skip_atoi(const char **s) +{ + int i = 0; + + while (isdigit(**s)) + i = i * 10 + *((*s)++) - '0'; + return i; +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SPECIAL 32 /* 0x */ +#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ + +#define do_div(n,base) ({ \ +int __res; \ +__res = ((unsigned long) n) % (unsigned) base; \ +n = ((unsigned long) n) / (unsigned) base; \ +__res; }) + +static char *number(char *str, long num, int base, int size, int precision, + int type) +{ + char c, sign, tmp[66]; + const char *digits = "0123456789abcdefghijklmnopqrstuvwxyz"; + int i; + + if (type & LARGE) + digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + if (type & LEFT) + type &= ~ZEROPAD; + if (base < 2 || base > 36) + return 0; + c = (type & ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & SIGN) { + if (num < 0) { + sign = '-'; + num = -num; + size--; + } else if (type & PLUS) { + sign = '+'; + size--; + } else if (type & SPACE) { + sign = ' '; + size--; + } + } + if (type & SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++] = '0'; + else + while (num != 0) + tmp[i++] = digits[do_div(num, base)]; + if (i > precision) + precision = i; + size -= precision; + if (!(type & (ZEROPAD + LEFT))) + while (size-- > 0) + *str++ = ' '; + if (sign) + *str++ = sign; + if (type & SPECIAL) { + if (base == 8) + *str++ = '0'; + else if (base == 16) { + *str++ = '0'; + *str++ = digits[33]; + } + } + if (!(type & LEFT)) + while (size-- > 0) + *str++ = c; + while (i < precision--) + *str++ = '0'; + while (i-- > 0) + *str++ = tmp[i]; + while (size-- > 0) + *str++ = ' '; + return str; +} + +int vsprintf(char *buf, const char *fmt, va_list args) +{ + int len; + unsigned long num; + int i, base; + char *str; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + + for (str = buf; *fmt; ++fmt) { + if (*fmt != '%') { + *str++ = *fmt; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': + flags |= LEFT; + goto repeat; + case '+': + flags |= PLUS; + goto repeat; + case ' ': + flags |= SPACE; + goto repeat; + case '#': + flags |= SPECIAL; + goto repeat; + case '0': + flags |= ZEROPAD; + goto repeat; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') { + qualifier = *fmt; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) + while (--field_width > 0) + *str++ = ' '; + *str++ = (unsigned char)va_arg(args, int); + while (--field_width > 0) + *str++ = ' '; + continue; + + case 's': + s = va_arg(args, char *); + len = strnlen(s, precision); + + if (!(flags & LEFT)) + while (len < field_width--) + *str++ = ' '; + for (i = 0; i < len; ++i) + *str++ = *s++; + while (len < field_width--) + *str++ = ' '; + continue; + + case 'p': + if (field_width == -1) { + field_width = 2 * sizeof(void *); + flags |= ZEROPAD; + } + str = number(str, + (unsigned long)va_arg(args, void *), 16, + field_width, precision, flags); + continue; + + case 'n': + if (qualifier == 'l') { + long *ip = va_arg(args, long *); + *ip = (str - buf); + } else { + int *ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + *str++ = '%'; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'X': + flags |= LARGE; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + *str++ = '%'; + if (*fmt) + *str++ = *fmt; + else + --fmt; + continue; + } + if (qualifier == 'l') + num = va_arg(args, unsigned long); + else if (qualifier == 'h') { + num = (unsigned short)va_arg(args, int); + if (flags & SIGN) + num = (short)num; + } else if (flags & SIGN) + num = va_arg(args, int); + else + num = va_arg(args, unsigned int); + str = number(str, num, base, field_width, precision, flags); + } + *str = '\0'; + return str - buf; +} + +int sprintf(char *buf, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = vsprintf(buf, fmt, args); + va_end(args); + return i; +} + +int printf(const char *fmt, ...) +{ + char printf_buf[1024]; + va_list args; + int printed; + + va_start(args, fmt); + printed = vsprintf(printf_buf, fmt, args); + va_end(args); + + puts(printf_buf); + + return printed; +} diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld new file mode 100644 index 00000000000..df9234b3a5e --- /dev/null +++ b/arch/x86/boot/setup.ld @@ -0,0 +1,54 @@ +/* + * setup.ld + * + * Linker script for the i386 setup code + */ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(_start) + +SECTIONS +{ + . = 0; + .bstext : { *(.bstext) } + .bsdata : { *(.bsdata) } + + . = 497; + .header : { *(.header) } + .inittext : { *(.inittext) } + .initdata : { *(.initdata) } + .text : { *(.text*) } + + . = ALIGN(16); + .rodata : { *(.rodata*) } + + .videocards : { + video_cards = .; + *(.videocards) + video_cards_end = .; + } + + . = ALIGN(16); + .data : { *(.data*) } + + .signature : { + setup_sig = .; + LONG(0x5a5aaa55) + } + + + . = ALIGN(16); + .bss : + { + __bss_start = .; + *(.bss) + __bss_end = .; + } + . = ALIGN(16); + _end = .; + + /DISCARD/ : { *(.note*) } + + . = ASSERT(_end <= 0x8000, "Setup too big!"); + . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!"); +} diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c new file mode 100644 index 00000000000..481a2209778 --- /dev/null +++ b/arch/x86/boot/string.c @@ -0,0 +1,52 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/string.c + * + * Very basic string functions + */ + +#include "boot.h" + +int strcmp(const char *str1, const char *str2) +{ + const unsigned char *s1 = (const unsigned char *)str1; + const unsigned char *s2 = (const unsigned char *)str2; + int delta = 0; + + while (*s1 || *s2) { + delta = *s2 - *s1; + if (delta) + return delta; + s1++; + s2++; + } + return 0; +} + +size_t strnlen(const char *s, size_t maxlen) +{ + const char *es = s; + while (*es && maxlen) { + es++; + maxlen--; + } + + return (es - s); +} + +unsigned int atou(const char *s) +{ + unsigned int i = 0; + while (isdigit(*s)) + i = i * 10 + (*s++ - '0'); + return i; +} diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c new file mode 100644 index 00000000000..f3f14bd2637 --- /dev/null +++ b/arch/x86/boot/tty.c @@ -0,0 +1,112 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/tty.c + * + * Very simple screen I/O + * XXX: Probably should add very simple serial I/O? + */ + +#include "boot.h" + +/* + * These functions are in .inittext so they can be used to signal + * error during initialization. + */ + +void __attribute__((section(".inittext"))) putchar(int ch) +{ + unsigned char c = ch; + + if (c == '\n') + putchar('\r'); /* \n -> \r\n */ + + /* int $0x10 is known to have bugs involving touching registers + it shouldn't. Be extra conservative... */ + asm volatile("pushal; pushw %%ds; int $0x10; popw %%ds; popal" + : : "b" (0x0007), "c" (0x0001), "a" (0x0e00|ch)); +} + +void __attribute__((section(".inittext"))) puts(const char *str) +{ + int n = 0; + while (*str) { + putchar(*str++); + n++; + } +} + +/* + * Read the CMOS clock through the BIOS, and return the + * seconds in BCD. + */ + +static u8 gettime(void) +{ + u16 ax = 0x0200; + u16 cx, dx; + + asm volatile("int $0x1a" + : "+a" (ax), "=c" (cx), "=d" (dx) + : : "ebx", "esi", "edi"); + + return dx >> 8; +} + +/* + * Read from the keyboard + */ +int getchar(void) +{ + u16 ax = 0; + asm volatile("int $0x16" : "+a" (ax)); + + return ax & 0xff; +} + +static int kbd_pending(void) +{ + u8 pending; + asm volatile("int $0x16; setnz %0" + : "=rm" (pending) + : "a" (0x0100)); + return pending; +} + +void kbd_flush(void) +{ + for (;;) { + if (!kbd_pending()) + break; + getchar(); + } +} + +int getchar_timeout(void) +{ + int cnt = 30; + int t0, t1; + + t0 = gettime(); + + while (cnt) { + if (kbd_pending()) + return getchar(); + + t1 = gettime(); + if (t0 != t1) { + cnt--; + t0 = t1; + } + } + + return 0; /* Timeout! */ +} diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c new file mode 100644 index 00000000000..c61462f7d9a --- /dev/null +++ b/arch/x86/boot/version.c @@ -0,0 +1,23 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/version.c + * + * Kernel version string + */ + +#include "boot.h" +#include +#include + +const char kernel_version[] = + UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") " + UTS_VERSION; diff --git a/arch/x86/boot/vesa.h b/arch/x86/boot/vesa.h new file mode 100644 index 00000000000..ff5b73cd406 --- /dev/null +++ b/arch/x86/boot/vesa.h @@ -0,0 +1,79 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 1999-2007 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +#ifndef BOOT_VESA_H +#define BOOT_VESA_H + +typedef struct { + u16 off, seg; +} far_ptr; + +/* VESA General Information table */ +struct vesa_general_info { + u32 signature; /* 0 Magic number = "VESA" */ + u16 version; /* 4 */ + far_ptr vendor_string; /* 6 */ + u32 capabilities; /* 10 */ + far_ptr video_mode_ptr; /* 14 */ + u16 total_memory; /* 18 */ + + u16 oem_software_rev; /* 20 */ + far_ptr oem_vendor_name_ptr; /* 22 */ + far_ptr oem_product_name_ptr; /* 26 */ + far_ptr oem_product_rev_ptr; /* 30 */ + + u8 reserved[222]; /* 34 */ + u8 oem_data[256]; /* 256 */ +} __attribute__ ((packed)); + +#define VESA_MAGIC ('V' + ('E' << 8) + ('S' << 16) + ('A' << 24)) +#define VBE2_MAGIC ('V' + ('B' << 8) + ('E' << 16) + ('2' << 24)) + +struct vesa_mode_info { + u16 mode_attr; /* 0 */ + u8 win_attr[2]; /* 2 */ + u16 win_grain; /* 4 */ + u16 win_size; /* 6 */ + u16 win_seg[2]; /* 8 */ + far_ptr win_scheme; /* 12 */ + u16 logical_scan; /* 16 */ + + u16 h_res; /* 18 */ + u16 v_res; /* 20 */ + u8 char_width; /* 22 */ + u8 char_height; /* 23 */ + u8 memory_planes; /* 24 */ + u8 bpp; /* 25 */ + u8 banks; /* 26 */ + u8 memory_layout; /* 27 */ + u8 bank_size; /* 28 */ + u8 image_planes; /* 29 */ + u8 page_function; /* 30 */ + + u8 rmask; /* 31 */ + u8 rpos; /* 32 */ + u8 gmask; /* 33 */ + u8 gpos; /* 34 */ + u8 bmask; /* 35 */ + u8 bpos; /* 36 */ + u8 resv_mask; /* 37 */ + u8 resv_pos; /* 38 */ + u8 dcm_info; /* 39 */ + + u32 lfb_ptr; /* 40 Linear frame buffer address */ + u32 offscreen_ptr; /* 44 Offscreen memory address */ + u16 offscreen_size; /* 48 */ + + u8 reserved[206]; /* 50 */ +} __attribute__ ((packed)); + +#endif /* LIB_SYS_VESA_H */ diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c new file mode 100644 index 00000000000..68e65d95cdf --- /dev/null +++ b/arch/x86/boot/video-bios.c @@ -0,0 +1,125 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/video-bios.c + * + * Standard video BIOS modes + * + * We have two options for this; silent and scanned. + */ + +#include "boot.h" +#include "video.h" + +__videocard video_bios; + +/* Set a conventional BIOS mode */ +static int set_bios_mode(u8 mode); + +static int bios_set_mode(struct mode_info *mi) +{ + return set_bios_mode(mi->mode - VIDEO_FIRST_BIOS); +} + +static int set_bios_mode(u8 mode) +{ + u16 ax; + u8 new_mode; + + ax = mode; /* AH=0x00 Set Video Mode */ + asm volatile(INT10 + : "+a" (ax) + : : "ebx", "ecx", "edx", "esi", "edi"); + + ax = 0x0f00; /* Get Current Video Mode */ + asm volatile(INT10 + : "+a" (ax) + : : "ebx", "ecx", "edx", "esi", "edi"); + + do_restore = 1; /* Assume video contents were lost */ + new_mode = ax & 0x7f; /* Not all BIOSes are clean with the top bit */ + + if (new_mode == mode) + return 0; /* Mode change OK */ + + if (new_mode != boot_params.screen_info.orig_video_mode) { + /* Mode setting failed, but we didn't end up where we + started. That's bad. Try to revert to the original + video mode. */ + ax = boot_params.screen_info.orig_video_mode; + asm volatile(INT10 + : "+a" (ax) + : : "ebx", "ecx", "edx", "esi", "edi"); + } + return -1; +} + +static int bios_probe(void) +{ + u8 mode; + u8 saved_mode = boot_params.screen_info.orig_video_mode; + u16 crtc; + struct mode_info *mi; + int nmodes = 0; + + if (adapter != ADAPTER_EGA && adapter != ADAPTER_VGA) + return 0; + + set_fs(0); + crtc = vga_crtc(); + + video_bios.modes = GET_HEAP(struct mode_info, 0); + + for (mode = 0x14; mode <= 0x7f; mode++) { + if (heap_free() < sizeof(struct mode_info)) + break; + + if (mode_defined(VIDEO_FIRST_BIOS+mode)) + continue; + + if (set_bios_mode(mode)) + continue; + + /* Try to verify that it's a text mode. */ + + /* Attribute Controller: make graphics controller disabled */ + if (in_idx(0x3c0, 0x10) & 0x01) + continue; + + /* Graphics Controller: verify Alpha addressing enabled */ + if (in_idx(0x3ce, 0x06) & 0x01) + continue; + + /* CRTC cursor location low should be zero(?) */ + if (in_idx(crtc, 0x0f)) + continue; + + mi = GET_HEAP(struct mode_info, 1); + mi->mode = VIDEO_FIRST_BIOS+mode; + mi->x = rdfs16(0x44a); + mi->y = rdfs8(0x484)+1; + nmodes++; + } + + set_bios_mode(saved_mode); + + return nmodes; +} + +__videocard video_bios = +{ + .card_name = "BIOS (scanned)", + .probe = bios_probe, + .set_mode = bios_set_mode, + .unsafe = 1, + .xmode_first = VIDEO_FIRST_BIOS, + .xmode_n = 0x80, +}; diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c new file mode 100644 index 00000000000..19219071071 --- /dev/null +++ b/arch/x86/boot/video-vesa.c @@ -0,0 +1,292 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/video-vesa.c + * + * VESA text modes + */ + +#include "boot.h" +#include "video.h" +#include "vesa.h" + +/* VESA information */ +static struct vesa_general_info vginfo; +static struct vesa_mode_info vminfo; + +__videocard video_vesa; + +static void vesa_store_mode_params_graphics(void); + +static int vesa_probe(void) +{ +#if defined(CONFIG_VIDEO_VESA) || defined(CONFIG_FIRMWARE_EDID) + u16 ax, cx, di; + u16 mode; + addr_t mode_ptr; + struct mode_info *mi; + int nmodes = 0; + + video_vesa.modes = GET_HEAP(struct mode_info, 0); + + vginfo.signature = VBE2_MAGIC; + + ax = 0x4f00; + di = (size_t)&vginfo; + asm(INT10 + : "+a" (ax), "+D" (di), "=m" (vginfo) + : : "ebx", "ecx", "edx", "esi"); + + if (ax != 0x004f || + vginfo.signature != VESA_MAGIC || + vginfo.version < 0x0102) + return 0; /* Not present */ +#endif /* CONFIG_VIDEO_VESA || CONFIG_FIRMWARE_EDID */ +#ifdef CONFIG_VIDEO_VESA + set_fs(vginfo.video_mode_ptr.seg); + mode_ptr = vginfo.video_mode_ptr.off; + + while ((mode = rdfs16(mode_ptr)) != 0xffff) { + mode_ptr += 2; + + if (heap_free() < sizeof(struct mode_info)) + break; /* Heap full, can't save mode info */ + + if (mode & ~0x1ff) + continue; + + memset(&vminfo, 0, sizeof vminfo); /* Just in case... */ + + ax = 0x4f01; + cx = mode; + di = (size_t)&vminfo; + asm(INT10 + : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo) + : : "ebx", "edx", "esi"); + + if (ax != 0x004f) + continue; + + if ((vminfo.mode_attr & 0x15) == 0x05) { + /* Text Mode, TTY BIOS supported, + supported by hardware */ + mi = GET_HEAP(struct mode_info, 1); + mi->mode = mode + VIDEO_FIRST_VESA; + mi->x = vminfo.h_res; + mi->y = vminfo.v_res; + nmodes++; + } else if ((vminfo.mode_attr & 0x99) == 0x99) { +#ifdef CONFIG_FB + /* Graphics mode, color, linear frame buffer + supported -- register the mode but hide from + the menu. Only do this if framebuffer is + configured, however, otherwise the user will + be left without a screen. */ + mi = GET_HEAP(struct mode_info, 1); + mi->mode = mode + VIDEO_FIRST_VESA; + mi->x = mi->y = 0; + nmodes++; +#endif + } + } + + return nmodes; +#else + return 0; +#endif /* CONFIG_VIDEO_VESA */ +} + +static int vesa_set_mode(struct mode_info *mode) +{ + u16 ax, bx, cx, di; + int is_graphic; + u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA; + + memset(&vminfo, 0, sizeof vminfo); /* Just in case... */ + + ax = 0x4f01; + cx = vesa_mode; + di = (size_t)&vminfo; + asm(INT10 + : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo) + : : "ebx", "edx", "esi"); + + if (ax != 0x004f) + return -1; + + if ((vminfo.mode_attr & 0x15) == 0x05) { + /* It's a supported text mode */ + is_graphic = 0; + } else if ((vminfo.mode_attr & 0x99) == 0x99) { + /* It's a graphics mode with linear frame buffer */ + is_graphic = 1; + vesa_mode |= 0x4000; /* Request linear frame buffer */ + } else { + return -1; /* Invalid mode */ + } + + + ax = 0x4f02; + bx = vesa_mode; + di = 0; + asm volatile(INT10 + : "+a" (ax), "+b" (bx), "+D" (di) + : : "ecx", "edx", "esi"); + + if (ax != 0x004f) + return -1; + + graphic_mode = is_graphic; + if (!is_graphic) { + /* Text mode */ + force_x = mode->x; + force_y = mode->y; + do_restore = 1; + } else { + /* Graphics mode */ + vesa_store_mode_params_graphics(); + } + + return 0; +} + + +/* Switch DAC to 8-bit mode */ +static void vesa_dac_set_8bits(void) +{ + u8 dac_size = 6; + + /* If possible, switch the DAC to 8-bit mode */ + if (vginfo.capabilities & 1) { + u16 ax, bx; + + ax = 0x4f08; + bx = 0x0800; + asm volatile(INT10 + : "+a" (ax), "+b" (bx) + : : "ecx", "edx", "esi", "edi"); + + if (ax == 0x004f) + dac_size = bx >> 8; + } + + /* Set the color sizes to the DAC size, and offsets to 0 */ + boot_params.screen_info.red_size = dac_size; + boot_params.screen_info.green_size = dac_size; + boot_params.screen_info.blue_size = dac_size; + boot_params.screen_info.rsvd_size = dac_size; + + boot_params.screen_info.red_pos = 0; + boot_params.screen_info.green_pos = 0; + boot_params.screen_info.blue_pos = 0; + boot_params.screen_info.rsvd_pos = 0; +} + +/* Save the VESA protected mode info */ +static void vesa_store_pm_info(void) +{ + u16 ax, bx, di, es; + + ax = 0x4f0a; + bx = di = 0; + asm("pushw %%es; "INT10"; movw %%es,%0; popw %%es" + : "=d" (es), "+a" (ax), "+b" (bx), "+D" (di) + : : "ecx", "esi"); + + if (ax != 0x004f) + return; + + boot_params.screen_info.vesapm_seg = es; + boot_params.screen_info.vesapm_off = di; +} + +/* + * Save video mode parameters for graphics mode + */ +static void vesa_store_mode_params_graphics(void) +{ + /* Tell the kernel we're in VESA graphics mode */ + boot_params.screen_info.orig_video_isVGA = 0x23; + + /* Mode parameters */ + boot_params.screen_info.vesa_attributes = vminfo.mode_attr; + boot_params.screen_info.lfb_linelength = vminfo.logical_scan; + boot_params.screen_info.lfb_width = vminfo.h_res; + boot_params.screen_info.lfb_height = vminfo.v_res; + boot_params.screen_info.lfb_depth = vminfo.bpp; + boot_params.screen_info.pages = vminfo.image_planes; + boot_params.screen_info.lfb_base = vminfo.lfb_ptr; + memcpy(&boot_params.screen_info.red_size, + &vminfo.rmask, 8); + + /* General parameters */ + boot_params.screen_info.lfb_size = vginfo.total_memory; + + if (vminfo.bpp <= 8) + vesa_dac_set_8bits(); + + vesa_store_pm_info(); +} + +/* + * Save EDID information for the kernel; this is invoked, separately, + * after mode-setting. + */ +void vesa_store_edid(void) +{ +#ifdef CONFIG_FIRMWARE_EDID + u16 ax, bx, cx, dx, di; + + /* Apparently used as a nonsense token... */ + memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info); + + if (vginfo.version < 0x0200) + return; /* EDID requires VBE 2.0+ */ + + ax = 0x4f15; /* VBE DDC */ + bx = 0x0000; /* Report DDC capabilities */ + cx = 0; /* Controller 0 */ + di = 0; /* ES:DI must be 0 by spec */ + + /* Note: The VBE DDC spec is different from the main VESA spec; + we genuinely have to assume all registers are destroyed here. */ + + asm("pushw %%es; movw %2,%%es; "INT10"; popw %%es" + : "+a" (ax), "+b" (bx) + : "c" (cx), "D" (di) + : "esi"); + + if (ax != 0x004f) + return; /* No EDID */ + + /* BH = time in seconds to transfer EDD information */ + /* BL = DDC level supported */ + + ax = 0x4f15; /* VBE DDC */ + bx = 0x0001; /* Read EDID */ + cx = 0; /* Controller 0 */ + dx = 0; /* EDID block number */ + di =(size_t) &boot_params.edid_info; /* (ES:)Pointer to block */ + asm(INT10 + : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info) + : "c" (cx), "D" (di) + : "esi"); +#endif /* CONFIG_FIRMWARE_EDID */ +} + +__videocard video_vesa = +{ + .card_name = "VESA", + .probe = vesa_probe, + .set_mode = vesa_set_mode, + .xmode_first = VIDEO_FIRST_VESA, + .xmode_n = 0x200, +}; diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c new file mode 100644 index 00000000000..aef02f9ec0c --- /dev/null +++ b/arch/x86/boot/video-vga.c @@ -0,0 +1,261 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/video-vga.c + * + * Common all-VGA modes + */ + +#include "boot.h" +#include "video.h" + +static struct mode_info vga_modes[] = { + { VIDEO_80x25, 80, 25 }, + { VIDEO_8POINT, 80, 50 }, + { VIDEO_80x43, 80, 43 }, + { VIDEO_80x28, 80, 28 }, + { VIDEO_80x30, 80, 30 }, + { VIDEO_80x34, 80, 34 }, + { VIDEO_80x60, 80, 60 }, +}; + +static struct mode_info ega_modes[] = { + { VIDEO_80x25, 80, 25 }, + { VIDEO_8POINT, 80, 43 }, +}; + +static struct mode_info cga_modes[] = { + { VIDEO_80x25, 80, 25 }, +}; + +__videocard video_vga; + +/* Set basic 80x25 mode */ +static u8 vga_set_basic_mode(void) +{ + u16 ax; + u8 rows; + u8 mode; + +#ifdef CONFIG_VIDEO_400_HACK + if (adapter >= ADAPTER_VGA) { + asm volatile(INT10 + : : "a" (0x1202), "b" (0x0030) + : "ecx", "edx", "esi", "edi"); + } +#endif + + ax = 0x0f00; + asm volatile(INT10 + : "+a" (ax) + : : "ebx", "ecx", "edx", "esi", "edi"); + + mode = (u8)ax; + + set_fs(0); + rows = rdfs8(0x484); /* rows minus one */ + +#ifndef CONFIG_VIDEO_400_HACK + if ((ax == 0x5003 || ax == 0x5007) && + (rows == 0 || rows == 24)) + return mode; +#endif + + if (mode != 3 && mode != 7) + mode = 3; + + /* Set the mode */ + ax = mode; + asm volatile(INT10 + : "+a" (ax) + : : "ebx", "ecx", "edx", "esi", "edi"); + do_restore = 1; + return mode; +} + +static void vga_set_8font(void) +{ + /* Set 8x8 font - 80x43 on EGA, 80x50 on VGA */ + + /* Set 8x8 font */ + asm volatile(INT10 : : "a" (0x1112), "b" (0)); + + /* Use alternate print screen */ + asm volatile(INT10 : : "a" (0x1200), "b" (0x20)); + + /* Turn off cursor emulation */ + asm volatile(INT10 : : "a" (0x1201), "b" (0x34)); + + /* Cursor is scan lines 6-7 */ + asm volatile(INT10 : : "a" (0x0100), "c" (0x0607)); +} + +static void vga_set_14font(void) +{ + /* Set 9x14 font - 80x28 on VGA */ + + /* Set 9x14 font */ + asm volatile(INT10 : : "a" (0x1111), "b" (0)); + + /* Turn off cursor emulation */ + asm volatile(INT10 : : "a" (0x1201), "b" (0x34)); + + /* Cursor is scan lines 11-12 */ + asm volatile(INT10 : : "a" (0x0100), "c" (0x0b0c)); +} + +static void vga_set_80x43(void) +{ + /* Set 80x43 mode on VGA (not EGA) */ + + /* Set 350 scans */ + asm volatile(INT10 : : "a" (0x1201), "b" (0x30)); + + /* Reset video mode */ + asm volatile(INT10 : : "a" (0x0003)); + + vga_set_8font(); +} + +/* I/O address of the VGA CRTC */ +u16 vga_crtc(void) +{ + return (inb(0x3cc) & 1) ? 0x3d4 : 0x3b4; +} + +static void vga_set_480_scanlines(int end) +{ + u16 crtc; + u8 csel; + + crtc = vga_crtc(); + + out_idx(0x0c, crtc, 0x11); /* Vertical sync end, unlock CR0-7 */ + out_idx(0x0b, crtc, 0x06); /* Vertical total */ + out_idx(0x3e, crtc, 0x07); /* Vertical overflow */ + out_idx(0xea, crtc, 0x10); /* Vertical sync start */ + out_idx(end, crtc, 0x12); /* Vertical display end */ + out_idx(0xe7, crtc, 0x15); /* Vertical blank start */ + out_idx(0x04, crtc, 0x16); /* Vertical blank end */ + csel = inb(0x3cc); + csel &= 0x0d; + csel |= 0xe2; + outb(csel, 0x3cc); +} + +static void vga_set_80x30(void) +{ + vga_set_480_scanlines(0xdf); +} + +static void vga_set_80x34(void) +{ + vga_set_14font(); + vga_set_480_scanlines(0xdb); +} + +static void vga_set_80x60(void) +{ + vga_set_8font(); + vga_set_480_scanlines(0xdf); +} + +static int vga_set_mode(struct mode_info *mode) +{ + /* Set the basic mode */ + vga_set_basic_mode(); + + /* Override a possibly broken BIOS */ + force_x = mode->x; + force_y = mode->y; + + switch (mode->mode) { + case VIDEO_80x25: + break; + case VIDEO_8POINT: + vga_set_8font(); + break; + case VIDEO_80x43: + vga_set_80x43(); + break; + case VIDEO_80x28: + vga_set_14font(); + break; + case VIDEO_80x30: + vga_set_80x30(); + break; + case VIDEO_80x34: + vga_set_80x34(); + break; + case VIDEO_80x60: + vga_set_80x60(); + break; + } + + return 0; +} + +/* + * Note: this probe includes basic information required by all + * systems. It should be executed first, by making sure + * video-vga.c is listed first in the Makefile. + */ +static int vga_probe(void) +{ + static const char *card_name[] = { + "CGA/MDA/HGC", "EGA", "VGA" + }; + static struct mode_info *mode_lists[] = { + cga_modes, + ega_modes, + vga_modes, + }; + static int mode_count[] = { + sizeof(cga_modes)/sizeof(struct mode_info), + sizeof(ega_modes)/sizeof(struct mode_info), + sizeof(vga_modes)/sizeof(struct mode_info), + }; + u8 vga_flag; + + asm(INT10 + : "=b" (boot_params.screen_info.orig_video_ega_bx) + : "a" (0x1200), "b" (0x10) /* Check EGA/VGA */ + : "ecx", "edx", "esi", "edi"); + + /* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */ + if ((u8)boot_params.screen_info.orig_video_ega_bx != 0x10) { + /* EGA/VGA */ + asm(INT10 + : "=a" (vga_flag) + : "a" (0x1a00) + : "ebx", "ecx", "edx", "esi", "edi"); + + if (vga_flag == 0x1a) { + adapter = ADAPTER_VGA; + boot_params.screen_info.orig_video_isVGA = 1; + } else { + adapter = ADAPTER_EGA; + } + } else { + adapter = ADAPTER_CGA; + } + + video_vga.modes = mode_lists[adapter]; + video_vga.card_name = card_name[adapter]; + return mode_count[adapter]; +} + +__videocard video_vga = +{ + .card_name = "VGA", + .probe = vga_probe, + .set_mode = vga_set_mode, +}; diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c new file mode 100644 index 00000000000..e4ba897bf9a --- /dev/null +++ b/arch/x86/boot/video.c @@ -0,0 +1,467 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/video.c + * + * Select video mode + */ + +#include "boot.h" +#include "video.h" +#include "vesa.h" + +/* + * Mode list variables + */ +static struct card_info cards[]; /* List of cards to probe for */ + +/* + * Common variables + */ +int adapter; /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */ +u16 video_segment; +int force_x, force_y; /* Don't query the BIOS for cols/rows */ + +int do_restore = 0; /* Screen contents changed during mode flip */ +int graphic_mode; /* Graphic mode with linear frame buffer */ + +static void store_cursor_position(void) +{ + u16 curpos; + u16 ax, bx; + + ax = 0x0300; + bx = 0; + asm(INT10 + : "=d" (curpos), "+a" (ax), "+b" (bx) + : : "ecx", "esi", "edi"); + + boot_params.screen_info.orig_x = curpos; + boot_params.screen_info.orig_y = curpos >> 8; +} + +static void store_video_mode(void) +{ + u16 ax, page; + + /* N.B.: the saving of the video page here is a bit silly, + since we pretty much assume page 0 everywhere. */ + ax = 0x0f00; + asm(INT10 + : "+a" (ax), "=b" (page) + : : "ecx", "edx", "esi", "edi"); + + /* Not all BIOSes are clean with respect to the top bit */ + boot_params.screen_info.orig_video_mode = ax & 0x7f; + boot_params.screen_info.orig_video_page = page >> 8; +} + +/* + * Store the video mode parameters for later usage by the kernel. + * This is done by asking the BIOS except for the rows/columns + * parameters in the default 80x25 mode -- these are set directly, + * because some very obscure BIOSes supply insane values. + */ +static void store_mode_params(void) +{ + u16 font_size; + int x, y; + + /* For graphics mode, it is up to the mode-setting driver + (currently only video-vesa.c) to store the parameters */ + if (graphic_mode) + return; + + store_cursor_position(); + store_video_mode(); + + if (boot_params.screen_info.orig_video_mode == 0x07) { + /* MDA, HGC, or VGA in monochrome mode */ + video_segment = 0xb000; + } else { + /* CGA, EGA, VGA and so forth */ + video_segment = 0xb800; + } + + set_fs(0); + font_size = rdfs16(0x485); /* Font size, BIOS area */ + boot_params.screen_info.orig_video_points = font_size; + + x = rdfs16(0x44a); + y = (adapter == ADAPTER_CGA) ? 25 : rdfs8(0x484)+1; + + if (force_x) + x = force_x; + if (force_y) + y = force_y; + + boot_params.screen_info.orig_video_cols = x; + boot_params.screen_info.orig_video_lines = y; +} + +/* Probe the video drivers and have them generate their mode lists. */ +static void probe_cards(int unsafe) +{ + struct card_info *card; + static u8 probed[2]; + + if (probed[unsafe]) + return; + + probed[unsafe] = 1; + + for (card = video_cards; card < video_cards_end; card++) { + if (card->unsafe == unsafe) { + if (card->probe) + card->nmodes = card->probe(); + else + card->nmodes = 0; + } + } +} + +/* Test if a mode is defined */ +int mode_defined(u16 mode) +{ + struct card_info *card; + struct mode_info *mi; + int i; + + for (card = video_cards; card < video_cards_end; card++) { + mi = card->modes; + for (i = 0; i < card->nmodes; i++, mi++) { + if (mi->mode == mode) + return 1; + } + } + + return 0; +} + +/* Set mode (without recalc) */ +static int raw_set_mode(u16 mode, u16 *real_mode) +{ + int nmode, i; + struct card_info *card; + struct mode_info *mi; + + /* Drop the recalc bit if set */ + mode &= ~VIDEO_RECALC; + + /* Scan for mode based on fixed ID, position, or resolution */ + nmode = 0; + for (card = video_cards; card < video_cards_end; card++) { + mi = card->modes; + for (i = 0; i < card->nmodes; i++, mi++) { + int visible = mi->x || mi->y; + + if ((mode == nmode && visible) || + mode == mi->mode || + mode == (mi->y << 8)+mi->x) { + *real_mode = mi->mode; + return card->set_mode(mi); + } + + if (visible) + nmode++; + } + } + + /* Nothing found? Is it an "exceptional" (unprobed) mode? */ + for (card = video_cards; card < video_cards_end; card++) { + if (mode >= card->xmode_first && + mode < card->xmode_first+card->xmode_n) { + struct mode_info mix; + *real_mode = mix.mode = mode; + mix.x = mix.y = 0; + return card->set_mode(&mix); + } + } + + /* Otherwise, failure... */ + return -1; +} + +/* + * Recalculate the vertical video cutoff (hack!) + */ +static void vga_recalc_vertical(void) +{ + unsigned int font_size, rows; + u16 crtc; + u8 pt, ov; + + set_fs(0); + font_size = rdfs8(0x485); /* BIOS: font size (pixels) */ + rows = force_y ? force_y : rdfs8(0x484)+1; /* Text rows */ + + rows *= font_size; /* Visible scan lines */ + rows--; /* ... minus one */ + + crtc = vga_crtc(); + + pt = in_idx(crtc, 0x11); + pt &= ~0x80; /* Unlock CR0-7 */ + out_idx(pt, crtc, 0x11); + + out_idx((u8)rows, crtc, 0x12); /* Lower height register */ + + ov = in_idx(crtc, 0x07); /* Overflow register */ + ov &= 0xbd; + ov |= (rows >> (8-1)) & 0x02; + ov |= (rows >> (9-6)) & 0x40; + out_idx(ov, crtc, 0x07); +} + +/* Set mode (with recalc if specified) */ +static int set_mode(u16 mode) +{ + int rv; + u16 real_mode; + + /* Very special mode numbers... */ + if (mode == VIDEO_CURRENT_MODE) + return 0; /* Nothing to do... */ + else if (mode == NORMAL_VGA) + mode = VIDEO_80x25; + else if (mode == EXTENDED_VGA) + mode = VIDEO_8POINT; + + rv = raw_set_mode(mode, &real_mode); + if (rv) + return rv; + + if (mode & VIDEO_RECALC) + vga_recalc_vertical(); + + /* Save the canonical mode number for the kernel, not + an alias, size specification or menu position */ + boot_params.hdr.vid_mode = real_mode; + return 0; +} + +static unsigned int get_entry(void) +{ + char entry_buf[4]; + int i, len = 0; + int key; + unsigned int v; + + do { + key = getchar(); + + if (key == '\b') { + if (len > 0) { + puts("\b \b"); + len--; + } + } else if ((key >= '0' && key <= '9') || + (key >= 'A' && key <= 'Z') || + (key >= 'a' && key <= 'z')) { + if (len < sizeof entry_buf) { + entry_buf[len++] = key; + putchar(key); + } + } + } while (key != '\r'); + putchar('\n'); + + if (len == 0) + return VIDEO_CURRENT_MODE; /* Default */ + + v = 0; + for (i = 0; i < len; i++) { + v <<= 4; + key = entry_buf[i] | 0x20; + v += (key > '9') ? key-'a'+10 : key-'0'; + } + + return v; +} + +static void display_menu(void) +{ + struct card_info *card; + struct mode_info *mi; + char ch; + int i; + + puts("Mode: COLSxROWS:\n"); + + ch = '0'; + for (card = video_cards; card < video_cards_end; card++) { + mi = card->modes; + for (i = 0; i < card->nmodes; i++, mi++) { + int visible = mi->x && mi->y; + u16 mode_id = mi->mode ? mi->mode : + (mi->y << 8)+mi->x; + + if (!visible) + continue; /* Hidden mode */ + + printf("%c %04X %3dx%-3d %s\n", + ch, mode_id, mi->x, mi->y, card->card_name); + + if (ch == '9') + ch = 'a'; + else if (ch == 'z' || ch == ' ') + ch = ' '; /* Out of keys... */ + else + ch++; + } + } +} + +#define H(x) ((x)-'a'+10) +#define SCAN ((H('s')<<12)+(H('c')<<8)+(H('a')<<4)+H('n')) + +static unsigned int mode_menu(void) +{ + int key; + unsigned int sel; + + puts("Press to see video modes available, " + " to continue, or wait 30 sec\n"); + + kbd_flush(); + while (1) { + key = getchar_timeout(); + if (key == ' ' || key == 0) + return VIDEO_CURRENT_MODE; /* Default */ + if (key == '\r') + break; + putchar('\a'); /* Beep! */ + } + + + for (;;) { + display_menu(); + + puts("Enter a video mode or \"scan\" to scan for " + "additional modes: "); + sel = get_entry(); + if (sel != SCAN) + return sel; + + probe_cards(1); + } +} + +#ifdef CONFIG_VIDEO_RETAIN +/* Save screen content to the heap */ +struct saved_screen { + int x, y; + int curx, cury; + u16 *data; +} saved; + +static void save_screen(void) +{ + /* Should be called after store_mode_params() */ + saved.x = boot_params.screen_info.orig_video_cols; + saved.y = boot_params.screen_info.orig_video_lines; + saved.curx = boot_params.screen_info.orig_x; + saved.cury = boot_params.screen_info.orig_y; + + if (heap_free() < saved.x*saved.y*sizeof(u16)+512) + return; /* Not enough heap to save the screen */ + + saved.data = GET_HEAP(u16, saved.x*saved.y); + + set_fs(video_segment); + copy_from_fs(saved.data, 0, saved.x*saved.y*sizeof(u16)); +} + +static void restore_screen(void) +{ + /* Should be called after store_mode_params() */ + int xs = boot_params.screen_info.orig_video_cols; + int ys = boot_params.screen_info.orig_video_lines; + int y; + addr_t dst = 0; + u16 *src = saved.data; + u16 ax, bx, dx; + + if (graphic_mode) + return; /* Can't restore onto a graphic mode */ + + if (!src) + return; /* No saved screen contents */ + + /* Restore screen contents */ + + set_fs(video_segment); + for (y = 0; y < ys; y++) { + int npad; + + if (y < saved.y) { + int copy = (xs < saved.x) ? xs : saved.x; + copy_to_fs(dst, src, copy*sizeof(u16)); + dst += copy*sizeof(u16); + src += saved.x; + npad = (xs < saved.x) ? 0 : xs-saved.x; + } else { + npad = xs; + } + + /* Writes "npad" blank characters to + video_segment:dst and advances dst */ + asm volatile("pushw %%es ; " + "movw %2,%%es ; " + "shrw %%cx ; " + "jnc 1f ; " + "stosw \n\t" + "1: rep;stosl ; " + "popw %%es" + : "+D" (dst), "+c" (npad) + : "bdS" (video_segment), + "a" (0x07200720)); + } + + /* Restore cursor position */ + ax = 0x0200; /* Set cursor position */ + bx = 0; /* Page number (<< 8) */ + dx = (saved.cury << 8)+saved.curx; + asm volatile(INT10 + : "+a" (ax), "+b" (bx), "+d" (dx) + : : "ecx", "esi", "edi"); +} +#else +#define save_screen() ((void)0) +#define restore_screen() ((void)0) +#endif + +void set_video(void) +{ + u16 mode = boot_params.hdr.vid_mode; + + RESET_HEAP(); + + store_mode_params(); + save_screen(); + probe_cards(0); + + for (;;) { + if (mode == ASK_VGA) + mode = mode_menu(); + + if (!set_mode(mode)) + break; + + printf("Undefined video mode number: %x\n", mode); + mode = ASK_VGA; + } + vesa_store_edid(); + store_mode_params(); + + if (do_restore) + restore_screen(); +} diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h new file mode 100644 index 00000000000..b92447d5121 --- /dev/null +++ b/arch/x86/boot/video.h @@ -0,0 +1,152 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/video.h + * + * Header file for the real-mode video probing code + */ + +#ifndef BOOT_VIDEO_H +#define BOOT_VIDEO_H + +#include + +/* Enable autodetection of SVGA adapters and modes. */ +#undef CONFIG_VIDEO_SVGA + +/* Enable autodetection of VESA modes */ +#define CONFIG_VIDEO_VESA + +/* Retain screen contents when switching modes */ +#define CONFIG_VIDEO_RETAIN + +/* Force 400 scan lines for standard modes (hack to fix bad BIOS behaviour */ +#undef CONFIG_VIDEO_400_HACK + +/* This code uses an extended set of video mode numbers. These include: + * Aliases for standard modes + * NORMAL_VGA (-1) + * EXTENDED_VGA (-2) + * ASK_VGA (-3) + * Video modes numbered by menu position -- NOT RECOMMENDED because of lack + * of compatibility when extending the table. These are between 0x00 and 0xff. + */ +#define VIDEO_FIRST_MENU 0x0000 + +/* Standard BIOS video modes (BIOS number + 0x0100) */ +#define VIDEO_FIRST_BIOS 0x0100 + +/* VESA BIOS video modes (VESA number + 0x0200) */ +#define VIDEO_FIRST_VESA 0x0200 + +/* Video7 special modes (BIOS number + 0x0900) */ +#define VIDEO_FIRST_V7 0x0900 + +/* Special video modes */ +#define VIDEO_FIRST_SPECIAL 0x0f00 +#define VIDEO_80x25 0x0f00 +#define VIDEO_8POINT 0x0f01 +#define VIDEO_80x43 0x0f02 +#define VIDEO_80x28 0x0f03 +#define VIDEO_CURRENT_MODE 0x0f04 +#define VIDEO_80x30 0x0f05 +#define VIDEO_80x34 0x0f06 +#define VIDEO_80x60 0x0f07 +#define VIDEO_GFX_HACK 0x0f08 +#define VIDEO_LAST_SPECIAL 0x0f09 + +/* Video modes given by resolution */ +#define VIDEO_FIRST_RESOLUTION 0x1000 + +/* The "recalculate timings" flag */ +#define VIDEO_RECALC 0x8000 + +/* Define DO_STORE according to CONFIG_VIDEO_RETAIN */ +#ifdef CONFIG_VIDEO_RETAIN +void store_screen(void); +#define DO_STORE() store_screen() +#else +#define DO_STORE() ((void)0) +#endif /* CONFIG_VIDEO_RETAIN */ + +/* + * Mode table structures + */ + +struct mode_info { + u16 mode; /* Mode number (vga= style) */ + u8 x, y; /* Width, height */ +}; + +struct card_info { + const char *card_name; + int (*set_mode)(struct mode_info *mode); + int (*probe)(void); + struct mode_info *modes; + int nmodes; /* Number of probed modes so far */ + int unsafe; /* Probing is unsafe, only do after "scan" */ + u16 xmode_first; /* Unprobed modes to try to call anyway */ + u16 xmode_n; /* Size of unprobed mode range */ +}; + +#define __videocard struct card_info __attribute__((section(".videocards"))) +extern struct card_info video_cards[], video_cards_end[]; + +int mode_defined(u16 mode); /* video.c */ + +/* Basic video information */ +#define ADAPTER_CGA 0 /* CGA/MDA/HGC */ +#define ADAPTER_EGA 1 +#define ADAPTER_VGA 2 + +extern int adapter; +extern u16 video_segment; +extern int force_x, force_y; /* Don't query the BIOS for cols/rows */ +extern int do_restore; /* Restore screen contents */ +extern int graphic_mode; /* Graphics mode with linear frame buffer */ + +/* + * int $0x10 is notorious for touching registers it shouldn't. + * gcc doesn't like %ebp being clobbered, so define it as a push/pop + * sequence here. + * + * A number of systems, including the original PC can clobber %bp in + * certain circumstances, like when scrolling. There exists at least + * one Trident video card which could clobber DS under a set of + * circumstances that we are unlikely to encounter (scrolling when + * using an extended graphics mode of more than 800x600 pixels), but + * it's cheap insurance to deal with that here. + */ +#define INT10 "pushl %%ebp; pushw %%ds; int $0x10; popw %%ds; popl %%ebp" + +/* Accessing VGA indexed registers */ +static inline u8 in_idx(u16 port, u8 index) +{ + outb(index, port); + return inb(port+1); +} + +static inline void out_idx(u8 v, u16 port, u8 index) +{ + outw(index+(v << 8), port); +} + +/* Writes a value to an indexed port and then reads the port again */ +static inline u8 tst_idx(u8 v, u16 port, u8 index) +{ + out_idx(port, index, v); + return in_idx(port, index); +} + +/* Get the I/O port of the VGA CRTC */ +u16 vga_crtc(void); /* video-vga.c */ + +#endif /* BOOT_VIDEO_H */ diff --git a/arch/x86/boot/voyager.c b/arch/x86/boot/voyager.c new file mode 100644 index 00000000000..61c8fe0453b --- /dev/null +++ b/arch/x86/boot/voyager.c @@ -0,0 +1,46 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +/* + * arch/i386/boot/voyager.c + * + * Get the Voyager config information + */ + +#include "boot.h" + +#ifdef CONFIG_X86_VOYAGER + +int query_voyager(void) +{ + u8 err; + u16 es, di; + /* Abuse the apm_bios_info area for this */ + u8 *data_ptr = (u8 *)&boot_params.apm_bios_info; + + data_ptr[0] = 0xff; /* Flag on config not found(?) */ + + asm("pushw %%es ; " + "int $0x15 ; " + "setc %0 ; " + "movw %%es, %1 ; " + "popw %%es" + : "=q" (err), "=r" (es), "=D" (di) + : "a" (0xffc0)); + + if (err) + return -1; /* Not Voyager */ + + set_fs(es); + copy_from_fs(data_ptr, di, 7); /* Table is 7 bytes apparently */ + return 0; +} + +#endif /* CONFIG_X86_VOYAGER */ diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile index 67096389de1..0f7b6c2cfa5 100644 --- a/arch/x86_64/boot/Makefile +++ b/arch/x86_64/boot/Makefile @@ -5,5 +5,5 @@ # So tell kbuild that we fetch the code from i386 and include the # Makefile from i386 too. -src := arch/i386/boot +src := arch/x86/boot include $(src)/Makefile -- cgit v1.2.3-70-g09d2 From ad757b6aa5801b81dec609d87753604a06313c53 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:47 +0200 Subject: i386: move mm Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Makefile | 2 +- arch/i386/mm/Makefile | 5 - arch/i386/mm/Makefile_32 | 10 - arch/i386/mm/boot_ioremap_32.c | 100 ----- arch/i386/mm/discontig_32.c | 431 --------------------- arch/i386/mm/extable_32.c | 35 -- arch/i386/mm/fault_32.c | 657 ------------------------------- arch/i386/mm/highmem_32.c | 113 ------ arch/i386/mm/hugetlbpage.c | 391 ------------------- arch/i386/mm/init_32.c | 858 ----------------------------------------- arch/i386/mm/ioremap_32.c | 274 ------------- arch/i386/mm/mmap_32.c | 77 ---- arch/i386/mm/pageattr_32.c | 278 ------------- arch/i386/mm/pgtable_32.c | 373 ------------------ arch/x86/mm/Makefile | 5 + arch/x86/mm/Makefile_32 | 10 + arch/x86/mm/boot_ioremap_32.c | 100 +++++ arch/x86/mm/discontig_32.c | 431 +++++++++++++++++++++ arch/x86/mm/extable_32.c | 35 ++ arch/x86/mm/fault_32.c | 657 +++++++++++++++++++++++++++++++ arch/x86/mm/highmem_32.c | 113 ++++++ arch/x86/mm/hugetlbpage.c | 391 +++++++++++++++++++ arch/x86/mm/init_32.c | 858 +++++++++++++++++++++++++++++++++++++++++ arch/x86/mm/ioremap_32.c | 274 +++++++++++++ arch/x86/mm/mmap_32.c | 77 ++++ arch/x86/mm/pageattr_32.c | 278 +++++++++++++ arch/x86/mm/pgtable_32.c | 373 ++++++++++++++++++ arch/x86_64/mm/Makefile | 2 +- arch/x86_64/mm/Makefile_64 | 2 +- 29 files changed, 3605 insertions(+), 3605 deletions(-) delete mode 100644 arch/i386/mm/Makefile delete mode 100644 arch/i386/mm/Makefile_32 delete mode 100644 arch/i386/mm/boot_ioremap_32.c delete mode 100644 arch/i386/mm/discontig_32.c delete mode 100644 arch/i386/mm/extable_32.c delete mode 100644 arch/i386/mm/fault_32.c delete mode 100644 arch/i386/mm/highmem_32.c delete mode 100644 arch/i386/mm/hugetlbpage.c delete mode 100644 arch/i386/mm/init_32.c delete mode 100644 arch/i386/mm/ioremap_32.c delete mode 100644 arch/i386/mm/mmap_32.c delete mode 100644 arch/i386/mm/pageattr_32.c delete mode 100644 arch/i386/mm/pgtable_32.c create mode 100644 arch/x86/mm/Makefile create mode 100644 arch/x86/mm/Makefile_32 create mode 100644 arch/x86/mm/boot_ioremap_32.c create mode 100644 arch/x86/mm/discontig_32.c create mode 100644 arch/x86/mm/extable_32.c create mode 100644 arch/x86/mm/fault_32.c create mode 100644 arch/x86/mm/highmem_32.c create mode 100644 arch/x86/mm/hugetlbpage.c create mode 100644 arch/x86/mm/init_32.c create mode 100644 arch/x86/mm/ioremap_32.c create mode 100644 arch/x86/mm/mmap_32.c create mode 100644 arch/x86/mm/pageattr_32.c create mode 100644 arch/x86/mm/pgtable_32.c (limited to 'arch/x86') diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 776d8dcf234..cbdc14fddc3 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -103,7 +103,7 @@ head-y := arch/i386/kernel/head_32.o arch/i386/kernel/init_task_32.o libs-y += arch/x86/lib/ core-y += arch/i386/kernel/ \ - arch/i386/mm/ \ + arch/x86/mm/ \ $(mcore-y)/ \ arch/x86/crypto/ drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ diff --git a/arch/i386/mm/Makefile b/arch/i386/mm/Makefile deleted file mode 100644 index 4042f8563a1..00000000000 --- a/arch/i386/mm/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -ifeq ($(CONFIG_X86_32),y) -include ${srctree}/arch/i386/mm/Makefile_32 -else -include ${srctree}/arch/x86_64/mm/Makefile_64 -endif diff --git a/arch/i386/mm/Makefile_32 b/arch/i386/mm/Makefile_32 deleted file mode 100644 index 362b4ad082d..00000000000 --- a/arch/i386/mm/Makefile_32 +++ /dev/null @@ -1,10 +0,0 @@ -# -# Makefile for the linux i386-specific parts of the memory manager. -# - -obj-y := init_32.o pgtable_32.o fault_32.o ioremap_32.o extable_32.o pageattr_32.o mmap_32.o - -obj-$(CONFIG_NUMA) += discontig_32.o -obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_HIGHMEM) += highmem_32.o -obj-$(CONFIG_BOOT_IOREMAP) += boot_ioremap_32.o diff --git a/arch/i386/mm/boot_ioremap_32.c b/arch/i386/mm/boot_ioremap_32.c deleted file mode 100644 index 4de95a17a7d..00000000000 --- a/arch/i386/mm/boot_ioremap_32.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * arch/i386/mm/boot_ioremap.c - * - * Re-map functions for early boot-time before paging_init() when the - * boot-time pagetables are still in use - * - * Written by Dave Hansen - */ - - -/* - * We need to use the 2-level pagetable functions, but CONFIG_X86_PAE - * keeps that from happenning. If anyone has a better way, I'm listening. - * - * boot_pte_t is defined only if this all works correctly - */ - -#undef CONFIG_X86_PAE -#undef CONFIG_PARAVIRT -#include -#include -#include -#include -#include - -/* - * I'm cheating here. It is known that the two boot PTE pages are - * allocated next to each other. I'm pretending that they're just - * one big array. - */ - -#define BOOT_PTE_PTRS (PTRS_PER_PTE*2) - -static unsigned long boot_pte_index(unsigned long vaddr) -{ - return __pa(vaddr) >> PAGE_SHIFT; -} - -static inline boot_pte_t* boot_vaddr_to_pte(void *address) -{ - boot_pte_t* boot_pg = (boot_pte_t*)pg0; - return &boot_pg[boot_pte_index((unsigned long)address)]; -} - -/* - * This is only for a caller who is clever enough to page-align - * phys_addr and virtual_source, and who also has a preference - * about which virtual address from which to steal ptes - */ -static void __boot_ioremap(unsigned long phys_addr, unsigned long nrpages, - void* virtual_source) -{ - boot_pte_t* pte; - int i; - char *vaddr = virtual_source; - - pte = boot_vaddr_to_pte(virtual_source); - for (i=0; i < nrpages; i++, phys_addr += PAGE_SIZE, pte++) { - set_pte(pte, pfn_pte(phys_addr>>PAGE_SHIFT, PAGE_KERNEL)); - __flush_tlb_one(&vaddr[i*PAGE_SIZE]); - } -} - -/* the virtual space we're going to remap comes from this array */ -#define BOOT_IOREMAP_PAGES 4 -#define BOOT_IOREMAP_SIZE (BOOT_IOREMAP_PAGES*PAGE_SIZE) -static __initdata char boot_ioremap_space[BOOT_IOREMAP_SIZE] - __attribute__ ((aligned (PAGE_SIZE))); - -/* - * This only applies to things which need to ioremap before paging_init() - * bt_ioremap() and plain ioremap() are both useless at this point. - * - * When used, we're still using the boot-time pagetables, which only - * have 2 PTE pages mapping the first 8MB - * - * There is no unmap. The boot-time PTE pages aren't used after boot. - * If you really want the space back, just remap it yourself. - * boot_ioremap(&ioremap_space-PAGE_OFFSET, BOOT_IOREMAP_SIZE) - */ -__init void* boot_ioremap(unsigned long phys_addr, unsigned long size) -{ - unsigned long last_addr, offset; - unsigned int nrpages; - - last_addr = phys_addr + size - 1; - - /* page align the requested address */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr) - phys_addr; - - nrpages = size >> PAGE_SHIFT; - if (nrpages > BOOT_IOREMAP_PAGES) - return NULL; - - __boot_ioremap(phys_addr, nrpages, boot_ioremap_space); - - return &boot_ioremap_space[offset]; -} diff --git a/arch/i386/mm/discontig_32.c b/arch/i386/mm/discontig_32.c deleted file mode 100644 index 860e912a3fb..00000000000 --- a/arch/i386/mm/discontig_32.c +++ /dev/null @@ -1,431 +0,0 @@ -/* - * Written by: Patricia Gaughen , IBM Corporation - * August 2002: added remote node KVA remap - Martin J. Bligh - * - * Copyright (C) 2002, IBM Corp. - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; -EXPORT_SYMBOL(node_data); -bootmem_data_t node0_bdata; - -/* - * numa interface - we expect the numa architecture specific code to have - * populated the following initialisation. - * - * 1) node_online_map - the map of all nodes configured (online) in the system - * 2) node_start_pfn - the starting page frame number for a node - * 3) node_end_pfn - the ending page fram number for a node - */ -unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly; -unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly; - - -#ifdef CONFIG_DISCONTIGMEM -/* - * 4) physnode_map - the mapping between a pfn and owning node - * physnode_map keeps track of the physical memory layout of a generic - * numa node on a 256Mb break (each element of the array will - * represent 256Mb of memory and will be marked by the node id. so, - * if the first gig is on node 0, and the second gig is on node 1 - * physnode_map will contain: - * - * physnode_map[0-3] = 0; - * physnode_map[4-7] = 1; - * physnode_map[8- ] = -1; - */ -s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; -EXPORT_SYMBOL(physnode_map); - -void memory_present(int nid, unsigned long start, unsigned long end) -{ - unsigned long pfn; - - printk(KERN_INFO "Node: %d, start_pfn: %ld, end_pfn: %ld\n", - nid, start, end); - printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); - printk(KERN_DEBUG " "); - for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) { - physnode_map[pfn / PAGES_PER_ELEMENT] = nid; - printk("%ld ", pfn); - } - printk("\n"); -} - -unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, - unsigned long end_pfn) -{ - unsigned long nr_pages = end_pfn - start_pfn; - - if (!nr_pages) - return 0; - - return (nr_pages + 1) * sizeof(struct page); -} -#endif - -extern unsigned long find_max_low_pfn(void); -extern void add_one_highpage_init(struct page *, int, int); -extern unsigned long highend_pfn, highstart_pfn; - -#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) - -unsigned long node_remap_start_pfn[MAX_NUMNODES]; -unsigned long node_remap_size[MAX_NUMNODES]; -unsigned long node_remap_offset[MAX_NUMNODES]; -void *node_remap_start_vaddr[MAX_NUMNODES]; -void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); - -void *node_remap_end_vaddr[MAX_NUMNODES]; -void *node_remap_alloc_vaddr[MAX_NUMNODES]; -static unsigned long kva_start_pfn; -static unsigned long kva_pages; -/* - * FLAT - support for basic PC memory model with discontig enabled, essentially - * a single node with all available processors in it with a flat - * memory map. - */ -int __init get_memcfg_numa_flat(void) -{ - printk("NUMA - single node, flat memory mode\n"); - - /* Run the memory configuration and find the top of memory. */ - find_max_pfn(); - node_start_pfn[0] = 0; - node_end_pfn[0] = max_pfn; - memory_present(0, 0, max_pfn); - - /* Indicate there is one node available. */ - nodes_clear(node_online_map); - node_set_online(0); - return 1; -} - -/* - * Find the highest page frame number we have available for the node - */ -static void __init find_max_pfn_node(int nid) -{ - if (node_end_pfn[nid] > max_pfn) - node_end_pfn[nid] = max_pfn; - /* - * if a user has given mem=XXXX, then we need to make sure - * that the node _starts_ before that, too, not just ends - */ - if (node_start_pfn[nid] > max_pfn) - node_start_pfn[nid] = max_pfn; - BUG_ON(node_start_pfn[nid] > node_end_pfn[nid]); -} - -/* - * Allocate memory for the pg_data_t for this node via a crude pre-bootmem - * method. For node zero take this from the bottom of memory, for - * subsequent nodes place them at node_remap_start_vaddr which contains - * node local data in physically node local memory. See setup_memory() - * for details. - */ -static void __init allocate_pgdat(int nid) -{ - if (nid && node_has_online_mem(nid)) - NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; - else { - NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn)); - min_low_pfn += PFN_UP(sizeof(pg_data_t)); - } -} - -void *alloc_remap(int nid, unsigned long size) -{ - void *allocation = node_remap_alloc_vaddr[nid]; - - size = ALIGN(size, L1_CACHE_BYTES); - - if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) - return 0; - - node_remap_alloc_vaddr[nid] += size; - memset(allocation, 0, size); - - return allocation; -} - -void __init remap_numa_kva(void) -{ - void *vaddr; - unsigned long pfn; - int node; - - for_each_online_node(node) { - for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { - vaddr = node_remap_start_vaddr[node]+(pfn< max_pfn) - continue; - if (node_end_pfn[nid] > max_pfn) - node_end_pfn[nid] = max_pfn; - - /* ensure the remap includes space for the pgdat. */ - size = node_remap_size[nid] + sizeof(pg_data_t); - - /* convert size to large (pmd size) pages, rounding up */ - size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES; - /* now the roundup is correct, convert to PAGE_SIZE pages */ - size = size * PTRS_PER_PTE; - - /* - * Validate the region we are allocating only contains valid - * pages. - */ - for (pfn = node_end_pfn[nid] - size; - pfn < node_end_pfn[nid]; pfn++) - if (!page_is_ram(pfn)) - break; - - if (pfn != node_end_pfn[nid]) - size = 0; - - printk("Reserving %ld pages of KVA for lmem_map of node %d\n", - size, nid); - node_remap_size[nid] = size; - node_remap_offset[nid] = reserve_pages; - reserve_pages += size; - printk("Shrinking node %d from %ld pages to %ld pages\n", - nid, node_end_pfn[nid], node_end_pfn[nid] - size); - - if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) { - /* - * Align node_end_pfn[] and node_remap_start_pfn[] to - * pmd boundary. remap_numa_kva will barf otherwise. - */ - printk("Shrinking node %d further by %ld pages for proper alignment\n", - nid, node_end_pfn[nid] & (PTRS_PER_PTE-1)); - size += node_end_pfn[nid] & (PTRS_PER_PTE-1); - } - - node_end_pfn[nid] -= size; - node_remap_start_pfn[nid] = node_end_pfn[nid]; - shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]); - } - printk("Reserving total of %ld pages for numa KVA remap\n", - reserve_pages); - return reserve_pages; -} - -extern void setup_bootmem_allocator(void); -unsigned long __init setup_memory(void) -{ - int nid; - unsigned long system_start_pfn, system_max_low_pfn; - - /* - * When mapping a NUMA machine we allocate the node_mem_map arrays - * from node local memory. They are then mapped directly into KVA - * between zone normal and vmalloc space. Calculate the size of - * this space and use it to adjust the boundry between ZONE_NORMAL - * and ZONE_HIGHMEM. - */ - find_max_pfn(); - get_memcfg_numa(); - - kva_pages = calculate_numa_remap_pages(); - - /* partially used pages are not usable - thus round upwards */ - system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); - - kva_start_pfn = find_max_low_pfn() - kva_pages; - -#ifdef CONFIG_BLK_DEV_INITRD - /* Numa kva area is below the initrd */ - if (LOADER_TYPE && INITRD_START) - kva_start_pfn = PFN_DOWN(INITRD_START) - kva_pages; -#endif - kva_start_pfn -= kva_start_pfn & (PTRS_PER_PTE-1); - - system_max_low_pfn = max_low_pfn = find_max_low_pfn(); - printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n", - kva_start_pfn, max_low_pfn); - printk("max_pfn = %ld\n", max_pfn); -#ifdef CONFIG_HIGHMEM - highstart_pfn = highend_pfn = max_pfn; - if (max_pfn > system_max_low_pfn) - highstart_pfn = system_max_low_pfn; - printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", - pages_to_mb(highend_pfn - highstart_pfn)); - num_physpages = highend_pfn; - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; -#else - num_physpages = system_max_low_pfn; - high_memory = (void *) __va(system_max_low_pfn * PAGE_SIZE - 1) + 1; -#endif - printk(KERN_NOTICE "%ldMB LOWMEM available.\n", - pages_to_mb(system_max_low_pfn)); - printk("min_low_pfn = %ld, max_low_pfn = %ld, highstart_pfn = %ld\n", - min_low_pfn, max_low_pfn, highstart_pfn); - - printk("Low memory ends at vaddr %08lx\n", - (ulong) pfn_to_kaddr(max_low_pfn)); - for_each_online_node(nid) { - node_remap_start_vaddr[nid] = pfn_to_kaddr( - kva_start_pfn + node_remap_offset[nid]); - /* Init the node remap allocator */ - node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + - (node_remap_size[nid] * PAGE_SIZE); - node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + - ALIGN(sizeof(pg_data_t), PAGE_SIZE); - - allocate_pgdat(nid); - printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, - (ulong) node_remap_start_vaddr[nid], - (ulong) pfn_to_kaddr(highstart_pfn - + node_remap_offset[nid] + node_remap_size[nid])); - } - printk("High memory starts at vaddr %08lx\n", - (ulong) pfn_to_kaddr(highstart_pfn)); - for_each_online_node(nid) - find_max_pfn_node(nid); - - memset(NODE_DATA(0), 0, sizeof(struct pglist_data)); - NODE_DATA(0)->bdata = &node0_bdata; - setup_bootmem_allocator(); - return max_low_pfn; -} - -void __init numa_kva_reserve(void) -{ - reserve_bootmem(PFN_PHYS(kva_start_pfn),PFN_PHYS(kva_pages)); -} - -void __init zone_sizes_init(void) -{ - int nid; - unsigned long max_zone_pfns[MAX_NR_ZONES]; - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA] = - virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - max_zone_pfns[ZONE_NORMAL] = max_low_pfn; -#ifdef CONFIG_HIGHMEM - max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; -#endif - - /* If SRAT has not registered memory, register it now */ - if (find_max_pfn_with_active_regions() == 0) { - for_each_online_node(nid) { - if (node_has_online_mem(nid)) - add_active_range(nid, node_start_pfn[nid], - node_end_pfn[nid]); - } - } - - free_area_init_nodes(max_zone_pfns); - return; -} - -void __init set_highmem_pages_init(int bad_ppro) -{ -#ifdef CONFIG_HIGHMEM - struct zone *zone; - struct page *page; - - for_each_zone(zone) { - unsigned long node_pfn, zone_start_pfn, zone_end_pfn; - - if (!is_highmem(zone)) - continue; - - zone_start_pfn = zone->zone_start_pfn; - zone_end_pfn = zone_start_pfn + zone->spanned_pages; - - printk("Initializing %s for node %d (%08lx:%08lx)\n", - zone->name, zone_to_nid(zone), - zone_start_pfn, zone_end_pfn); - - for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) { - if (!pfn_valid(node_pfn)) - continue; - page = pfn_to_page(node_pfn); - add_one_highpage_init(page, node_pfn, bad_ppro); - } - } - totalram_pages += totalhigh_pages; -#endif -} - -#ifdef CONFIG_MEMORY_HOTPLUG -int paddr_to_nid(u64 addr) -{ - int nid; - unsigned long pfn = PFN_DOWN(addr); - - for_each_node(nid) - if (node_start_pfn[nid] <= pfn && - pfn < node_end_pfn[nid]) - return nid; - - return -1; -} - -/* - * This function is used to ask node id BEFORE memmap and mem_section's - * initialization (pfn_to_nid() can't be used yet). - * If _PXM is not defined on ACPI's DSDT, node id must be found by this. - */ -int memory_add_physaddr_to_nid(u64 addr) -{ - int nid = paddr_to_nid(addr); - return (nid >= 0) ? nid : 0; -} - -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); -#endif diff --git a/arch/i386/mm/extable_32.c b/arch/i386/mm/extable_32.c deleted file mode 100644 index 0ce4f22a263..00000000000 --- a/arch/i386/mm/extable_32.c +++ /dev/null @@ -1,35 +0,0 @@ -/* - * linux/arch/i386/mm/extable.c - */ - -#include -#include -#include - -int fixup_exception(struct pt_regs *regs) -{ - const struct exception_table_entry *fixup; - -#ifdef CONFIG_PNPBIOS - if (unlikely(SEGMENT_IS_PNP_CODE(regs->xcs))) - { - extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; - extern u32 pnp_bios_is_utter_crap; - pnp_bios_is_utter_crap = 1; - printk(KERN_CRIT "PNPBIOS fault.. attempting recovery.\n"); - __asm__ volatile( - "movl %0, %%esp\n\t" - "jmp *%1\n\t" - : : "g" (pnp_bios_fault_esp), "g" (pnp_bios_fault_eip)); - panic("do_trap: can't hit this"); - } -#endif - - fixup = search_exception_tables(regs->eip); - if (fixup) { - regs->eip = fixup->fixup; - return 1; - } - - return 0; -} diff --git a/arch/i386/mm/fault_32.c b/arch/i386/mm/fault_32.c deleted file mode 100644 index fcb38e7f354..00000000000 --- a/arch/i386/mm/fault_32.c +++ /dev/null @@ -1,657 +0,0 @@ -/* - * linux/arch/i386/mm/fault.c - * - * Copyright (C) 1995 Linus Torvalds - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* For unblank_screen() */ -#include -#include /* for max_low_pfn */ -#include -#include -#include -#include -#include - -#include -#include -#include - -extern void die(const char *,struct pt_regs *,long); - -static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); - -int register_page_fault_notifier(struct notifier_block *nb) -{ - vmalloc_sync_all(); - return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); -} -EXPORT_SYMBOL_GPL(register_page_fault_notifier); - -int unregister_page_fault_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); -} -EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); - -static inline int notify_page_fault(struct pt_regs *regs, long err) -{ - struct die_args args = { - .regs = regs, - .str = "page fault", - .err = err, - .trapnr = 14, - .signr = SIGSEGV - }; - return atomic_notifier_call_chain(¬ify_page_fault_chain, - DIE_PAGE_FAULT, &args); -} - -/* - * Return EIP plus the CS segment base. The segment limit is also - * adjusted, clamped to the kernel/user address space (whichever is - * appropriate), and returned in *eip_limit. - * - * The segment is checked, because it might have been changed by another - * task between the original faulting instruction and here. - * - * If CS is no longer a valid code segment, or if EIP is beyond the - * limit, or if it is a kernel address when CS is not a kernel segment, - * then the returned value will be greater than *eip_limit. - * - * This is slow, but is very rarely executed. - */ -static inline unsigned long get_segment_eip(struct pt_regs *regs, - unsigned long *eip_limit) -{ - unsigned long eip = regs->eip; - unsigned seg = regs->xcs & 0xffff; - u32 seg_ar, seg_limit, base, *desc; - - /* Unlikely, but must come before segment checks. */ - if (unlikely(regs->eflags & VM_MASK)) { - base = seg << 4; - *eip_limit = base + 0xffff; - return base + (eip & 0xffff); - } - - /* The standard kernel/user address space limit. */ - *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg; - - /* By far the most common cases. */ - if (likely(SEGMENT_IS_FLAT_CODE(seg))) - return eip; - - /* Check the segment exists, is within the current LDT/GDT size, - that kernel/user (ring 0..3) has the appropriate privilege, - that it's a code segment, and get the limit. */ - __asm__ ("larl %3,%0; lsll %3,%1" - : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg)); - if ((~seg_ar & 0x9800) || eip > seg_limit) { - *eip_limit = 0; - return 1; /* So that returned eip > *eip_limit. */ - } - - /* Get the GDT/LDT descriptor base. - When you look for races in this code remember that - LDT and other horrors are only used in user space. */ - if (seg & (1<<2)) { - /* Must lock the LDT while reading it. */ - down(¤t->mm->context.sem); - desc = current->mm->context.ldt; - desc = (void *)desc + (seg & ~7); - } else { - /* Must disable preemption while reading the GDT. */ - desc = (u32 *)get_cpu_gdt_table(get_cpu()); - desc = (void *)desc + (seg & ~7); - } - - /* Decode the code segment base from the descriptor */ - base = get_desc_base((unsigned long *)desc); - - if (seg & (1<<2)) { - up(¤t->mm->context.sem); - } else - put_cpu(); - - /* Adjust EIP and segment limit, and clamp at the kernel limit. - It's legitimate for segments to wrap at 0xffffffff. */ - seg_limit += base; - if (seg_limit < *eip_limit && seg_limit >= base) - *eip_limit = seg_limit; - return eip + base; -} - -/* - * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. - * Check that here and ignore it. - */ -static int __is_prefetch(struct pt_regs *regs, unsigned long addr) -{ - unsigned long limit; - unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit); - int scan_more = 1; - int prefetch = 0; - int i; - - for (i = 0; scan_more && i < 15; i++) { - unsigned char opcode; - unsigned char instr_hi; - unsigned char instr_lo; - - if (instr > (unsigned char *)limit) - break; - if (probe_kernel_address(instr, opcode)) - break; - - instr_hi = opcode & 0xf0; - instr_lo = opcode & 0x0f; - instr++; - - switch (instr_hi) { - case 0x20: - case 0x30: - /* Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. */ - scan_more = ((instr_lo & 7) == 0x6); - break; - - case 0x60: - /* 0x64 thru 0x67 are valid prefixes in all modes. */ - scan_more = (instr_lo & 0xC) == 0x4; - break; - case 0xF0: - /* 0xF0, 0xF2, and 0xF3 are valid prefixes */ - scan_more = !instr_lo || (instr_lo>>1) == 1; - break; - case 0x00: - /* Prefetch instruction is 0x0F0D or 0x0F18 */ - scan_more = 0; - if (instr > (unsigned char *)limit) - break; - if (probe_kernel_address(instr, opcode)) - break; - prefetch = (instr_lo == 0xF) && - (opcode == 0x0D || opcode == 0x18); - break; - default: - scan_more = 0; - break; - } - } - return prefetch; -} - -static inline int is_prefetch(struct pt_regs *regs, unsigned long addr, - unsigned long error_code) -{ - if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 >= 6)) { - /* Catch an obscure case of prefetch inside an NX page. */ - if (nx_enabled && (error_code & 16)) - return 0; - return __is_prefetch(regs, addr); - } - return 0; -} - -static noinline void force_sig_info_fault(int si_signo, int si_code, - unsigned long address, struct task_struct *tsk) -{ - siginfo_t info; - - info.si_signo = si_signo; - info.si_errno = 0; - info.si_code = si_code; - info.si_addr = (void __user *)address; - force_sig_info(si_signo, &info, tsk); -} - -fastcall void do_invalid_op(struct pt_regs *, unsigned long); - -static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) -{ - unsigned index = pgd_index(address); - pgd_t *pgd_k; - pud_t *pud, *pud_k; - pmd_t *pmd, *pmd_k; - - pgd += index; - pgd_k = init_mm.pgd + index; - - if (!pgd_present(*pgd_k)) - return NULL; - - /* - * set_pgd(pgd, *pgd_k); here would be useless on PAE - * and redundant with the set_pmd() on non-PAE. As would - * set_pud. - */ - - pud = pud_offset(pgd, address); - pud_k = pud_offset(pgd_k, address); - if (!pud_present(*pud_k)) - return NULL; - - pmd = pmd_offset(pud, address); - pmd_k = pmd_offset(pud_k, address); - if (!pmd_present(*pmd_k)) - return NULL; - if (!pmd_present(*pmd)) { - set_pmd(pmd, *pmd_k); - arch_flush_lazy_mmu_mode(); - } else - BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); - return pmd_k; -} - -/* - * Handle a fault on the vmalloc or module mapping area - * - * This assumes no large pages in there. - */ -static inline int vmalloc_fault(unsigned long address) -{ - unsigned long pgd_paddr; - pmd_t *pmd_k; - pte_t *pte_k; - /* - * Synchronize this task's top level page-table - * with the 'reference' page table. - * - * Do _not_ use "current" here. We might be inside - * an interrupt in the middle of a task switch.. - */ - pgd_paddr = read_cr3(); - pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); - if (!pmd_k) - return -1; - pte_k = pte_offset_kernel(pmd_k, address); - if (!pte_present(*pte_k)) - return -1; - return 0; -} - -int show_unhandled_signals = 1; - -/* - * This routine handles page faults. It determines the address, - * and the problem, and then passes it off to one of the appropriate - * routines. - * - * error_code: - * bit 0 == 0 means no page found, 1 means protection fault - * bit 1 == 0 means read, 1 means write - * bit 2 == 0 means kernel, 1 means user-mode - * bit 3 == 1 means use of reserved bit detected - * bit 4 == 1 means fault was an instruction fetch - */ -fastcall void __kprobes do_page_fault(struct pt_regs *regs, - unsigned long error_code) -{ - struct task_struct *tsk; - struct mm_struct *mm; - struct vm_area_struct * vma; - unsigned long address; - int write, si_code; - int fault; - - /* get the address */ - address = read_cr2(); - - tsk = current; - - si_code = SEGV_MAPERR; - - /* - * We fault-in kernel-space virtual memory on-demand. The - * 'reference' page table is init_mm.pgd. - * - * NOTE! We MUST NOT take any locks for this case. We may - * be in an interrupt or a critical region, and should - * only copy the information from the master page table, - * nothing more. - * - * This verifies that the fault happens in kernel space - * (error_code & 4) == 0, and that the fault was not a - * protection error (error_code & 9) == 0. - */ - if (unlikely(address >= TASK_SIZE)) { - if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0) - return; - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) - return; - /* - * Don't take the mm semaphore here. If we fixup a prefetch - * fault we could otherwise deadlock. - */ - goto bad_area_nosemaphore; - } - - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) - return; - - /* It's safe to allow irq's after cr2 has been saved and the vmalloc - fault has been handled. */ - if (regs->eflags & (X86_EFLAGS_IF|VM_MASK)) - local_irq_enable(); - - mm = tsk->mm; - - /* - * If we're in an interrupt, have no user context or are running in an - * atomic region then we must not take the fault.. - */ - if (in_atomic() || !mm) - goto bad_area_nosemaphore; - - /* When running in the kernel we expect faults to occur only to - * addresses in user space. All other faults represent errors in the - * kernel and should generate an OOPS. Unfortunatly, in the case of an - * erroneous fault occurring in a code path which already holds mmap_sem - * we will deadlock attempting to validate the fault against the - * address space. Luckily the kernel only validly references user - * space from well defined areas of code, which are listed in the - * exceptions table. - * - * As the vast majority of faults will be valid we will only perform - * the source reference check when there is a possibilty of a deadlock. - * Attempt to lock the address space, if we cannot we then validate the - * source. If this is invalid we can skip the address space check, - * thus avoiding the deadlock. - */ - if (!down_read_trylock(&mm->mmap_sem)) { - if ((error_code & 4) == 0 && - !search_exception_tables(regs->eip)) - goto bad_area_nosemaphore; - down_read(&mm->mmap_sem); - } - - vma = find_vma(mm, address); - if (!vma) - goto bad_area; - if (vma->vm_start <= address) - goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; - if (error_code & 4) { - /* - * Accessing the stack below %esp is always a bug. - * The large cushion allows instructions like enter - * and pusha to work. ("enter $65535,$31" pushes - * 32 pointers and then decrements %esp by 65535.) - */ - if (address + 65536 + 32 * sizeof(unsigned long) < regs->esp) - goto bad_area; - } - if (expand_stack(vma, address)) - goto bad_area; -/* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. - */ -good_area: - si_code = SEGV_ACCERR; - write = 0; - switch (error_code & 3) { - default: /* 3: write, present */ - /* fall through */ - case 2: /* write, not present */ - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - write++; - break; - case 1: /* read, present */ - goto bad_area; - case 0: /* read, not present */ - if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) - goto bad_area; - } - - survive: - /* - * If for any reason at all we couldn't handle the fault, - * make sure we exit gracefully rather than endlessly redo - * the fault. - */ - fault = handle_mm_fault(mm, vma, address, write); - if (unlikely(fault & VM_FAULT_ERROR)) { - if (fault & VM_FAULT_OOM) - goto out_of_memory; - else if (fault & VM_FAULT_SIGBUS) - goto do_sigbus; - BUG(); - } - if (fault & VM_FAULT_MAJOR) - tsk->maj_flt++; - else - tsk->min_flt++; - - /* - * Did it hit the DOS screen memory VA from vm86 mode? - */ - if (regs->eflags & VM_MASK) { - unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT; - if (bit < 32) - tsk->thread.screen_bitmap |= 1 << bit; - } - up_read(&mm->mmap_sem); - return; - -/* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. - */ -bad_area: - up_read(&mm->mmap_sem); - -bad_area_nosemaphore: - /* User mode accesses just cause a SIGSEGV */ - if (error_code & 4) { - /* - * It's possible to have interrupts off here. - */ - local_irq_enable(); - - /* - * Valid to do another page fault here because this one came - * from user space. - */ - if (is_prefetch(regs, address, error_code)) - return; - - if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && - printk_ratelimit()) { - printk("%s%s[%d]: segfault at %08lx eip %08lx " - "esp %08lx error %lx\n", - tsk->pid > 1 ? KERN_INFO : KERN_EMERG, - tsk->comm, tsk->pid, address, regs->eip, - regs->esp, error_code); - } - tsk->thread.cr2 = address; - /* Kernel addresses are always protection faults */ - tsk->thread.error_code = error_code | (address >= TASK_SIZE); - tsk->thread.trap_no = 14; - force_sig_info_fault(SIGSEGV, si_code, address, tsk); - return; - } - -#ifdef CONFIG_X86_F00F_BUG - /* - * Pentium F0 0F C7 C8 bug workaround. - */ - if (boot_cpu_data.f00f_bug) { - unsigned long nr; - - nr = (address - idt_descr.address) >> 3; - - if (nr == 6) { - do_invalid_op(regs, 0); - return; - } - } -#endif - -no_context: - /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs)) - return; - - /* - * Valid to do another page fault here, because if this fault - * had been triggered by is_prefetch fixup_exception would have - * handled it. - */ - if (is_prefetch(regs, address, error_code)) - return; - -/* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. - */ - - bust_spinlocks(1); - - if (oops_may_print()) { - __typeof__(pte_val(__pte(0))) page; - -#ifdef CONFIG_X86_PAE - if (error_code & 16) { - pte_t *pte = lookup_address(address); - - if (pte && pte_present(*pte) && !pte_exec_kernel(*pte)) - printk(KERN_CRIT "kernel tried to execute " - "NX-protected page - exploit attempt? " - "(uid: %d)\n", current->uid); - } -#endif - if (address < PAGE_SIZE) - printk(KERN_ALERT "BUG: unable to handle kernel NULL " - "pointer dereference"); - else - printk(KERN_ALERT "BUG: unable to handle kernel paging" - " request"); - printk(" at virtual address %08lx\n",address); - printk(KERN_ALERT " printing eip:\n"); - printk("%08lx\n", regs->eip); - - page = read_cr3(); - page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT]; -#ifdef CONFIG_X86_PAE - printk(KERN_ALERT "*pdpt = %016Lx\n", page); - if ((page >> PAGE_SHIFT) < max_low_pfn - && page & _PAGE_PRESENT) { - page &= PAGE_MASK; - page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT) - & (PTRS_PER_PMD - 1)]; - printk(KERN_ALERT "*pde = %016Lx\n", page); - page &= ~_PAGE_NX; - } -#else - printk(KERN_ALERT "*pde = %08lx\n", page); -#endif - - /* - * We must not directly access the pte in the highpte - * case if the page table is located in highmem. - * And let's rather not kmap-atomic the pte, just in case - * it's allocated already. - */ - if ((page >> PAGE_SHIFT) < max_low_pfn - && (page & _PAGE_PRESENT)) { - page &= PAGE_MASK; - page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT) - & (PTRS_PER_PTE - 1)]; - printk(KERN_ALERT "*pte = %0*Lx\n", sizeof(page)*2, (u64)page); - } - } - - tsk->thread.cr2 = address; - tsk->thread.trap_no = 14; - tsk->thread.error_code = error_code; - die("Oops", regs, error_code); - bust_spinlocks(0); - do_exit(SIGKILL); - -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ -out_of_memory: - up_read(&mm->mmap_sem); - if (is_init(tsk)) { - yield(); - down_read(&mm->mmap_sem); - goto survive; - } - printk("VM: killing process %s\n", tsk->comm); - if (error_code & 4) - do_exit(SIGKILL); - goto no_context; - -do_sigbus: - up_read(&mm->mmap_sem); - - /* Kernel mode? Handle exceptions or die */ - if (!(error_code & 4)) - goto no_context; - - /* User space => ok to do another page fault */ - if (is_prefetch(regs, address, error_code)) - return; - - tsk->thread.cr2 = address; - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 14; - force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); -} - -void vmalloc_sync_all(void) -{ - /* - * Note that races in the updates of insync and start aren't - * problematic: insync can only get set bits added, and updates to - * start are only improving performance (without affecting correctness - * if undone). - */ - static DECLARE_BITMAP(insync, PTRS_PER_PGD); - static unsigned long start = TASK_SIZE; - unsigned long address; - - if (SHARED_KERNEL_PMD) - return; - - BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); - for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) { - if (!test_bit(pgd_index(address), insync)) { - unsigned long flags; - struct page *page; - - spin_lock_irqsave(&pgd_lock, flags); - for (page = pgd_list; page; page = - (struct page *)page->index) - if (!vmalloc_sync_one(page_address(page), - address)) { - BUG_ON(page != pgd_list); - break; - } - spin_unlock_irqrestore(&pgd_lock, flags); - if (!page) - set_bit(pgd_index(address), insync); - } - if (address == start && test_bit(pgd_index(address), insync)) - start = address + PGDIR_SIZE; - } -} diff --git a/arch/i386/mm/highmem_32.c b/arch/i386/mm/highmem_32.c deleted file mode 100644 index 1c3bf95f735..00000000000 --- a/arch/i386/mm/highmem_32.c +++ /dev/null @@ -1,113 +0,0 @@ -#include -#include - -void *kmap(struct page *page) -{ - might_sleep(); - if (!PageHighMem(page)) - return page_address(page); - return kmap_high(page); -} - -void kunmap(struct page *page) -{ - if (in_interrupt()) - BUG(); - if (!PageHighMem(page)) - return; - kunmap_high(page); -} - -/* - * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because - * no global lock is needed and because the kmap code must perform a global TLB - * invalidation when the kmap pool wraps. - * - * However when holding an atomic kmap is is not legal to sleep, so atomic - * kmaps are appropriate for short, tight code paths only. - */ -void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) -{ - enum fixed_addresses idx; - unsigned long vaddr; - - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ - pagefault_disable(); - - if (!PageHighMem(page)) - return page_address(page); - - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - BUG_ON(!pte_none(*(kmap_pte-idx))); - set_pte(kmap_pte-idx, mk_pte(page, prot)); - arch_flush_lazy_mmu_mode(); - - return (void *)vaddr; -} - -void *kmap_atomic(struct page *page, enum km_type type) -{ - return kmap_atomic_prot(page, type, kmap_prot); -} - -void kunmap_atomic(void *kvaddr, enum km_type type) -{ - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); - - /* - * Force other mappings to Oops if they'll try to access this pte - * without first remap it. Keeping stale mappings around is a bad idea - * also, in case the page changes cacheability attributes or becomes - * a protected page in a hypervisor. - */ - if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) - kpte_clear_flush(kmap_pte-idx, vaddr); - else { -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(vaddr < PAGE_OFFSET); - BUG_ON(vaddr >= (unsigned long)high_memory); -#endif - } - - arch_flush_lazy_mmu_mode(); - pagefault_enable(); -} - -/* This is the same as kmap_atomic() but can map memory that doesn't - * have a struct page associated with it. - */ -void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) -{ - enum fixed_addresses idx; - unsigned long vaddr; - - pagefault_disable(); - - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); - arch_flush_lazy_mmu_mode(); - - return (void*) vaddr; -} - -struct page *kmap_atomic_to_page(void *ptr) -{ - unsigned long idx, vaddr = (unsigned long)ptr; - pte_t *pte; - - if (vaddr < FIXADDR_START) - return virt_to_page(ptr); - - idx = virt_to_fix(vaddr); - pte = kmap_pte - (idx - FIX_KMAP_BEGIN); - return pte_page(*pte); -} - -EXPORT_SYMBOL(kmap); -EXPORT_SYMBOL(kunmap); -EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic); -EXPORT_SYMBOL(kmap_atomic_to_page); diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c deleted file mode 100644 index 6c06d9c0488..00000000000 --- a/arch/i386/mm/hugetlbpage.c +++ /dev/null @@ -1,391 +0,0 @@ -/* - * IA-32 Huge TLB Page Support for Kernel. - * - * Copyright (C) 2002, Rohit Seth - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned long page_table_shareable(struct vm_area_struct *svma, - struct vm_area_struct *vma, - unsigned long addr, pgoff_t idx) -{ - unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) + - svma->vm_start; - unsigned long sbase = saddr & PUD_MASK; - unsigned long s_end = sbase + PUD_SIZE; - - /* - * match the virtual addresses, permission and the alignment of the - * page table page. - */ - if (pmd_index(addr) != pmd_index(saddr) || - vma->vm_flags != svma->vm_flags || - sbase < svma->vm_start || svma->vm_end < s_end) - return 0; - - return saddr; -} - -static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) -{ - unsigned long base = addr & PUD_MASK; - unsigned long end = base + PUD_SIZE; - - /* - * check on proper vm_flags and page table alignment - */ - if (vma->vm_flags & VM_MAYSHARE && - vma->vm_start <= base && end <= vma->vm_end) - return 1; - return 0; -} - -/* - * search for a shareable pmd page for hugetlb. - */ -static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) -{ - struct vm_area_struct *vma = find_vma(mm, addr); - struct address_space *mapping = vma->vm_file->f_mapping; - pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + - vma->vm_pgoff; - struct prio_tree_iter iter; - struct vm_area_struct *svma; - unsigned long saddr; - pte_t *spte = NULL; - - if (!vma_shareable(vma, addr)) - return; - - spin_lock(&mapping->i_mmap_lock); - vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { - if (svma == vma) - continue; - - saddr = page_table_shareable(svma, vma, addr, idx); - if (saddr) { - spte = huge_pte_offset(svma->vm_mm, saddr); - if (spte) { - get_page(virt_to_page(spte)); - break; - } - } - } - - if (!spte) - goto out; - - spin_lock(&mm->page_table_lock); - if (pud_none(*pud)) - pud_populate(mm, pud, (unsigned long) spte & PAGE_MASK); - else - put_page(virt_to_page(spte)); - spin_unlock(&mm->page_table_lock); -out: - spin_unlock(&mapping->i_mmap_lock); -} - -/* - * unmap huge page backed by shared pte. - * - * Hugetlb pte page is ref counted at the time of mapping. If pte is shared - * indicated by page_count > 1, unmap is achieved by clearing pud and - * decrementing the ref count. If count == 1, the pte page is not shared. - * - * called with vma->vm_mm->page_table_lock held. - * - * returns: 1 successfully unmapped a shared pte page - * 0 the underlying pte page is not shared, or it is the last user - */ -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - pgd_t *pgd = pgd_offset(mm, *addr); - pud_t *pud = pud_offset(pgd, *addr); - - BUG_ON(page_count(virt_to_page(ptep)) == 0); - if (page_count(virt_to_page(ptep)) == 1) - return 0; - - pud_clear(pud); - put_page(virt_to_page(ptep)); - *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; - return 1; -} - -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) -{ - pgd_t *pgd; - pud_t *pud; - pte_t *pte = NULL; - - pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); - if (pud) { - if (pud_none(*pud)) - huge_pmd_share(mm, addr, pud); - pte = (pte_t *) pmd_alloc(mm, pud, addr); - } - BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); - - return pte; -} - -pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd = NULL; - - pgd = pgd_offset(mm, addr); - if (pgd_present(*pgd)) { - pud = pud_offset(pgd, addr); - if (pud_present(*pud)) - pmd = pmd_offset(pud, addr); - } - return (pte_t *) pmd; -} - -#if 0 /* This is just for testing */ -struct page * -follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) -{ - unsigned long start = address; - int length = 1; - int nr; - struct page *page; - struct vm_area_struct *vma; - - vma = find_vma(mm, addr); - if (!vma || !is_vm_hugetlb_page(vma)) - return ERR_PTR(-EINVAL); - - pte = huge_pte_offset(mm, address); - - /* hugetlb should be locked, and hence, prefaulted */ - WARN_ON(!pte || pte_none(*pte)); - - page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; - - WARN_ON(!PageCompound(page)); - - return page; -} - -int pmd_huge(pmd_t pmd) -{ - return 0; -} - -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - return NULL; -} - -#else - -struct page * -follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) -{ - return ERR_PTR(-EINVAL); -} - -int pmd_huge(pmd_t pmd) -{ - return !!(pmd_val(pmd) & _PAGE_PSE); -} - -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pmd); - if (page) - page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); - return page; -} -#endif - -/* x86_64 also uses this file */ - -#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA -static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, - unsigned long addr, unsigned long len, - unsigned long pgoff, unsigned long flags) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long start_addr; - - if (len > mm->cached_hole_size) { - start_addr = mm->free_area_cache; - } else { - start_addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - } - -full_search: - addr = ALIGN(start_addr, HPAGE_SIZE); - - for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { - /* At this point: (!vma || addr < vma->vm_end). */ - if (TASK_SIZE - len < addr) { - /* - * Start a new search - just in case we missed - * some holes. - */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - goto full_search; - } - return -ENOMEM; - } - if (!vma || addr + len <= vma->vm_start) { - mm->free_area_cache = addr + len; - return addr; - } - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - addr = ALIGN(vma->vm_end, HPAGE_SIZE); - } -} - -static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, - unsigned long addr0, unsigned long len, - unsigned long pgoff, unsigned long flags) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma, *prev_vma; - unsigned long base = mm->mmap_base, addr = addr0; - unsigned long largest_hole = mm->cached_hole_size; - int first_time = 1; - - /* don't allow allocations above current base */ - if (mm->free_area_cache > base) - mm->free_area_cache = base; - - if (len <= largest_hole) { - largest_hole = 0; - mm->free_area_cache = base; - } -try_again: - /* make sure it can fit in the remaining address space */ - if (mm->free_area_cache < len) - goto fail; - - /* either no address requested or cant fit in requested address hole */ - addr = (mm->free_area_cache - len) & HPAGE_MASK; - do { - /* - * Lookup failure means no vma is above this address, - * i.e. return with success: - */ - if (!(vma = find_vma_prev(mm, addr, &prev_vma))) - return addr; - - /* - * new region fits between prev_vma->vm_end and - * vma->vm_start, use it: - */ - if (addr + len <= vma->vm_start && - (!prev_vma || (addr >= prev_vma->vm_end))) { - /* remember the address as a hint for next time */ - mm->cached_hole_size = largest_hole; - return (mm->free_area_cache = addr); - } else { - /* pull free_area_cache down to the first hole */ - if (mm->free_area_cache == vma->vm_end) { - mm->free_area_cache = vma->vm_start; - mm->cached_hole_size = largest_hole; - } - } - - /* remember the largest hole we saw so far */ - if (addr + largest_hole < vma->vm_start) - largest_hole = vma->vm_start - addr; - - /* try just below the current vma->vm_start */ - addr = (vma->vm_start - len) & HPAGE_MASK; - } while (len <= vma->vm_start); - -fail: - /* - * if hint left us with no space for the requested - * mapping then try again: - */ - if (first_time) { - mm->free_area_cache = base; - largest_hole = 0; - first_time = 0; - goto try_again; - } - /* - * A failed mmap() very likely causes application failure, - * so fall back to the bottom-up function here. This scenario - * can happen with large stack limits and large mmap() - * allocations. - */ - mm->free_area_cache = TASK_UNMAPPED_BASE; - mm->cached_hole_size = ~0UL; - addr = hugetlb_get_unmapped_area_bottomup(file, addr0, - len, pgoff, flags); - - /* - * Restore the topdown base: - */ - mm->free_area_cache = base; - mm->cached_hole_size = ~0UL; - - return addr; -} - -unsigned long -hugetlb_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, unsigned long flags) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - - if (len & ~HPAGE_MASK) - return -EINVAL; - if (len > TASK_SIZE) - return -ENOMEM; - - if (flags & MAP_FIXED) { - if (prepare_hugepage_range(addr, len)) - return -EINVAL; - return addr; - } - - if (addr) { - addr = ALIGN(addr, HPAGE_SIZE); - vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) - return addr; - } - if (mm->get_unmapped_area == arch_get_unmapped_area) - return hugetlb_get_unmapped_area_bottomup(file, addr, len, - pgoff, flags); - else - return hugetlb_get_unmapped_area_topdown(file, addr, len, - pgoff, flags); -} - -#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ - diff --git a/arch/i386/mm/init_32.c b/arch/i386/mm/init_32.c deleted file mode 100644 index 730a5b177b1..00000000000 --- a/arch/i386/mm/init_32.c +++ /dev/null @@ -1,858 +0,0 @@ -/* - * linux/arch/i386/mm/init.c - * - * Copyright (C) 1995 Linus Torvalds - * - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -unsigned int __VMALLOC_RESERVE = 128 << 20; - -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -unsigned long highstart_pfn, highend_pfn; - -static int noinline do_test_wp_bit(void); - -/* - * Creates a middle page table and puts a pointer to it in the - * given global directory entry. This only returns the gd entry - * in non-PAE compilation mode, since the middle layer is folded. - */ -static pmd_t * __init one_md_table_init(pgd_t *pgd) -{ - pud_t *pud; - pmd_t *pmd_table; - -#ifdef CONFIG_X86_PAE - if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { - pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); - - paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); - set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); - pud = pud_offset(pgd, 0); - if (pmd_table != pmd_offset(pud, 0)) - BUG(); - } -#endif - pud = pud_offset(pgd, 0); - pmd_table = pmd_offset(pud, 0); - return pmd_table; -} - -/* - * Create a page table and place a pointer to it in a middle page - * directory entry. - */ -static pte_t * __init one_page_table_init(pmd_t *pmd) -{ - if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { - pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - - paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT); - set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); - BUG_ON(page_table != pte_offset_kernel(pmd, 0)); - } - - return pte_offset_kernel(pmd, 0); -} - -/* - * This function initializes a certain range of kernel virtual memory - * with new bootmem page tables, everywhere page tables are missing in - * the given range. - */ - -/* - * NOTE: The pagetables are allocated contiguous on the physical space - * so we can cache the place of the first one and move around without - * checking the pgd every time. - */ -static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base) -{ - pgd_t *pgd; - pmd_t *pmd; - int pgd_idx, pmd_idx; - unsigned long vaddr; - - vaddr = start; - pgd_idx = pgd_index(vaddr); - pmd_idx = pmd_index(vaddr); - pgd = pgd_base + pgd_idx; - - for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { - pmd = one_md_table_init(pgd); - pmd = pmd + pmd_index(vaddr); - for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { - one_page_table_init(pmd); - - vaddr += PMD_SIZE; - } - pmd_idx = 0; - } -} - -static inline int is_kernel_text(unsigned long addr) -{ - if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) - return 1; - return 0; -} - -/* - * This maps the physical memory to kernel virtual address space, a total - * of max_low_pfn pages, by creating page tables starting from address - * PAGE_OFFSET. - */ -static void __init kernel_physical_mapping_init(pgd_t *pgd_base) -{ - unsigned long pfn; - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - int pgd_idx, pmd_idx, pte_ofs; - - pgd_idx = pgd_index(PAGE_OFFSET); - pgd = pgd_base + pgd_idx; - pfn = 0; - - for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { - pmd = one_md_table_init(pgd); - if (pfn >= max_low_pfn) - continue; - for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) { - unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET; - - /* Map with big pages if possible, otherwise create normal page tables. */ - if (cpu_has_pse) { - unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; - if (is_kernel_text(address) || is_kernel_text(address2)) - set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); - else - set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); - - pfn += PTRS_PER_PTE; - } else { - pte = one_page_table_init(pmd); - - for (pte_ofs = 0; - pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; - pte++, pfn++, pte_ofs++, address += PAGE_SIZE) { - if (is_kernel_text(address)) - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); - else - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); - } - } - } - } -} - -static inline int page_kills_ppro(unsigned long pagenr) -{ - if (pagenr >= 0x70000 && pagenr <= 0x7003F) - return 1; - return 0; -} - -int page_is_ram(unsigned long pagenr) -{ - int i; - unsigned long addr, end; - - if (efi_enabled) { - efi_memory_desc_t *md; - void *p; - - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - if (!is_available_memory(md)) - continue; - addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT; - end = (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT; - - if ((pagenr >= addr) && (pagenr < end)) - return 1; - } - return 0; - } - - for (i = 0; i < e820.nr_map; i++) { - - if (e820.map[i].type != E820_RAM) /* not usable memory */ - continue; - /* - * !!!FIXME!!! Some BIOSen report areas as RAM that - * are not. Notably the 640->1Mb area. We need a sanity - * check here. - */ - addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT; - end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT; - if ((pagenr >= addr) && (pagenr < end)) - return 1; - } - return 0; -} - -#ifdef CONFIG_HIGHMEM -pte_t *kmap_pte; -pgprot_t kmap_prot; - -#define kmap_get_fixmap_pte(vaddr) \ - pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr)) - -static void __init kmap_init(void) -{ - unsigned long kmap_vstart; - - /* cache the first kmap pte */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = kmap_get_fixmap_pte(kmap_vstart); - - kmap_prot = PAGE_KERNEL; -} - -static void __init permanent_kmaps_init(pgd_t *pgd_base) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - unsigned long vaddr; - - vaddr = PKMAP_BASE; - page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); - - pgd = swapper_pg_dir + pgd_index(vaddr); - pud = pud_offset(pgd, vaddr); - pmd = pmd_offset(pud, vaddr); - pte = pte_offset_kernel(pmd, vaddr); - pkmap_page_table = pte; -} - -static void __meminit free_new_highpage(struct page *page) -{ - init_page_count(page); - __free_page(page); - totalhigh_pages++; -} - -void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) -{ - if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) { - ClearPageReserved(page); - free_new_highpage(page); - } else - SetPageReserved(page); -} - -static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long pfn) -{ - free_new_highpage(page); - totalram_pages++; -#ifdef CONFIG_FLATMEM - max_mapnr = max(pfn, max_mapnr); -#endif - num_physpages++; - return 0; -} - -/* - * Not currently handling the NUMA case. - * Assuming single node and all memory that - * has been added dynamically that would be - * onlined here is in HIGHMEM - */ -void __meminit online_page(struct page *page) -{ - ClearPageReserved(page); - add_one_highpage_hotplug(page, page_to_pfn(page)); -} - - -#ifdef CONFIG_NUMA -extern void set_highmem_pages_init(int); -#else -static void __init set_highmem_pages_init(int bad_ppro) -{ - int pfn; - for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) - add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); - totalram_pages += totalhigh_pages; -} -#endif /* CONFIG_FLATMEM */ - -#else -#define kmap_init() do { } while (0) -#define permanent_kmaps_init(pgd_base) do { } while (0) -#define set_highmem_pages_init(bad_ppro) do { } while (0) -#endif /* CONFIG_HIGHMEM */ - -unsigned long long __PAGE_KERNEL = _PAGE_KERNEL; -EXPORT_SYMBOL(__PAGE_KERNEL); -unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; - -#ifdef CONFIG_NUMA -extern void __init remap_numa_kva(void); -#else -#define remap_numa_kva() do {} while (0) -#endif - -void __init native_pagetable_setup_start(pgd_t *base) -{ -#ifdef CONFIG_X86_PAE - int i; - - /* - * Init entries of the first-level page table to the - * zero page, if they haven't already been set up. - * - * In a normal native boot, we'll be running on a - * pagetable rooted in swapper_pg_dir, but not in PAE - * mode, so this will end up clobbering the mappings - * for the lower 24Mbytes of the address space, - * without affecting the kernel address space. - */ - for (i = 0; i < USER_PTRS_PER_PGD; i++) - set_pgd(&base[i], - __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); - - /* Make sure kernel address space is empty so that a pagetable - will be allocated for it. */ - memset(&base[USER_PTRS_PER_PGD], 0, - KERNEL_PGD_PTRS * sizeof(pgd_t)); -#else - paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT); -#endif -} - -void __init native_pagetable_setup_done(pgd_t *base) -{ -#ifdef CONFIG_X86_PAE - /* - * Add low memory identity-mappings - SMP needs it when - * starting up on an AP from real-mode. In the non-PAE - * case we already have these mappings through head.S. - * All user-space mappings are explicitly cleared after - * SMP startup. - */ - set_pgd(&base[0], base[USER_PTRS_PER_PGD]); -#endif -} - -/* - * Build a proper pagetable for the kernel mappings. Up until this - * point, we've been running on some set of pagetables constructed by - * the boot process. - * - * If we're booting on native hardware, this will be a pagetable - * constructed in arch/i386/kernel/head.S, and not running in PAE mode - * (even if we'll end up running in PAE). The root of the pagetable - * will be swapper_pg_dir. - * - * If we're booting paravirtualized under a hypervisor, then there are - * more options: we may already be running PAE, and the pagetable may - * or may not be based in swapper_pg_dir. In any case, - * paravirt_pagetable_setup_start() will set up swapper_pg_dir - * appropriately for the rest of the initialization to work. - * - * In general, pagetable_init() assumes that the pagetable may already - * be partially populated, and so it avoids stomping on any existing - * mappings. - */ -static void __init pagetable_init (void) -{ - unsigned long vaddr, end; - pgd_t *pgd_base = swapper_pg_dir; - - paravirt_pagetable_setup_start(pgd_base); - - /* Enable PSE if available */ - if (cpu_has_pse) - set_in_cr4(X86_CR4_PSE); - - /* Enable PGE if available */ - if (cpu_has_pge) { - set_in_cr4(X86_CR4_PGE); - __PAGE_KERNEL |= _PAGE_GLOBAL; - __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL; - } - - kernel_physical_mapping_init(pgd_base); - remap_numa_kva(); - - /* - * Fixed mappings, only the page table structure has to be - * created - mappings will be set by set_fixmap(): - */ - vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; - page_table_range_init(vaddr, end, pgd_base); - - permanent_kmaps_init(pgd_base); - - paravirt_pagetable_setup_done(pgd_base); -} - -#if defined(CONFIG_HIBERNATION) || defined(CONFIG_ACPI) -/* - * Swap suspend & friends need this for resume because things like the intel-agp - * driver might have split up a kernel 4MB mapping. - */ -char __nosavedata swsusp_pg_dir[PAGE_SIZE] - __attribute__ ((aligned (PAGE_SIZE))); - -static inline void save_pg_dir(void) -{ - memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE); -} -#else -static inline void save_pg_dir(void) -{ -} -#endif - -void zap_low_mappings (void) -{ - int i; - - save_pg_dir(); - - /* - * Zap initial low-memory mappings. - * - * Note that "pgd_clear()" doesn't do it for - * us, because pgd_clear() is a no-op on i386. - */ - for (i = 0; i < USER_PTRS_PER_PGD; i++) -#ifdef CONFIG_X86_PAE - set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); -#else - set_pgd(swapper_pg_dir+i, __pgd(0)); -#endif - flush_tlb_all(); -} - -int nx_enabled = 0; - -#ifdef CONFIG_X86_PAE - -static int disable_nx __initdata = 0; -u64 __supported_pte_mask __read_mostly = ~_PAGE_NX; -EXPORT_SYMBOL_GPL(__supported_pte_mask); - -/* - * noexec = on|off - * - * Control non executable mappings. - * - * on Enable - * off Disable - */ -static int __init noexec_setup(char *str) -{ - if (!str || !strcmp(str, "on")) { - if (cpu_has_nx) { - __supported_pte_mask |= _PAGE_NX; - disable_nx = 0; - } - } else if (!strcmp(str,"off")) { - disable_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; - } else - return -EINVAL; - - return 0; -} -early_param("noexec", noexec_setup); - -static void __init set_nx(void) -{ - unsigned int v[4], l, h; - - if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { - cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); - if ((v[3] & (1 << 20)) && !disable_nx) { - rdmsr(MSR_EFER, l, h); - l |= EFER_NX; - wrmsr(MSR_EFER, l, h); - nx_enabled = 1; - __supported_pte_mask |= _PAGE_NX; - } - } -} - -/* - * Enables/disables executability of a given kernel page and - * returns the previous setting. - */ -int __init set_kernel_exec(unsigned long vaddr, int enable) -{ - pte_t *pte; - int ret = 1; - - if (!nx_enabled) - goto out; - - pte = lookup_address(vaddr); - BUG_ON(!pte); - - if (!pte_exec_kernel(*pte)) - ret = 0; - - if (enable) - pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32)); - else - pte->pte_high |= 1 << (_PAGE_BIT_NX - 32); - pte_update_defer(&init_mm, vaddr, pte); - __flush_tlb_all(); -out: - return ret; -} - -#endif - -/* - * paging_init() sets up the page tables - note that the first 8MB are - * already mapped by head.S. - * - * This routines also unmaps the page at virtual kernel address 0, so - * that we can trap those pesky NULL-reference errors in the kernel. - */ -void __init paging_init(void) -{ -#ifdef CONFIG_X86_PAE - set_nx(); - if (nx_enabled) - printk("NX (Execute Disable) protection: active\n"); -#endif - - pagetable_init(); - - load_cr3(swapper_pg_dir); - -#ifdef CONFIG_X86_PAE - /* - * We will bail out later - printk doesn't work right now so - * the user would just see a hanging kernel. - */ - if (cpu_has_pae) - set_in_cr4(X86_CR4_PAE); -#endif - __flush_tlb_all(); - - kmap_init(); -} - -/* - * Test if the WP bit works in supervisor mode. It isn't supported on 386's - * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This - * used to involve black magic jumps to work around some nasty CPU bugs, - * but fortunately the switch to using exceptions got rid of all that. - */ - -static void __init test_wp_bit(void) -{ - printk("Checking if this processor honours the WP bit even in supervisor mode... "); - - /* Any page-aligned address will do, the test is non-destructive */ - __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY); - boot_cpu_data.wp_works_ok = do_test_wp_bit(); - clear_fixmap(FIX_WP_TEST); - - if (!boot_cpu_data.wp_works_ok) { - printk("No.\n"); -#ifdef CONFIG_X86_WP_WORKS_OK - panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!"); -#endif - } else { - printk("Ok.\n"); - } -} - -static struct kcore_list kcore_mem, kcore_vmalloc; - -void __init mem_init(void) -{ - extern int ppro_with_ram_bug(void); - int codesize, reservedpages, datasize, initsize; - int tmp; - int bad_ppro; - -#ifdef CONFIG_FLATMEM - BUG_ON(!mem_map); -#endif - - bad_ppro = ppro_with_ram_bug(); - -#ifdef CONFIG_HIGHMEM - /* check that fixmap and pkmap do not overlap */ - if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { - printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n"); - printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", - PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START); - BUG(); - } -#endif - - /* this will put all low memory onto the freelists */ - totalram_pages += free_all_bootmem(); - - reservedpages = 0; - for (tmp = 0; tmp < max_low_pfn; tmp++) - /* - * Only count reserved RAM pages - */ - if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) - reservedpages++; - - set_highmem_pages_init(bad_ppro); - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); - kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, - VMALLOC_END-VMALLOC_START); - - printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) - ); - -#if 1 /* double-sanity-check paranoia */ - printk("virtual kernel memory layout:\n" - " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" -#ifdef CONFIG_HIGHMEM - " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" -#endif - " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" - " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" - " .init : 0x%08lx - 0x%08lx (%4ld kB)\n" - " .data : 0x%08lx - 0x%08lx (%4ld kB)\n" - " .text : 0x%08lx - 0x%08lx (%4ld kB)\n", - FIXADDR_START, FIXADDR_TOP, - (FIXADDR_TOP - FIXADDR_START) >> 10, - -#ifdef CONFIG_HIGHMEM - PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, - (LAST_PKMAP*PAGE_SIZE) >> 10, -#endif - - VMALLOC_START, VMALLOC_END, - (VMALLOC_END - VMALLOC_START) >> 20, - - (unsigned long)__va(0), (unsigned long)high_memory, - ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20, - - (unsigned long)&__init_begin, (unsigned long)&__init_end, - ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10, - - (unsigned long)&_etext, (unsigned long)&_edata, - ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, - - (unsigned long)&_text, (unsigned long)&_etext, - ((unsigned long)&_etext - (unsigned long)&_text) >> 10); - -#ifdef CONFIG_HIGHMEM - BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START); - BUG_ON(VMALLOC_END > PKMAP_BASE); -#endif - BUG_ON(VMALLOC_START > VMALLOC_END); - BUG_ON((unsigned long)high_memory > VMALLOC_START); -#endif /* double-sanity-check paranoia */ - -#ifdef CONFIG_X86_PAE - if (!cpu_has_pae) - panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); -#endif - if (boot_cpu_data.wp_works_ok < 0) - test_wp_bit(); - - /* - * Subtle. SMP is doing it's boot stuff late (because it has to - * fork idle threads) - but it also needs low mappings for the - * protected-mode entry to work. We zap these entries only after - * the WP-bit has been tested. - */ -#ifndef CONFIG_SMP - zap_low_mappings(); -#endif -} - -#ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size) -{ - struct pglist_data *pgdata = NODE_DATA(nid); - struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; - - return __add_pages(zone, start_pfn, nr_pages); -} - -int remove_memory(u64 start, u64 size) -{ - return -EINVAL; -} -EXPORT_SYMBOL_GPL(remove_memory); -#endif - -struct kmem_cache *pmd_cache; - -void __init pgtable_cache_init(void) -{ - size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t); - - if (PTRS_PER_PMD > 1) { - pmd_cache = kmem_cache_create("pmd", - PTRS_PER_PMD*sizeof(pmd_t), - PTRS_PER_PMD*sizeof(pmd_t), - SLAB_PANIC, - pmd_ctor); - if (!SHARED_KERNEL_PMD) { - /* If we're in PAE mode and have a non-shared - kernel pmd, then the pgd size must be a - page size. This is because the pgd_list - links through the page structure, so there - can only be one pgd per page for this to - work. */ - pgd_size = PAGE_SIZE; - } - } -} - -/* - * This function cannot be __init, since exceptions don't work in that - * section. Put this after the callers, so that it cannot be inlined. - */ -static int noinline do_test_wp_bit(void) -{ - char tmp_reg; - int flag; - - __asm__ __volatile__( - " movb %0,%1 \n" - "1: movb %1,%0 \n" - " xorl %2,%2 \n" - "2: \n" - ".section __ex_table,\"a\"\n" - " .align 4 \n" - " .long 1b,2b \n" - ".previous \n" - :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)), - "=q" (tmp_reg), - "=r" (flag) - :"2" (1) - :"memory"); - - return flag; -} - -#ifdef CONFIG_DEBUG_RODATA - -void mark_rodata_ro(void) -{ - unsigned long start = PFN_ALIGN(_text); - unsigned long size = PFN_ALIGN(_etext) - start; - -#ifndef CONFIG_KPROBES -#ifdef CONFIG_HOTPLUG_CPU - /* It must still be possible to apply SMP alternatives. */ - if (num_possible_cpus() <= 1) -#endif - { - change_page_attr(virt_to_page(start), - size >> PAGE_SHIFT, PAGE_KERNEL_RX); - printk("Write protecting the kernel text: %luk\n", size >> 10); - } -#endif - start += size; - size = (unsigned long)__end_rodata - start; - change_page_attr(virt_to_page(start), - size >> PAGE_SHIFT, PAGE_KERNEL_RO); - printk("Write protecting the kernel read-only data: %luk\n", - size >> 10); - - /* - * change_page_attr() requires a global_flush_tlb() call after it. - * We do this after the printk so that if something went wrong in the - * change, the printk gets out at least to give a better debug hint - * of who is the culprit. - */ - global_flush_tlb(); -} -#endif - -void free_init_pages(char *what, unsigned long begin, unsigned long end) -{ - unsigned long addr; - - for (addr = begin; addr < end; addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - free_page(addr); - totalram_pages++; - } - printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); -} - -void free_initmem(void) -{ - free_init_pages("unused kernel memory", - (unsigned long)(&__init_begin), - (unsigned long)(&__init_end)); -} - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - free_init_pages("initrd memory", start, end); -} -#endif - diff --git a/arch/i386/mm/ioremap_32.c b/arch/i386/mm/ioremap_32.c deleted file mode 100644 index 0b278315d73..00000000000 --- a/arch/i386/mm/ioremap_32.c +++ /dev/null @@ -1,274 +0,0 @@ -/* - * arch/i386/mm/ioremap.c - * - * Re-map IO memory to kernel address space so that we can access it. - * This is needed for high PCI addresses that aren't mapped in the - * 640k-1MB IO memory area on PC's - * - * (C) Copyright 1995 1996 Linus Torvalds - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define ISA_START_ADDRESS 0xa0000 -#define ISA_END_ADDRESS 0x100000 - -/* - * Generic mapping function (not visible outside): - */ - -/* - * Remap an arbitrary physical address space into the kernel virtual - * address space. Needed when the kernel wants to access high addresses - * directly. - * - * NOTE! We need to allow non-page-aligned mappings too: we will obviously - * have to convert them into an offset in a page-aligned mapping, but the - * caller shouldn't need to know that small detail. - */ -void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags) -{ - void __iomem * addr; - struct vm_struct * area; - unsigned long offset, last_addr; - pgprot_t prot; - - /* Don't allow wraparound or zero size */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr) - return NULL; - - /* - * Don't remap the low PCI/ISA area, it's always mapped.. - */ - if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) - return (void __iomem *) phys_to_virt(phys_addr); - - /* - * Don't allow anybody to remap normal RAM that we're using.. - */ - if (phys_addr <= virt_to_phys(high_memory - 1)) { - char *t_addr, *t_end; - struct page *page; - - t_addr = __va(phys_addr); - t_end = t_addr + (size - 1); - - for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++) - if(!PageReserved(page)) - return NULL; - } - - prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY - | _PAGE_ACCESSED | flags); - - /* - * Mappings have to be page-aligned - */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr+1) - phys_addr; - - /* - * Ok, go for it.. - */ - area = get_vm_area(size, VM_IOREMAP | (flags << 20)); - if (!area) - return NULL; - area->phys_addr = phys_addr; - addr = (void __iomem *) area->addr; - if (ioremap_page_range((unsigned long) addr, - (unsigned long) addr + size, phys_addr, prot)) { - vunmap((void __force *) addr); - return NULL; - } - return (void __iomem *) (offset + (char __iomem *)addr); -} -EXPORT_SYMBOL(__ioremap); - -/** - * ioremap_nocache - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap_nocache performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - * - * This version of ioremap ensures that the memory is marked uncachable - * on the CPU as well as honouring existing caching rules from things like - * the PCI bus. Note that there are other caches and buffers on many - * busses. In particular driver authors should read up on PCI writes - * - * It's useful if some control registers are in such an area and - * write combining or read caching is not desirable: - * - * Must be freed with iounmap. - */ - -void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) -{ - unsigned long last_addr; - void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD); - if (!p) - return p; - - /* Guaranteed to be > phys_addr, as per __ioremap() */ - last_addr = phys_addr + size - 1; - - if (last_addr < virt_to_phys(high_memory) - 1) { - struct page *ppage = virt_to_page(__va(phys_addr)); - unsigned long npages; - - phys_addr &= PAGE_MASK; - - /* This might overflow and become zero.. */ - last_addr = PAGE_ALIGN(last_addr); - - /* .. but that's ok, because modulo-2**n arithmetic will make - * the page-aligned "last - first" come out right. - */ - npages = (last_addr - phys_addr) >> PAGE_SHIFT; - - if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) { - iounmap(p); - p = NULL; - } - global_flush_tlb(); - } - - return p; -} -EXPORT_SYMBOL(ioremap_nocache); - -/** - * iounmap - Free a IO remapping - * @addr: virtual address from ioremap_* - * - * Caller must ensure there is only one unmapping for the same pointer. - */ -void iounmap(volatile void __iomem *addr) -{ - struct vm_struct *p, *o; - - if ((void __force *)addr <= high_memory) - return; - - /* - * __ioremap special-cases the PCI/ISA range by not instantiating a - * vm_area and by simply returning an address into the kernel mapping - * of ISA space. So handle that here. - */ - if (addr >= phys_to_virt(ISA_START_ADDRESS) && - addr < phys_to_virt(ISA_END_ADDRESS)) - return; - - addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long __force)addr); - - /* Use the vm area unlocked, assuming the caller - ensures there isn't another iounmap for the same address - in parallel. Reuse of the virtual address is prevented by - leaving it in the global lists until we're done with it. - cpa takes care of the direct mappings. */ - read_lock(&vmlist_lock); - for (p = vmlist; p; p = p->next) { - if (p->addr == addr) - break; - } - read_unlock(&vmlist_lock); - - if (!p) { - printk("iounmap: bad address %p\n", addr); - dump_stack(); - return; - } - - /* Reset the direct mapping. Can block */ - if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) { - change_page_attr(virt_to_page(__va(p->phys_addr)), - get_vm_area_size(p) >> PAGE_SHIFT, - PAGE_KERNEL); - global_flush_tlb(); - } - - /* Finally remove it */ - o = remove_vm_area((void *)addr); - BUG_ON(p != o || o == NULL); - kfree(p); -} -EXPORT_SYMBOL(iounmap); - -void __init *bt_ioremap(unsigned long phys_addr, unsigned long size) -{ - unsigned long offset, last_addr; - unsigned int nrpages; - enum fixed_addresses idx; - - /* Don't allow wraparound or zero size */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr) - return NULL; - - /* - * Don't remap the low PCI/ISA area, it's always mapped.. - */ - if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) - return phys_to_virt(phys_addr); - - /* - * Mappings have to be page-aligned - */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr) - phys_addr; - - /* - * Mappings have to fit in the FIX_BTMAP area. - */ - nrpages = size >> PAGE_SHIFT; - if (nrpages > NR_FIX_BTMAPS) - return NULL; - - /* - * Ok, go for it.. - */ - idx = FIX_BTMAP_BEGIN; - while (nrpages > 0) { - set_fixmap(idx, phys_addr); - phys_addr += PAGE_SIZE; - --idx; - --nrpages; - } - return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN)); -} - -void __init bt_iounmap(void *addr, unsigned long size) -{ - unsigned long virt_addr; - unsigned long offset; - unsigned int nrpages; - enum fixed_addresses idx; - - virt_addr = (unsigned long)addr; - if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) - return; - offset = virt_addr & ~PAGE_MASK; - nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT; - - idx = FIX_BTMAP_BEGIN; - while (nrpages > 0) { - clear_fixmap(idx); - --idx; - --nrpages; - } -} diff --git a/arch/i386/mm/mmap_32.c b/arch/i386/mm/mmap_32.c deleted file mode 100644 index 552e0847375..00000000000 --- a/arch/i386/mm/mmap_32.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * linux/arch/i386/mm/mmap.c - * - * flexible mmap layout support - * - * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * - * Started by Ingo Molnar - */ - -#include -#include -#include -#include - -/* - * Top of mmap area (just below the process stack). - * - * Leave an at least ~128 MB hole. - */ -#define MIN_GAP (128*1024*1024) -#define MAX_GAP (TASK_SIZE/6*5) - -static inline unsigned long mmap_base(struct mm_struct *mm) -{ - unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; - unsigned long random_factor = 0; - - if (current->flags & PF_RANDOMIZE) - random_factor = get_random_int() % (1024*1024); - - if (gap < MIN_GAP) - gap = MIN_GAP; - else if (gap > MAX_GAP) - gap = MAX_GAP; - - return PAGE_ALIGN(TASK_SIZE - gap - random_factor); -} - -/* - * This function, called very early during the creation of a new - * process VM image, sets up which VM layout function to use: - */ -void arch_pick_mmap_layout(struct mm_struct *mm) -{ - /* - * Fall back to the standard layout if the personality - * bit is set, or if the expected stack growth is unlimited: - */ - if (sysctl_legacy_va_layout || - (current->personality & ADDR_COMPAT_LAYOUT) || - current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) { - mm->mmap_base = TASK_UNMAPPED_BASE; - mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; - } else { - mm->mmap_base = mmap_base(mm); - mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; - } -} diff --git a/arch/i386/mm/pageattr_32.c b/arch/i386/mm/pageattr_32.c deleted file mode 100644 index 4241a74d16c..00000000000 --- a/arch/i386/mm/pageattr_32.c +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Copyright 2002 Andi Kleen, SuSE Labs. - * Thanks to Ben LaHaise for precious feedback. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static DEFINE_SPINLOCK(cpa_lock); -static struct list_head df_list = LIST_HEAD_INIT(df_list); - - -pte_t *lookup_address(unsigned long address) -{ - pgd_t *pgd = pgd_offset_k(address); - pud_t *pud; - pmd_t *pmd; - if (pgd_none(*pgd)) - return NULL; - pud = pud_offset(pgd, address); - if (pud_none(*pud)) - return NULL; - pmd = pmd_offset(pud, address); - if (pmd_none(*pmd)) - return NULL; - if (pmd_large(*pmd)) - return (pte_t *)pmd; - return pte_offset_kernel(pmd, address); -} - -static struct page *split_large_page(unsigned long address, pgprot_t prot, - pgprot_t ref_prot) -{ - int i; - unsigned long addr; - struct page *base; - pte_t *pbase; - - spin_unlock_irq(&cpa_lock); - base = alloc_pages(GFP_KERNEL, 0); - spin_lock_irq(&cpa_lock); - if (!base) - return NULL; - - /* - * page_private is used to track the number of entries in - * the page table page that have non standard attributes. - */ - SetPagePrivate(base); - page_private(base) = 0; - - address = __pa(address); - addr = address & LARGE_PAGE_MASK; - pbase = (pte_t *)page_address(base); - paravirt_alloc_pt(&init_mm, page_to_pfn(base)); - for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { - set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, - addr == address ? prot : ref_prot)); - } - return base; -} - -static void cache_flush_page(struct page *p) -{ - unsigned long adr = (unsigned long)page_address(p); - int i; - for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) - asm volatile("clflush (%0)" :: "r" (adr + i)); -} - -static void flush_kernel_map(void *arg) -{ - struct list_head *lh = (struct list_head *)arg; - struct page *p; - - /* High level code is not ready for clflush yet */ - if (0 && cpu_has_clflush) { - list_for_each_entry (p, lh, lru) - cache_flush_page(p); - } else if (boot_cpu_data.x86_model >= 4) - wbinvd(); - - /* Flush all to work around Errata in early athlons regarding - * large page flushing. - */ - __flush_tlb_all(); -} - -static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) -{ - struct page *page; - unsigned long flags; - - set_pte_atomic(kpte, pte); /* change init_mm */ - if (SHARED_KERNEL_PMD) - return; - - spin_lock_irqsave(&pgd_lock, flags); - for (page = pgd_list; page; page = (struct page *)page->index) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pgd = (pgd_t *)page_address(page) + pgd_index(address); - pud = pud_offset(pgd, address); - pmd = pmd_offset(pud, address); - set_pte_atomic((pte_t *)pmd, pte); - } - spin_unlock_irqrestore(&pgd_lock, flags); -} - -/* - * No more special protections in this 2/4MB area - revert to a - * large page again. - */ -static inline void revert_page(struct page *kpte_page, unsigned long address) -{ - pgprot_t ref_prot; - pte_t *linear; - - ref_prot = - ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext) - ? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE; - - linear = (pte_t *) - pmd_offset(pud_offset(pgd_offset_k(address), address), address); - set_pmd_pte(linear, address, - pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT, - ref_prot)); -} - -static inline void save_page(struct page *kpte_page) -{ - if (!test_and_set_bit(PG_arch_1, &kpte_page->flags)) - list_add(&kpte_page->lru, &df_list); -} - -static int -__change_page_attr(struct page *page, pgprot_t prot) -{ - pte_t *kpte; - unsigned long address; - struct page *kpte_page; - - BUG_ON(PageHighMem(page)); - address = (unsigned long)page_address(page); - - kpte = lookup_address(address); - if (!kpte) - return -EINVAL; - kpte_page = virt_to_page(kpte); - BUG_ON(PageLRU(kpte_page)); - BUG_ON(PageCompound(kpte_page)); - - if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { - if (!pte_huge(*kpte)) { - set_pte_atomic(kpte, mk_pte(page, prot)); - } else { - pgprot_t ref_prot; - struct page *split; - - ref_prot = - ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext) - ? PAGE_KERNEL_EXEC : PAGE_KERNEL; - split = split_large_page(address, prot, ref_prot); - if (!split) - return -ENOMEM; - set_pmd_pte(kpte,address,mk_pte(split, ref_prot)); - kpte_page = split; - } - page_private(kpte_page)++; - } else if (!pte_huge(*kpte)) { - set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL)); - BUG_ON(page_private(kpte_page) == 0); - page_private(kpte_page)--; - } else - BUG(); - - /* - * If the pte was reserved, it means it was created at boot - * time (not via split_large_page) and in turn we must not - * replace it with a largepage. - */ - - save_page(kpte_page); - if (!PageReserved(kpte_page)) { - if (cpu_has_pse && (page_private(kpte_page) == 0)) { - paravirt_release_pt(page_to_pfn(kpte_page)); - revert_page(kpte_page, address); - } - } - return 0; -} - -static inline void flush_map(struct list_head *l) -{ - on_each_cpu(flush_kernel_map, l, 1, 1); -} - -/* - * Change the page attributes of an page in the linear mapping. - * - * This should be used when a page is mapped with a different caching policy - * than write-back somewhere - some CPUs do not like it when mappings with - * different caching policies exist. This changes the page attributes of the - * in kernel linear mapping too. - * - * The caller needs to ensure that there are no conflicting mappings elsewhere. - * This function only deals with the kernel linear map. - * - * Caller must call global_flush_tlb() after this. - */ -int change_page_attr(struct page *page, int numpages, pgprot_t prot) -{ - int err = 0; - int i; - unsigned long flags; - - spin_lock_irqsave(&cpa_lock, flags); - for (i = 0; i < numpages; i++, page++) { - err = __change_page_attr(page, prot); - if (err) - break; - } - spin_unlock_irqrestore(&cpa_lock, flags); - return err; -} - -void global_flush_tlb(void) -{ - struct list_head l; - struct page *pg, *next; - - BUG_ON(irqs_disabled()); - - spin_lock_irq(&cpa_lock); - list_replace_init(&df_list, &l); - spin_unlock_irq(&cpa_lock); - flush_map(&l); - list_for_each_entry_safe(pg, next, &l, lru) { - list_del(&pg->lru); - clear_bit(PG_arch_1, &pg->flags); - if (PageReserved(pg) || !cpu_has_pse || page_private(pg) != 0) - continue; - ClearPagePrivate(pg); - __free_page(pg); - } -} - -#ifdef CONFIG_DEBUG_PAGEALLOC -void kernel_map_pages(struct page *page, int numpages, int enable) -{ - if (PageHighMem(page)) - return; - if (!enable) - debug_check_no_locks_freed(page_address(page), - numpages * PAGE_SIZE); - - /* the return value is ignored - the calls cannot fail, - * large pages are disabled at boot time. - */ - change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); - /* we should perform an IPI and flush all tlbs, - * but that can deadlock->flush only current cpu. - */ - __flush_tlb_all(); -} -#endif - -EXPORT_SYMBOL(change_page_attr); -EXPORT_SYMBOL(global_flush_tlb); diff --git a/arch/i386/mm/pgtable_32.c b/arch/i386/mm/pgtable_32.c deleted file mode 100644 index 01437c46baa..00000000000 --- a/arch/i386/mm/pgtable_32.c +++ /dev/null @@ -1,373 +0,0 @@ -/* - * linux/arch/i386/mm/pgtable.c - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -void show_mem(void) -{ - int total = 0, reserved = 0; - int shared = 0, cached = 0; - int highmem = 0; - struct page *page; - pg_data_t *pgdat; - unsigned long i; - unsigned long flags; - - printk(KERN_INFO "Mem-info:\n"); - show_free_areas(); - printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_online_pgdat(pgdat) { - pgdat_resize_lock(pgdat, &flags); - for (i = 0; i < pgdat->node_spanned_pages; ++i) { - page = pgdat_page_nr(pgdat, i); - total++; - if (PageHighMem(page)) - highmem++; - if (PageReserved(page)) - reserved++; - else if (PageSwapCache(page)) - cached++; - else if (page_count(page)) - shared += page_count(page) - 1; - } - pgdat_resize_unlock(pgdat, &flags); - } - printk(KERN_INFO "%d pages of RAM\n", total); - printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); - printk(KERN_INFO "%d reserved pages\n", reserved); - printk(KERN_INFO "%d pages shared\n", shared); - printk(KERN_INFO "%d pages swap cached\n", cached); - - printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY)); - printk(KERN_INFO "%lu pages writeback\n", - global_page_state(NR_WRITEBACK)); - printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED)); - printk(KERN_INFO "%lu pages slab\n", - global_page_state(NR_SLAB_RECLAIMABLE) + - global_page_state(NR_SLAB_UNRECLAIMABLE)); - printk(KERN_INFO "%lu pages pagetables\n", - global_page_state(NR_PAGETABLE)); -} - -/* - * Associate a virtual page frame with a given physical page frame - * and protection flags for that frame. - */ -static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - pgd = swapper_pg_dir + pgd_index(vaddr); - if (pgd_none(*pgd)) { - BUG(); - return; - } - pud = pud_offset(pgd, vaddr); - if (pud_none(*pud)) { - BUG(); - return; - } - pmd = pmd_offset(pud, vaddr); - if (pmd_none(*pmd)) { - BUG(); - return; - } - pte = pte_offset_kernel(pmd, vaddr); - if (pgprot_val(flags)) - /* stored as-is, to permit clearing entries */ - set_pte(pte, pfn_pte(pfn, flags)); - else - pte_clear(&init_mm, vaddr, pte); - - /* - * It's enough to flush this one mapping. - * (PGE mappings get flushed as well) - */ - __flush_tlb_one(vaddr); -} - -/* - * Associate a large virtual page frame with a given physical page frame - * and protection flags for that frame. pfn is for the base of the page, - * vaddr is what the page gets mapped to - both must be properly aligned. - * The pmd must already be instantiated. Assumes PAE mode. - */ -void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - - if (vaddr & (PMD_SIZE-1)) { /* vaddr is misaligned */ - printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n"); - return; /* BUG(); */ - } - if (pfn & (PTRS_PER_PTE-1)) { /* pfn is misaligned */ - printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n"); - return; /* BUG(); */ - } - pgd = swapper_pg_dir + pgd_index(vaddr); - if (pgd_none(*pgd)) { - printk(KERN_WARNING "set_pmd_pfn: pgd_none\n"); - return; /* BUG(); */ - } - pud = pud_offset(pgd, vaddr); - pmd = pmd_offset(pud, vaddr); - set_pmd(pmd, pfn_pmd(pfn, flags)); - /* - * It's enough to flush this one mapping. - * (PGE mappings get flushed as well) - */ - __flush_tlb_one(vaddr); -} - -static int fixmaps; -unsigned long __FIXADDR_TOP = 0xfffff000; -EXPORT_SYMBOL(__FIXADDR_TOP); - -void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) -{ - unsigned long address = __fix_to_virt(idx); - - if (idx >= __end_of_fixed_addresses) { - BUG(); - return; - } - set_pte_pfn(address, phys >> PAGE_SHIFT, flags); - fixmaps++; -} - -/** - * reserve_top_address - reserves a hole in the top of kernel address space - * @reserve - size of hole to reserve - * - * Can be used to relocate the fixmap area and poke a hole in the top - * of kernel address space to make room for a hypervisor. - */ -void reserve_top_address(unsigned long reserve) -{ - BUG_ON(fixmaps > 0); - printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", - (int)-reserve); - __FIXADDR_TOP = -reserve - PAGE_SIZE; - __VMALLOC_RESERVE += reserve; -} - -pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) -{ - return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); -} - -struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) -{ - struct page *pte; - -#ifdef CONFIG_HIGHPTE - pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); -#else - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); -#endif - return pte; -} - -void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags) -{ - memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); -} - -/* - * List of all pgd's needed for non-PAE so it can invalidate entries - * in both cached and uncached pgd's; not needed for PAE since the - * kernel pmd is shared. If PAE were not to share the pmd a similar - * tactic would be needed. This is essentially codepath-based locking - * against pageattr.c; it is the unique case in which a valid change - * of kernel pagetables can't be lazily synchronized by vmalloc faults. - * vmalloc faults work because attached pagetables are never freed. - * -- wli - */ -DEFINE_SPINLOCK(pgd_lock); -struct page *pgd_list; - -static inline void pgd_list_add(pgd_t *pgd) -{ - struct page *page = virt_to_page(pgd); - page->index = (unsigned long)pgd_list; - if (pgd_list) - set_page_private(pgd_list, (unsigned long)&page->index); - pgd_list = page; - set_page_private(page, (unsigned long)&pgd_list); -} - -static inline void pgd_list_del(pgd_t *pgd) -{ - struct page *next, **pprev, *page = virt_to_page(pgd); - next = (struct page *)page->index; - pprev = (struct page **)page_private(page); - *pprev = next; - if (next) - set_page_private(next, (unsigned long)pprev); -} - - - -#if (PTRS_PER_PMD == 1) -/* Non-PAE pgd constructor */ -static void pgd_ctor(void *pgd) -{ - unsigned long flags; - - /* !PAE, no pagetable sharing */ - memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); - - spin_lock_irqsave(&pgd_lock, flags); - - /* must happen under lock */ - clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, - swapper_pg_dir + USER_PTRS_PER_PGD, - KERNEL_PGD_PTRS); - paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, - __pa(swapper_pg_dir) >> PAGE_SHIFT, - USER_PTRS_PER_PGD, - KERNEL_PGD_PTRS); - pgd_list_add(pgd); - spin_unlock_irqrestore(&pgd_lock, flags); -} -#else /* PTRS_PER_PMD > 1 */ -/* PAE pgd constructor */ -static void pgd_ctor(void *pgd) -{ - /* PAE, kernel PMD may be shared */ - - if (SHARED_KERNEL_PMD) { - clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, - swapper_pg_dir + USER_PTRS_PER_PGD, - KERNEL_PGD_PTRS); - } else { - unsigned long flags; - - memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); - spin_lock_irqsave(&pgd_lock, flags); - pgd_list_add(pgd); - spin_unlock_irqrestore(&pgd_lock, flags); - } -} -#endif /* PTRS_PER_PMD */ - -static void pgd_dtor(void *pgd) -{ - unsigned long flags; /* can be called from interrupt context */ - - if (SHARED_KERNEL_PMD) - return; - - paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); - spin_lock_irqsave(&pgd_lock, flags); - pgd_list_del(pgd); - spin_unlock_irqrestore(&pgd_lock, flags); -} - -#define UNSHARED_PTRS_PER_PGD \ - (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD) - -/* If we allocate a pmd for part of the kernel address space, then - make sure its initialized with the appropriate kernel mappings. - Otherwise use a cached zeroed pmd. */ -static pmd_t *pmd_cache_alloc(int idx) -{ - pmd_t *pmd; - - if (idx >= USER_PTRS_PER_PGD) { - pmd = (pmd_t *)__get_free_page(GFP_KERNEL); - - if (pmd) - memcpy(pmd, - (void *)pgd_page_vaddr(swapper_pg_dir[idx]), - sizeof(pmd_t) * PTRS_PER_PMD); - } else - pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); - - return pmd; -} - -static void pmd_cache_free(pmd_t *pmd, int idx) -{ - if (idx >= USER_PTRS_PER_PGD) - free_page((unsigned long)pmd); - else - kmem_cache_free(pmd_cache, pmd); -} - -pgd_t *pgd_alloc(struct mm_struct *mm) -{ - int i; - pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor); - - if (PTRS_PER_PMD == 1 || !pgd) - return pgd; - - for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { - pmd_t *pmd = pmd_cache_alloc(i); - - if (!pmd) - goto out_oom; - - paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); - set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); - } - return pgd; - -out_oom: - for (i--; i >= 0; i--) { - pgd_t pgdent = pgd[i]; - void* pmd = (void *)__va(pgd_val(pgdent)-1); - paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); - pmd_cache_free(pmd, i); - } - quicklist_free(0, pgd_dtor, pgd); - return NULL; -} - -void pgd_free(pgd_t *pgd) -{ - int i; - - /* in the PAE case user pgd entries are overwritten before usage */ - if (PTRS_PER_PMD > 1) - for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { - pgd_t pgdent = pgd[i]; - void* pmd = (void *)__va(pgd_val(pgdent)-1); - paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); - pmd_cache_free(pmd, i); - } - /* in the non-PAE case, free_pgtables() clears user pgd entries */ - quicklist_free(0, pgd_dtor, pgd); -} - -void check_pgt_cache(void) -{ - quicklist_trim(0, pgd_dtor, 25, 16); -} - diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile new file mode 100644 index 00000000000..7317648e658 --- /dev/null +++ b/arch/x86/mm/Makefile @@ -0,0 +1,5 @@ +ifeq ($(CONFIG_X86_32),y) +include ${srctree}/arch/x86/mm/Makefile_32 +else +include ${srctree}/arch/x86_64/mm/Makefile_64 +endif diff --git a/arch/x86/mm/Makefile_32 b/arch/x86/mm/Makefile_32 new file mode 100644 index 00000000000..362b4ad082d --- /dev/null +++ b/arch/x86/mm/Makefile_32 @@ -0,0 +1,10 @@ +# +# Makefile for the linux i386-specific parts of the memory manager. +# + +obj-y := init_32.o pgtable_32.o fault_32.o ioremap_32.o extable_32.o pageattr_32.o mmap_32.o + +obj-$(CONFIG_NUMA) += discontig_32.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_HIGHMEM) += highmem_32.o +obj-$(CONFIG_BOOT_IOREMAP) += boot_ioremap_32.o diff --git a/arch/x86/mm/boot_ioremap_32.c b/arch/x86/mm/boot_ioremap_32.c new file mode 100644 index 00000000000..4de95a17a7d --- /dev/null +++ b/arch/x86/mm/boot_ioremap_32.c @@ -0,0 +1,100 @@ +/* + * arch/i386/mm/boot_ioremap.c + * + * Re-map functions for early boot-time before paging_init() when the + * boot-time pagetables are still in use + * + * Written by Dave Hansen + */ + + +/* + * We need to use the 2-level pagetable functions, but CONFIG_X86_PAE + * keeps that from happenning. If anyone has a better way, I'm listening. + * + * boot_pte_t is defined only if this all works correctly + */ + +#undef CONFIG_X86_PAE +#undef CONFIG_PARAVIRT +#include +#include +#include +#include +#include + +/* + * I'm cheating here. It is known that the two boot PTE pages are + * allocated next to each other. I'm pretending that they're just + * one big array. + */ + +#define BOOT_PTE_PTRS (PTRS_PER_PTE*2) + +static unsigned long boot_pte_index(unsigned long vaddr) +{ + return __pa(vaddr) >> PAGE_SHIFT; +} + +static inline boot_pte_t* boot_vaddr_to_pte(void *address) +{ + boot_pte_t* boot_pg = (boot_pte_t*)pg0; + return &boot_pg[boot_pte_index((unsigned long)address)]; +} + +/* + * This is only for a caller who is clever enough to page-align + * phys_addr and virtual_source, and who also has a preference + * about which virtual address from which to steal ptes + */ +static void __boot_ioremap(unsigned long phys_addr, unsigned long nrpages, + void* virtual_source) +{ + boot_pte_t* pte; + int i; + char *vaddr = virtual_source; + + pte = boot_vaddr_to_pte(virtual_source); + for (i=0; i < nrpages; i++, phys_addr += PAGE_SIZE, pte++) { + set_pte(pte, pfn_pte(phys_addr>>PAGE_SHIFT, PAGE_KERNEL)); + __flush_tlb_one(&vaddr[i*PAGE_SIZE]); + } +} + +/* the virtual space we're going to remap comes from this array */ +#define BOOT_IOREMAP_PAGES 4 +#define BOOT_IOREMAP_SIZE (BOOT_IOREMAP_PAGES*PAGE_SIZE) +static __initdata char boot_ioremap_space[BOOT_IOREMAP_SIZE] + __attribute__ ((aligned (PAGE_SIZE))); + +/* + * This only applies to things which need to ioremap before paging_init() + * bt_ioremap() and plain ioremap() are both useless at this point. + * + * When used, we're still using the boot-time pagetables, which only + * have 2 PTE pages mapping the first 8MB + * + * There is no unmap. The boot-time PTE pages aren't used after boot. + * If you really want the space back, just remap it yourself. + * boot_ioremap(&ioremap_space-PAGE_OFFSET, BOOT_IOREMAP_SIZE) + */ +__init void* boot_ioremap(unsigned long phys_addr, unsigned long size) +{ + unsigned long last_addr, offset; + unsigned int nrpages; + + last_addr = phys_addr + size - 1; + + /* page align the requested address */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr) - phys_addr; + + nrpages = size >> PAGE_SHIFT; + if (nrpages > BOOT_IOREMAP_PAGES) + return NULL; + + __boot_ioremap(phys_addr, nrpages, boot_ioremap_space); + + return &boot_ioremap_space[offset]; +} diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c new file mode 100644 index 00000000000..860e912a3fb --- /dev/null +++ b/arch/x86/mm/discontig_32.c @@ -0,0 +1,431 @@ +/* + * Written by: Patricia Gaughen , IBM Corporation + * August 2002: added remote node KVA remap - Martin J. Bligh + * + * Copyright (C) 2002, IBM Corp. + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; +EXPORT_SYMBOL(node_data); +bootmem_data_t node0_bdata; + +/* + * numa interface - we expect the numa architecture specific code to have + * populated the following initialisation. + * + * 1) node_online_map - the map of all nodes configured (online) in the system + * 2) node_start_pfn - the starting page frame number for a node + * 3) node_end_pfn - the ending page fram number for a node + */ +unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly; +unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly; + + +#ifdef CONFIG_DISCONTIGMEM +/* + * 4) physnode_map - the mapping between a pfn and owning node + * physnode_map keeps track of the physical memory layout of a generic + * numa node on a 256Mb break (each element of the array will + * represent 256Mb of memory and will be marked by the node id. so, + * if the first gig is on node 0, and the second gig is on node 1 + * physnode_map will contain: + * + * physnode_map[0-3] = 0; + * physnode_map[4-7] = 1; + * physnode_map[8- ] = -1; + */ +s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; +EXPORT_SYMBOL(physnode_map); + +void memory_present(int nid, unsigned long start, unsigned long end) +{ + unsigned long pfn; + + printk(KERN_INFO "Node: %d, start_pfn: %ld, end_pfn: %ld\n", + nid, start, end); + printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); + printk(KERN_DEBUG " "); + for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) { + physnode_map[pfn / PAGES_PER_ELEMENT] = nid; + printk("%ld ", pfn); + } + printk("\n"); +} + +unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, + unsigned long end_pfn) +{ + unsigned long nr_pages = end_pfn - start_pfn; + + if (!nr_pages) + return 0; + + return (nr_pages + 1) * sizeof(struct page); +} +#endif + +extern unsigned long find_max_low_pfn(void); +extern void add_one_highpage_init(struct page *, int, int); +extern unsigned long highend_pfn, highstart_pfn; + +#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) + +unsigned long node_remap_start_pfn[MAX_NUMNODES]; +unsigned long node_remap_size[MAX_NUMNODES]; +unsigned long node_remap_offset[MAX_NUMNODES]; +void *node_remap_start_vaddr[MAX_NUMNODES]; +void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); + +void *node_remap_end_vaddr[MAX_NUMNODES]; +void *node_remap_alloc_vaddr[MAX_NUMNODES]; +static unsigned long kva_start_pfn; +static unsigned long kva_pages; +/* + * FLAT - support for basic PC memory model with discontig enabled, essentially + * a single node with all available processors in it with a flat + * memory map. + */ +int __init get_memcfg_numa_flat(void) +{ + printk("NUMA - single node, flat memory mode\n"); + + /* Run the memory configuration and find the top of memory. */ + find_max_pfn(); + node_start_pfn[0] = 0; + node_end_pfn[0] = max_pfn; + memory_present(0, 0, max_pfn); + + /* Indicate there is one node available. */ + nodes_clear(node_online_map); + node_set_online(0); + return 1; +} + +/* + * Find the highest page frame number we have available for the node + */ +static void __init find_max_pfn_node(int nid) +{ + if (node_end_pfn[nid] > max_pfn) + node_end_pfn[nid] = max_pfn; + /* + * if a user has given mem=XXXX, then we need to make sure + * that the node _starts_ before that, too, not just ends + */ + if (node_start_pfn[nid] > max_pfn) + node_start_pfn[nid] = max_pfn; + BUG_ON(node_start_pfn[nid] > node_end_pfn[nid]); +} + +/* + * Allocate memory for the pg_data_t for this node via a crude pre-bootmem + * method. For node zero take this from the bottom of memory, for + * subsequent nodes place them at node_remap_start_vaddr which contains + * node local data in physically node local memory. See setup_memory() + * for details. + */ +static void __init allocate_pgdat(int nid) +{ + if (nid && node_has_online_mem(nid)) + NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid]; + else { + NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn)); + min_low_pfn += PFN_UP(sizeof(pg_data_t)); + } +} + +void *alloc_remap(int nid, unsigned long size) +{ + void *allocation = node_remap_alloc_vaddr[nid]; + + size = ALIGN(size, L1_CACHE_BYTES); + + if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) + return 0; + + node_remap_alloc_vaddr[nid] += size; + memset(allocation, 0, size); + + return allocation; +} + +void __init remap_numa_kva(void) +{ + void *vaddr; + unsigned long pfn; + int node; + + for_each_online_node(node) { + for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { + vaddr = node_remap_start_vaddr[node]+(pfn< max_pfn) + continue; + if (node_end_pfn[nid] > max_pfn) + node_end_pfn[nid] = max_pfn; + + /* ensure the remap includes space for the pgdat. */ + size = node_remap_size[nid] + sizeof(pg_data_t); + + /* convert size to large (pmd size) pages, rounding up */ + size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES; + /* now the roundup is correct, convert to PAGE_SIZE pages */ + size = size * PTRS_PER_PTE; + + /* + * Validate the region we are allocating only contains valid + * pages. + */ + for (pfn = node_end_pfn[nid] - size; + pfn < node_end_pfn[nid]; pfn++) + if (!page_is_ram(pfn)) + break; + + if (pfn != node_end_pfn[nid]) + size = 0; + + printk("Reserving %ld pages of KVA for lmem_map of node %d\n", + size, nid); + node_remap_size[nid] = size; + node_remap_offset[nid] = reserve_pages; + reserve_pages += size; + printk("Shrinking node %d from %ld pages to %ld pages\n", + nid, node_end_pfn[nid], node_end_pfn[nid] - size); + + if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) { + /* + * Align node_end_pfn[] and node_remap_start_pfn[] to + * pmd boundary. remap_numa_kva will barf otherwise. + */ + printk("Shrinking node %d further by %ld pages for proper alignment\n", + nid, node_end_pfn[nid] & (PTRS_PER_PTE-1)); + size += node_end_pfn[nid] & (PTRS_PER_PTE-1); + } + + node_end_pfn[nid] -= size; + node_remap_start_pfn[nid] = node_end_pfn[nid]; + shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]); + } + printk("Reserving total of %ld pages for numa KVA remap\n", + reserve_pages); + return reserve_pages; +} + +extern void setup_bootmem_allocator(void); +unsigned long __init setup_memory(void) +{ + int nid; + unsigned long system_start_pfn, system_max_low_pfn; + + /* + * When mapping a NUMA machine we allocate the node_mem_map arrays + * from node local memory. They are then mapped directly into KVA + * between zone normal and vmalloc space. Calculate the size of + * this space and use it to adjust the boundry between ZONE_NORMAL + * and ZONE_HIGHMEM. + */ + find_max_pfn(); + get_memcfg_numa(); + + kva_pages = calculate_numa_remap_pages(); + + /* partially used pages are not usable - thus round upwards */ + system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); + + kva_start_pfn = find_max_low_pfn() - kva_pages; + +#ifdef CONFIG_BLK_DEV_INITRD + /* Numa kva area is below the initrd */ + if (LOADER_TYPE && INITRD_START) + kva_start_pfn = PFN_DOWN(INITRD_START) - kva_pages; +#endif + kva_start_pfn -= kva_start_pfn & (PTRS_PER_PTE-1); + + system_max_low_pfn = max_low_pfn = find_max_low_pfn(); + printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n", + kva_start_pfn, max_low_pfn); + printk("max_pfn = %ld\n", max_pfn); +#ifdef CONFIG_HIGHMEM + highstart_pfn = highend_pfn = max_pfn; + if (max_pfn > system_max_low_pfn) + highstart_pfn = system_max_low_pfn; + printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", + pages_to_mb(highend_pfn - highstart_pfn)); + num_physpages = highend_pfn; + high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; +#else + num_physpages = system_max_low_pfn; + high_memory = (void *) __va(system_max_low_pfn * PAGE_SIZE - 1) + 1; +#endif + printk(KERN_NOTICE "%ldMB LOWMEM available.\n", + pages_to_mb(system_max_low_pfn)); + printk("min_low_pfn = %ld, max_low_pfn = %ld, highstart_pfn = %ld\n", + min_low_pfn, max_low_pfn, highstart_pfn); + + printk("Low memory ends at vaddr %08lx\n", + (ulong) pfn_to_kaddr(max_low_pfn)); + for_each_online_node(nid) { + node_remap_start_vaddr[nid] = pfn_to_kaddr( + kva_start_pfn + node_remap_offset[nid]); + /* Init the node remap allocator */ + node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + + (node_remap_size[nid] * PAGE_SIZE); + node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + + ALIGN(sizeof(pg_data_t), PAGE_SIZE); + + allocate_pgdat(nid); + printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, + (ulong) node_remap_start_vaddr[nid], + (ulong) pfn_to_kaddr(highstart_pfn + + node_remap_offset[nid] + node_remap_size[nid])); + } + printk("High memory starts at vaddr %08lx\n", + (ulong) pfn_to_kaddr(highstart_pfn)); + for_each_online_node(nid) + find_max_pfn_node(nid); + + memset(NODE_DATA(0), 0, sizeof(struct pglist_data)); + NODE_DATA(0)->bdata = &node0_bdata; + setup_bootmem_allocator(); + return max_low_pfn; +} + +void __init numa_kva_reserve(void) +{ + reserve_bootmem(PFN_PHYS(kva_start_pfn),PFN_PHYS(kva_pages)); +} + +void __init zone_sizes_init(void) +{ + int nid; + unsigned long max_zone_pfns[MAX_NR_ZONES]; + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + max_zone_pfns[ZONE_DMA] = + virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; +#ifdef CONFIG_HIGHMEM + max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; +#endif + + /* If SRAT has not registered memory, register it now */ + if (find_max_pfn_with_active_regions() == 0) { + for_each_online_node(nid) { + if (node_has_online_mem(nid)) + add_active_range(nid, node_start_pfn[nid], + node_end_pfn[nid]); + } + } + + free_area_init_nodes(max_zone_pfns); + return; +} + +void __init set_highmem_pages_init(int bad_ppro) +{ +#ifdef CONFIG_HIGHMEM + struct zone *zone; + struct page *page; + + for_each_zone(zone) { + unsigned long node_pfn, zone_start_pfn, zone_end_pfn; + + if (!is_highmem(zone)) + continue; + + zone_start_pfn = zone->zone_start_pfn; + zone_end_pfn = zone_start_pfn + zone->spanned_pages; + + printk("Initializing %s for node %d (%08lx:%08lx)\n", + zone->name, zone_to_nid(zone), + zone_start_pfn, zone_end_pfn); + + for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) { + if (!pfn_valid(node_pfn)) + continue; + page = pfn_to_page(node_pfn); + add_one_highpage_init(page, node_pfn, bad_ppro); + } + } + totalram_pages += totalhigh_pages; +#endif +} + +#ifdef CONFIG_MEMORY_HOTPLUG +int paddr_to_nid(u64 addr) +{ + int nid; + unsigned long pfn = PFN_DOWN(addr); + + for_each_node(nid) + if (node_start_pfn[nid] <= pfn && + pfn < node_end_pfn[nid]) + return nid; + + return -1; +} + +/* + * This function is used to ask node id BEFORE memmap and mem_section's + * initialization (pfn_to_nid() can't be used yet). + * If _PXM is not defined on ACPI's DSDT, node id must be found by this. + */ +int memory_add_physaddr_to_nid(u64 addr) +{ + int nid = paddr_to_nid(addr); + return (nid >= 0) ? nid : 0; +} + +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); +#endif diff --git a/arch/x86/mm/extable_32.c b/arch/x86/mm/extable_32.c new file mode 100644 index 00000000000..0ce4f22a263 --- /dev/null +++ b/arch/x86/mm/extable_32.c @@ -0,0 +1,35 @@ +/* + * linux/arch/i386/mm/extable.c + */ + +#include +#include +#include + +int fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + +#ifdef CONFIG_PNPBIOS + if (unlikely(SEGMENT_IS_PNP_CODE(regs->xcs))) + { + extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp; + extern u32 pnp_bios_is_utter_crap; + pnp_bios_is_utter_crap = 1; + printk(KERN_CRIT "PNPBIOS fault.. attempting recovery.\n"); + __asm__ volatile( + "movl %0, %%esp\n\t" + "jmp *%1\n\t" + : : "g" (pnp_bios_fault_esp), "g" (pnp_bios_fault_eip)); + panic("do_trap: can't hit this"); + } +#endif + + fixup = search_exception_tables(regs->eip); + if (fixup) { + regs->eip = fixup->fixup; + return 1; + } + + return 0; +} diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c new file mode 100644 index 00000000000..fcb38e7f354 --- /dev/null +++ b/arch/x86/mm/fault_32.c @@ -0,0 +1,657 @@ +/* + * linux/arch/i386/mm/fault.c + * + * Copyright (C) 1995 Linus Torvalds + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* For unblank_screen() */ +#include +#include /* for max_low_pfn */ +#include +#include +#include +#include +#include + +#include +#include +#include + +extern void die(const char *,struct pt_regs *,long); + +static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); + +int register_page_fault_notifier(struct notifier_block *nb) +{ + vmalloc_sync_all(); + return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); +} +EXPORT_SYMBOL_GPL(register_page_fault_notifier); + +int unregister_page_fault_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); +} +EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); + +static inline int notify_page_fault(struct pt_regs *regs, long err) +{ + struct die_args args = { + .regs = regs, + .str = "page fault", + .err = err, + .trapnr = 14, + .signr = SIGSEGV + }; + return atomic_notifier_call_chain(¬ify_page_fault_chain, + DIE_PAGE_FAULT, &args); +} + +/* + * Return EIP plus the CS segment base. The segment limit is also + * adjusted, clamped to the kernel/user address space (whichever is + * appropriate), and returned in *eip_limit. + * + * The segment is checked, because it might have been changed by another + * task between the original faulting instruction and here. + * + * If CS is no longer a valid code segment, or if EIP is beyond the + * limit, or if it is a kernel address when CS is not a kernel segment, + * then the returned value will be greater than *eip_limit. + * + * This is slow, but is very rarely executed. + */ +static inline unsigned long get_segment_eip(struct pt_regs *regs, + unsigned long *eip_limit) +{ + unsigned long eip = regs->eip; + unsigned seg = regs->xcs & 0xffff; + u32 seg_ar, seg_limit, base, *desc; + + /* Unlikely, but must come before segment checks. */ + if (unlikely(regs->eflags & VM_MASK)) { + base = seg << 4; + *eip_limit = base + 0xffff; + return base + (eip & 0xffff); + } + + /* The standard kernel/user address space limit. */ + *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg; + + /* By far the most common cases. */ + if (likely(SEGMENT_IS_FLAT_CODE(seg))) + return eip; + + /* Check the segment exists, is within the current LDT/GDT size, + that kernel/user (ring 0..3) has the appropriate privilege, + that it's a code segment, and get the limit. */ + __asm__ ("larl %3,%0; lsll %3,%1" + : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg)); + if ((~seg_ar & 0x9800) || eip > seg_limit) { + *eip_limit = 0; + return 1; /* So that returned eip > *eip_limit. */ + } + + /* Get the GDT/LDT descriptor base. + When you look for races in this code remember that + LDT and other horrors are only used in user space. */ + if (seg & (1<<2)) { + /* Must lock the LDT while reading it. */ + down(¤t->mm->context.sem); + desc = current->mm->context.ldt; + desc = (void *)desc + (seg & ~7); + } else { + /* Must disable preemption while reading the GDT. */ + desc = (u32 *)get_cpu_gdt_table(get_cpu()); + desc = (void *)desc + (seg & ~7); + } + + /* Decode the code segment base from the descriptor */ + base = get_desc_base((unsigned long *)desc); + + if (seg & (1<<2)) { + up(¤t->mm->context.sem); + } else + put_cpu(); + + /* Adjust EIP and segment limit, and clamp at the kernel limit. + It's legitimate for segments to wrap at 0xffffffff. */ + seg_limit += base; + if (seg_limit < *eip_limit && seg_limit >= base) + *eip_limit = seg_limit; + return eip + base; +} + +/* + * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. + * Check that here and ignore it. + */ +static int __is_prefetch(struct pt_regs *regs, unsigned long addr) +{ + unsigned long limit; + unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit); + int scan_more = 1; + int prefetch = 0; + int i; + + for (i = 0; scan_more && i < 15; i++) { + unsigned char opcode; + unsigned char instr_hi; + unsigned char instr_lo; + + if (instr > (unsigned char *)limit) + break; + if (probe_kernel_address(instr, opcode)) + break; + + instr_hi = opcode & 0xf0; + instr_lo = opcode & 0x0f; + instr++; + + switch (instr_hi) { + case 0x20: + case 0x30: + /* Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. */ + scan_more = ((instr_lo & 7) == 0x6); + break; + + case 0x60: + /* 0x64 thru 0x67 are valid prefixes in all modes. */ + scan_more = (instr_lo & 0xC) == 0x4; + break; + case 0xF0: + /* 0xF0, 0xF2, and 0xF3 are valid prefixes */ + scan_more = !instr_lo || (instr_lo>>1) == 1; + break; + case 0x00: + /* Prefetch instruction is 0x0F0D or 0x0F18 */ + scan_more = 0; + if (instr > (unsigned char *)limit) + break; + if (probe_kernel_address(instr, opcode)) + break; + prefetch = (instr_lo == 0xF) && + (opcode == 0x0D || opcode == 0x18); + break; + default: + scan_more = 0; + break; + } + } + return prefetch; +} + +static inline int is_prefetch(struct pt_regs *regs, unsigned long addr, + unsigned long error_code) +{ + if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86 >= 6)) { + /* Catch an obscure case of prefetch inside an NX page. */ + if (nx_enabled && (error_code & 16)) + return 0; + return __is_prefetch(regs, addr); + } + return 0; +} + +static noinline void force_sig_info_fault(int si_signo, int si_code, + unsigned long address, struct task_struct *tsk) +{ + siginfo_t info; + + info.si_signo = si_signo; + info.si_errno = 0; + info.si_code = si_code; + info.si_addr = (void __user *)address; + force_sig_info(si_signo, &info, tsk); +} + +fastcall void do_invalid_op(struct pt_regs *, unsigned long); + +static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) +{ + unsigned index = pgd_index(address); + pgd_t *pgd_k; + pud_t *pud, *pud_k; + pmd_t *pmd, *pmd_k; + + pgd += index; + pgd_k = init_mm.pgd + index; + + if (!pgd_present(*pgd_k)) + return NULL; + + /* + * set_pgd(pgd, *pgd_k); here would be useless on PAE + * and redundant with the set_pmd() on non-PAE. As would + * set_pud. + */ + + pud = pud_offset(pgd, address); + pud_k = pud_offset(pgd_k, address); + if (!pud_present(*pud_k)) + return NULL; + + pmd = pmd_offset(pud, address); + pmd_k = pmd_offset(pud_k, address); + if (!pmd_present(*pmd_k)) + return NULL; + if (!pmd_present(*pmd)) { + set_pmd(pmd, *pmd_k); + arch_flush_lazy_mmu_mode(); + } else + BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); + return pmd_k; +} + +/* + * Handle a fault on the vmalloc or module mapping area + * + * This assumes no large pages in there. + */ +static inline int vmalloc_fault(unsigned long address) +{ + unsigned long pgd_paddr; + pmd_t *pmd_k; + pte_t *pte_k; + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "current" here. We might be inside + * an interrupt in the middle of a task switch.. + */ + pgd_paddr = read_cr3(); + pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); + if (!pmd_k) + return -1; + pte_k = pte_offset_kernel(pmd_k, address); + if (!pte_present(*pte_k)) + return -1; + return 0; +} + +int show_unhandled_signals = 1; + +/* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to one of the appropriate + * routines. + * + * error_code: + * bit 0 == 0 means no page found, 1 means protection fault + * bit 1 == 0 means read, 1 means write + * bit 2 == 0 means kernel, 1 means user-mode + * bit 3 == 1 means use of reserved bit detected + * bit 4 == 1 means fault was an instruction fetch + */ +fastcall void __kprobes do_page_fault(struct pt_regs *regs, + unsigned long error_code) +{ + struct task_struct *tsk; + struct mm_struct *mm; + struct vm_area_struct * vma; + unsigned long address; + int write, si_code; + int fault; + + /* get the address */ + address = read_cr2(); + + tsk = current; + + si_code = SEGV_MAPERR; + + /* + * We fault-in kernel-space virtual memory on-demand. The + * 'reference' page table is init_mm.pgd. + * + * NOTE! We MUST NOT take any locks for this case. We may + * be in an interrupt or a critical region, and should + * only copy the information from the master page table, + * nothing more. + * + * This verifies that the fault happens in kernel space + * (error_code & 4) == 0, and that the fault was not a + * protection error (error_code & 9) == 0. + */ + if (unlikely(address >= TASK_SIZE)) { + if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0) + return; + if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + return; + /* + * Don't take the mm semaphore here. If we fixup a prefetch + * fault we could otherwise deadlock. + */ + goto bad_area_nosemaphore; + } + + if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + return; + + /* It's safe to allow irq's after cr2 has been saved and the vmalloc + fault has been handled. */ + if (regs->eflags & (X86_EFLAGS_IF|VM_MASK)) + local_irq_enable(); + + mm = tsk->mm; + + /* + * If we're in an interrupt, have no user context or are running in an + * atomic region then we must not take the fault.. + */ + if (in_atomic() || !mm) + goto bad_area_nosemaphore; + + /* When running in the kernel we expect faults to occur only to + * addresses in user space. All other faults represent errors in the + * kernel and should generate an OOPS. Unfortunatly, in the case of an + * erroneous fault occurring in a code path which already holds mmap_sem + * we will deadlock attempting to validate the fault against the + * address space. Luckily the kernel only validly references user + * space from well defined areas of code, which are listed in the + * exceptions table. + * + * As the vast majority of faults will be valid we will only perform + * the source reference check when there is a possibilty of a deadlock. + * Attempt to lock the address space, if we cannot we then validate the + * source. If this is invalid we can skip the address space check, + * thus avoiding the deadlock. + */ + if (!down_read_trylock(&mm->mmap_sem)) { + if ((error_code & 4) == 0 && + !search_exception_tables(regs->eip)) + goto bad_area_nosemaphore; + down_read(&mm->mmap_sem); + } + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (error_code & 4) { + /* + * Accessing the stack below %esp is always a bug. + * The large cushion allows instructions like enter + * and pusha to work. ("enter $65535,$31" pushes + * 32 pointers and then decrements %esp by 65535.) + */ + if (address + 65536 + 32 * sizeof(unsigned long) < regs->esp) + goto bad_area; + } + if (expand_stack(vma, address)) + goto bad_area; +/* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ +good_area: + si_code = SEGV_ACCERR; + write = 0; + switch (error_code & 3) { + default: /* 3: write, present */ + /* fall through */ + case 2: /* write, not present */ + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + write++; + break; + case 1: /* read, present */ + goto bad_area; + case 0: /* read, not present */ + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) + goto bad_area; + } + + survive: + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + fault = handle_mm_fault(mm, vma, address, write); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); + } + if (fault & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; + + /* + * Did it hit the DOS screen memory VA from vm86 mode? + */ + if (regs->eflags & VM_MASK) { + unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT; + if (bit < 32) + tsk->thread.screen_bitmap |= 1 << bit; + } + up_read(&mm->mmap_sem); + return; + +/* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ +bad_area: + up_read(&mm->mmap_sem); + +bad_area_nosemaphore: + /* User mode accesses just cause a SIGSEGV */ + if (error_code & 4) { + /* + * It's possible to have interrupts off here. + */ + local_irq_enable(); + + /* + * Valid to do another page fault here because this one came + * from user space. + */ + if (is_prefetch(regs, address, error_code)) + return; + + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && + printk_ratelimit()) { + printk("%s%s[%d]: segfault at %08lx eip %08lx " + "esp %08lx error %lx\n", + tsk->pid > 1 ? KERN_INFO : KERN_EMERG, + tsk->comm, tsk->pid, address, regs->eip, + regs->esp, error_code); + } + tsk->thread.cr2 = address; + /* Kernel addresses are always protection faults */ + tsk->thread.error_code = error_code | (address >= TASK_SIZE); + tsk->thread.trap_no = 14; + force_sig_info_fault(SIGSEGV, si_code, address, tsk); + return; + } + +#ifdef CONFIG_X86_F00F_BUG + /* + * Pentium F0 0F C7 C8 bug workaround. + */ + if (boot_cpu_data.f00f_bug) { + unsigned long nr; + + nr = (address - idt_descr.address) >> 3; + + if (nr == 6) { + do_invalid_op(regs, 0); + return; + } + } +#endif + +no_context: + /* Are we prepared to handle this kernel fault? */ + if (fixup_exception(regs)) + return; + + /* + * Valid to do another page fault here, because if this fault + * had been triggered by is_prefetch fixup_exception would have + * handled it. + */ + if (is_prefetch(regs, address, error_code)) + return; + +/* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ + + bust_spinlocks(1); + + if (oops_may_print()) { + __typeof__(pte_val(__pte(0))) page; + +#ifdef CONFIG_X86_PAE + if (error_code & 16) { + pte_t *pte = lookup_address(address); + + if (pte && pte_present(*pte) && !pte_exec_kernel(*pte)) + printk(KERN_CRIT "kernel tried to execute " + "NX-protected page - exploit attempt? " + "(uid: %d)\n", current->uid); + } +#endif + if (address < PAGE_SIZE) + printk(KERN_ALERT "BUG: unable to handle kernel NULL " + "pointer dereference"); + else + printk(KERN_ALERT "BUG: unable to handle kernel paging" + " request"); + printk(" at virtual address %08lx\n",address); + printk(KERN_ALERT " printing eip:\n"); + printk("%08lx\n", regs->eip); + + page = read_cr3(); + page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT]; +#ifdef CONFIG_X86_PAE + printk(KERN_ALERT "*pdpt = %016Lx\n", page); + if ((page >> PAGE_SHIFT) < max_low_pfn + && page & _PAGE_PRESENT) { + page &= PAGE_MASK; + page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT) + & (PTRS_PER_PMD - 1)]; + printk(KERN_ALERT "*pde = %016Lx\n", page); + page &= ~_PAGE_NX; + } +#else + printk(KERN_ALERT "*pde = %08lx\n", page); +#endif + + /* + * We must not directly access the pte in the highpte + * case if the page table is located in highmem. + * And let's rather not kmap-atomic the pte, just in case + * it's allocated already. + */ + if ((page >> PAGE_SHIFT) < max_low_pfn + && (page & _PAGE_PRESENT)) { + page &= PAGE_MASK; + page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT) + & (PTRS_PER_PTE - 1)]; + printk(KERN_ALERT "*pte = %0*Lx\n", sizeof(page)*2, (u64)page); + } + } + + tsk->thread.cr2 = address; + tsk->thread.trap_no = 14; + tsk->thread.error_code = error_code; + die("Oops", regs, error_code); + bust_spinlocks(0); + do_exit(SIGKILL); + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +out_of_memory: + up_read(&mm->mmap_sem); + if (is_init(tsk)) { + yield(); + down_read(&mm->mmap_sem); + goto survive; + } + printk("VM: killing process %s\n", tsk->comm); + if (error_code & 4) + do_exit(SIGKILL); + goto no_context; + +do_sigbus: + up_read(&mm->mmap_sem); + + /* Kernel mode? Handle exceptions or die */ + if (!(error_code & 4)) + goto no_context; + + /* User space => ok to do another page fault */ + if (is_prefetch(regs, address, error_code)) + return; + + tsk->thread.cr2 = address; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 14; + force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); +} + +void vmalloc_sync_all(void) +{ + /* + * Note that races in the updates of insync and start aren't + * problematic: insync can only get set bits added, and updates to + * start are only improving performance (without affecting correctness + * if undone). + */ + static DECLARE_BITMAP(insync, PTRS_PER_PGD); + static unsigned long start = TASK_SIZE; + unsigned long address; + + if (SHARED_KERNEL_PMD) + return; + + BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); + for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) { + if (!test_bit(pgd_index(address), insync)) { + unsigned long flags; + struct page *page; + + spin_lock_irqsave(&pgd_lock, flags); + for (page = pgd_list; page; page = + (struct page *)page->index) + if (!vmalloc_sync_one(page_address(page), + address)) { + BUG_ON(page != pgd_list); + break; + } + spin_unlock_irqrestore(&pgd_lock, flags); + if (!page) + set_bit(pgd_index(address), insync); + } + if (address == start && test_bit(pgd_index(address), insync)) + start = address + PGDIR_SIZE; + } +} diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c new file mode 100644 index 00000000000..1c3bf95f735 --- /dev/null +++ b/arch/x86/mm/highmem_32.c @@ -0,0 +1,113 @@ +#include +#include + +void *kmap(struct page *page) +{ + might_sleep(); + if (!PageHighMem(page)) + return page_address(page); + return kmap_high(page); +} + +void kunmap(struct page *page) +{ + if (in_interrupt()) + BUG(); + if (!PageHighMem(page)) + return; + kunmap_high(page); +} + +/* + * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because + * no global lock is needed and because the kmap code must perform a global TLB + * invalidation when the kmap pool wraps. + * + * However when holding an atomic kmap is is not legal to sleep, so atomic + * kmaps are appropriate for short, tight code paths only. + */ +void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) +{ + enum fixed_addresses idx; + unsigned long vaddr; + + /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + pagefault_disable(); + + if (!PageHighMem(page)) + return page_address(page); + + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + BUG_ON(!pte_none(*(kmap_pte-idx))); + set_pte(kmap_pte-idx, mk_pte(page, prot)); + arch_flush_lazy_mmu_mode(); + + return (void *)vaddr; +} + +void *kmap_atomic(struct page *page, enum km_type type) +{ + return kmap_atomic_prot(page, type, kmap_prot); +} + +void kunmap_atomic(void *kvaddr, enum km_type type) +{ + unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; + enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); + + /* + * Force other mappings to Oops if they'll try to access this pte + * without first remap it. Keeping stale mappings around is a bad idea + * also, in case the page changes cacheability attributes or becomes + * a protected page in a hypervisor. + */ + if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) + kpte_clear_flush(kmap_pte-idx, vaddr); + else { +#ifdef CONFIG_DEBUG_HIGHMEM + BUG_ON(vaddr < PAGE_OFFSET); + BUG_ON(vaddr >= (unsigned long)high_memory); +#endif + } + + arch_flush_lazy_mmu_mode(); + pagefault_enable(); +} + +/* This is the same as kmap_atomic() but can map memory that doesn't + * have a struct page associated with it. + */ +void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) +{ + enum fixed_addresses idx; + unsigned long vaddr; + + pagefault_disable(); + + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); + arch_flush_lazy_mmu_mode(); + + return (void*) vaddr; +} + +struct page *kmap_atomic_to_page(void *ptr) +{ + unsigned long idx, vaddr = (unsigned long)ptr; + pte_t *pte; + + if (vaddr < FIXADDR_START) + return virt_to_page(ptr); + + idx = virt_to_fix(vaddr); + pte = kmap_pte - (idx - FIX_KMAP_BEGIN); + return pte_page(*pte); +} + +EXPORT_SYMBOL(kmap); +EXPORT_SYMBOL(kunmap); +EXPORT_SYMBOL(kmap_atomic); +EXPORT_SYMBOL(kunmap_atomic); +EXPORT_SYMBOL(kmap_atomic_to_page); diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c new file mode 100644 index 00000000000..6c06d9c0488 --- /dev/null +++ b/arch/x86/mm/hugetlbpage.c @@ -0,0 +1,391 @@ +/* + * IA-32 Huge TLB Page Support for Kernel. + * + * Copyright (C) 2002, Rohit Seth + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned long page_table_shareable(struct vm_area_struct *svma, + struct vm_area_struct *vma, + unsigned long addr, pgoff_t idx) +{ + unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) + + svma->vm_start; + unsigned long sbase = saddr & PUD_MASK; + unsigned long s_end = sbase + PUD_SIZE; + + /* + * match the virtual addresses, permission and the alignment of the + * page table page. + */ + if (pmd_index(addr) != pmd_index(saddr) || + vma->vm_flags != svma->vm_flags || + sbase < svma->vm_start || svma->vm_end < s_end) + return 0; + + return saddr; +} + +static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) +{ + unsigned long base = addr & PUD_MASK; + unsigned long end = base + PUD_SIZE; + + /* + * check on proper vm_flags and page table alignment + */ + if (vma->vm_flags & VM_MAYSHARE && + vma->vm_start <= base && end <= vma->vm_end) + return 1; + return 0; +} + +/* + * search for a shareable pmd page for hugetlb. + */ +static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) +{ + struct vm_area_struct *vma = find_vma(mm, addr); + struct address_space *mapping = vma->vm_file->f_mapping; + pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + + vma->vm_pgoff; + struct prio_tree_iter iter; + struct vm_area_struct *svma; + unsigned long saddr; + pte_t *spte = NULL; + + if (!vma_shareable(vma, addr)) + return; + + spin_lock(&mapping->i_mmap_lock); + vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { + if (svma == vma) + continue; + + saddr = page_table_shareable(svma, vma, addr, idx); + if (saddr) { + spte = huge_pte_offset(svma->vm_mm, saddr); + if (spte) { + get_page(virt_to_page(spte)); + break; + } + } + } + + if (!spte) + goto out; + + spin_lock(&mm->page_table_lock); + if (pud_none(*pud)) + pud_populate(mm, pud, (unsigned long) spte & PAGE_MASK); + else + put_page(virt_to_page(spte)); + spin_unlock(&mm->page_table_lock); +out: + spin_unlock(&mapping->i_mmap_lock); +} + +/* + * unmap huge page backed by shared pte. + * + * Hugetlb pte page is ref counted at the time of mapping. If pte is shared + * indicated by page_count > 1, unmap is achieved by clearing pud and + * decrementing the ref count. If count == 1, the pte page is not shared. + * + * called with vma->vm_mm->page_table_lock held. + * + * returns: 1 successfully unmapped a shared pte page + * 0 the underlying pte page is not shared, or it is the last user + */ +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + pgd_t *pgd = pgd_offset(mm, *addr); + pud_t *pud = pud_offset(pgd, *addr); + + BUG_ON(page_count(virt_to_page(ptep)) == 0); + if (page_count(virt_to_page(ptep)) == 1) + return 0; + + pud_clear(pud); + put_page(virt_to_page(ptep)); + *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; + return 1; +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (pud) { + if (pud_none(*pud)) + huge_pmd_share(mm, addr, pud); + pte = (pte_t *) pmd_alloc(mm, pud, addr); + } + BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); + + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd = NULL; + + pgd = pgd_offset(mm, addr); + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, addr); + if (pud_present(*pud)) + pmd = pmd_offset(pud, addr); + } + return (pte_t *) pmd; +} + +#if 0 /* This is just for testing */ +struct page * +follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) +{ + unsigned long start = address; + int length = 1; + int nr; + struct page *page; + struct vm_area_struct *vma; + + vma = find_vma(mm, addr); + if (!vma || !is_vm_hugetlb_page(vma)) + return ERR_PTR(-EINVAL); + + pte = huge_pte_offset(mm, address); + + /* hugetlb should be locked, and hence, prefaulted */ + WARN_ON(!pte || pte_none(*pte)); + + page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; + + WARN_ON(!PageCompound(page)); + + return page; +} + +int pmd_huge(pmd_t pmd) +{ + return 0; +} + +struct page * +follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + return NULL; +} + +#else + +struct page * +follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) +{ + return ERR_PTR(-EINVAL); +} + +int pmd_huge(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_PSE); +} + +struct page * +follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + struct page *page; + + page = pte_page(*(pte_t *)pmd); + if (page) + page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); + return page; +} +#endif + +/* x86_64 also uses this file */ + +#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA +static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long start_addr; + + if (len > mm->cached_hole_size) { + start_addr = mm->free_area_cache; + } else { + start_addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + } + +full_search: + addr = ALIGN(start_addr, HPAGE_SIZE); + + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). */ + if (TASK_SIZE - len < addr) { + /* + * Start a new search - just in case we missed + * some holes. + */ + if (start_addr != TASK_UNMAPPED_BASE) { + start_addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + goto full_search; + } + return -ENOMEM; + } + if (!vma || addr + len <= vma->vm_start) { + mm->free_area_cache = addr + len; + return addr; + } + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + addr = ALIGN(vma->vm_end, HPAGE_SIZE); + } +} + +static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, + unsigned long addr0, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma, *prev_vma; + unsigned long base = mm->mmap_base, addr = addr0; + unsigned long largest_hole = mm->cached_hole_size; + int first_time = 1; + + /* don't allow allocations above current base */ + if (mm->free_area_cache > base) + mm->free_area_cache = base; + + if (len <= largest_hole) { + largest_hole = 0; + mm->free_area_cache = base; + } +try_again: + /* make sure it can fit in the remaining address space */ + if (mm->free_area_cache < len) + goto fail; + + /* either no address requested or cant fit in requested address hole */ + addr = (mm->free_area_cache - len) & HPAGE_MASK; + do { + /* + * Lookup failure means no vma is above this address, + * i.e. return with success: + */ + if (!(vma = find_vma_prev(mm, addr, &prev_vma))) + return addr; + + /* + * new region fits between prev_vma->vm_end and + * vma->vm_start, use it: + */ + if (addr + len <= vma->vm_start && + (!prev_vma || (addr >= prev_vma->vm_end))) { + /* remember the address as a hint for next time */ + mm->cached_hole_size = largest_hole; + return (mm->free_area_cache = addr); + } else { + /* pull free_area_cache down to the first hole */ + if (mm->free_area_cache == vma->vm_end) { + mm->free_area_cache = vma->vm_start; + mm->cached_hole_size = largest_hole; + } + } + + /* remember the largest hole we saw so far */ + if (addr + largest_hole < vma->vm_start) + largest_hole = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = (vma->vm_start - len) & HPAGE_MASK; + } while (len <= vma->vm_start); + +fail: + /* + * if hint left us with no space for the requested + * mapping then try again: + */ + if (first_time) { + mm->free_area_cache = base; + largest_hole = 0; + first_time = 0; + goto try_again; + } + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + mm->free_area_cache = TASK_UNMAPPED_BASE; + mm->cached_hole_size = ~0UL; + addr = hugetlb_get_unmapped_area_bottomup(file, addr0, + len, pgoff, flags); + + /* + * Restore the topdown base: + */ + mm->free_area_cache = base; + mm->cached_hole_size = ~0UL; + + return addr; +} + +unsigned long +hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + + if (len & ~HPAGE_MASK) + return -EINVAL; + if (len > TASK_SIZE) + return -ENOMEM; + + if (flags & MAP_FIXED) { + if (prepare_hugepage_range(addr, len)) + return -EINVAL; + return addr; + } + + if (addr) { + addr = ALIGN(addr, HPAGE_SIZE); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + if (mm->get_unmapped_area == arch_get_unmapped_area) + return hugetlb_get_unmapped_area_bottomup(file, addr, len, + pgoff, flags); + else + return hugetlb_get_unmapped_area_topdown(file, addr, len, + pgoff, flags); +} + +#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ + diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c new file mode 100644 index 00000000000..730a5b177b1 --- /dev/null +++ b/arch/x86/mm/init_32.c @@ -0,0 +1,858 @@ +/* + * linux/arch/i386/mm/init.c + * + * Copyright (C) 1995 Linus Torvalds + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned int __VMALLOC_RESERVE = 128 << 20; + +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +unsigned long highstart_pfn, highend_pfn; + +static int noinline do_test_wp_bit(void); + +/* + * Creates a middle page table and puts a pointer to it in the + * given global directory entry. This only returns the gd entry + * in non-PAE compilation mode, since the middle layer is folded. + */ +static pmd_t * __init one_md_table_init(pgd_t *pgd) +{ + pud_t *pud; + pmd_t *pmd_table; + +#ifdef CONFIG_X86_PAE + if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { + pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); + + paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); + set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); + pud = pud_offset(pgd, 0); + if (pmd_table != pmd_offset(pud, 0)) + BUG(); + } +#endif + pud = pud_offset(pgd, 0); + pmd_table = pmd_offset(pud, 0); + return pmd_table; +} + +/* + * Create a page table and place a pointer to it in a middle page + * directory entry. + */ +static pte_t * __init one_page_table_init(pmd_t *pmd) +{ + if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { + pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + + paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT); + set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); + BUG_ON(page_table != pte_offset_kernel(pmd, 0)); + } + + return pte_offset_kernel(pmd, 0); +} + +/* + * This function initializes a certain range of kernel virtual memory + * with new bootmem page tables, everywhere page tables are missing in + * the given range. + */ + +/* + * NOTE: The pagetables are allocated contiguous on the physical space + * so we can cache the place of the first one and move around without + * checking the pgd every time. + */ +static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base) +{ + pgd_t *pgd; + pmd_t *pmd; + int pgd_idx, pmd_idx; + unsigned long vaddr; + + vaddr = start; + pgd_idx = pgd_index(vaddr); + pmd_idx = pmd_index(vaddr); + pgd = pgd_base + pgd_idx; + + for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { + pmd = one_md_table_init(pgd); + pmd = pmd + pmd_index(vaddr); + for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { + one_page_table_init(pmd); + + vaddr += PMD_SIZE; + } + pmd_idx = 0; + } +} + +static inline int is_kernel_text(unsigned long addr) +{ + if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) + return 1; + return 0; +} + +/* + * This maps the physical memory to kernel virtual address space, a total + * of max_low_pfn pages, by creating page tables starting from address + * PAGE_OFFSET. + */ +static void __init kernel_physical_mapping_init(pgd_t *pgd_base) +{ + unsigned long pfn; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int pgd_idx, pmd_idx, pte_ofs; + + pgd_idx = pgd_index(PAGE_OFFSET); + pgd = pgd_base + pgd_idx; + pfn = 0; + + for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { + pmd = one_md_table_init(pgd); + if (pfn >= max_low_pfn) + continue; + for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) { + unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET; + + /* Map with big pages if possible, otherwise create normal page tables. */ + if (cpu_has_pse) { + unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; + if (is_kernel_text(address) || is_kernel_text(address2)) + set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); + else + set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); + + pfn += PTRS_PER_PTE; + } else { + pte = one_page_table_init(pmd); + + for (pte_ofs = 0; + pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; + pte++, pfn++, pte_ofs++, address += PAGE_SIZE) { + if (is_kernel_text(address)) + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); + else + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); + } + } + } + } +} + +static inline int page_kills_ppro(unsigned long pagenr) +{ + if (pagenr >= 0x70000 && pagenr <= 0x7003F) + return 1; + return 0; +} + +int page_is_ram(unsigned long pagenr) +{ + int i; + unsigned long addr, end; + + if (efi_enabled) { + efi_memory_desc_t *md; + void *p; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; + if (!is_available_memory(md)) + continue; + addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT; + end = (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT; + + if ((pagenr >= addr) && (pagenr < end)) + return 1; + } + return 0; + } + + for (i = 0; i < e820.nr_map; i++) { + + if (e820.map[i].type != E820_RAM) /* not usable memory */ + continue; + /* + * !!!FIXME!!! Some BIOSen report areas as RAM that + * are not. Notably the 640->1Mb area. We need a sanity + * check here. + */ + addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT; + end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT; + if ((pagenr >= addr) && (pagenr < end)) + return 1; + } + return 0; +} + +#ifdef CONFIG_HIGHMEM +pte_t *kmap_pte; +pgprot_t kmap_prot; + +#define kmap_get_fixmap_pte(vaddr) \ + pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr)) + +static void __init kmap_init(void) +{ + unsigned long kmap_vstart; + + /* cache the first kmap pte */ + kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); + kmap_pte = kmap_get_fixmap_pte(kmap_vstart); + + kmap_prot = PAGE_KERNEL; +} + +static void __init permanent_kmaps_init(pgd_t *pgd_base) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long vaddr; + + vaddr = PKMAP_BASE; + page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); + + pgd = swapper_pg_dir + pgd_index(vaddr); + pud = pud_offset(pgd, vaddr); + pmd = pmd_offset(pud, vaddr); + pte = pte_offset_kernel(pmd, vaddr); + pkmap_page_table = pte; +} + +static void __meminit free_new_highpage(struct page *page) +{ + init_page_count(page); + __free_page(page); + totalhigh_pages++; +} + +void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) +{ + if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) { + ClearPageReserved(page); + free_new_highpage(page); + } else + SetPageReserved(page); +} + +static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long pfn) +{ + free_new_highpage(page); + totalram_pages++; +#ifdef CONFIG_FLATMEM + max_mapnr = max(pfn, max_mapnr); +#endif + num_physpages++; + return 0; +} + +/* + * Not currently handling the NUMA case. + * Assuming single node and all memory that + * has been added dynamically that would be + * onlined here is in HIGHMEM + */ +void __meminit online_page(struct page *page) +{ + ClearPageReserved(page); + add_one_highpage_hotplug(page, page_to_pfn(page)); +} + + +#ifdef CONFIG_NUMA +extern void set_highmem_pages_init(int); +#else +static void __init set_highmem_pages_init(int bad_ppro) +{ + int pfn; + for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) + add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); + totalram_pages += totalhigh_pages; +} +#endif /* CONFIG_FLATMEM */ + +#else +#define kmap_init() do { } while (0) +#define permanent_kmaps_init(pgd_base) do { } while (0) +#define set_highmem_pages_init(bad_ppro) do { } while (0) +#endif /* CONFIG_HIGHMEM */ + +unsigned long long __PAGE_KERNEL = _PAGE_KERNEL; +EXPORT_SYMBOL(__PAGE_KERNEL); +unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; + +#ifdef CONFIG_NUMA +extern void __init remap_numa_kva(void); +#else +#define remap_numa_kva() do {} while (0) +#endif + +void __init native_pagetable_setup_start(pgd_t *base) +{ +#ifdef CONFIG_X86_PAE + int i; + + /* + * Init entries of the first-level page table to the + * zero page, if they haven't already been set up. + * + * In a normal native boot, we'll be running on a + * pagetable rooted in swapper_pg_dir, but not in PAE + * mode, so this will end up clobbering the mappings + * for the lower 24Mbytes of the address space, + * without affecting the kernel address space. + */ + for (i = 0; i < USER_PTRS_PER_PGD; i++) + set_pgd(&base[i], + __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); + + /* Make sure kernel address space is empty so that a pagetable + will be allocated for it. */ + memset(&base[USER_PTRS_PER_PGD], 0, + KERNEL_PGD_PTRS * sizeof(pgd_t)); +#else + paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT); +#endif +} + +void __init native_pagetable_setup_done(pgd_t *base) +{ +#ifdef CONFIG_X86_PAE + /* + * Add low memory identity-mappings - SMP needs it when + * starting up on an AP from real-mode. In the non-PAE + * case we already have these mappings through head.S. + * All user-space mappings are explicitly cleared after + * SMP startup. + */ + set_pgd(&base[0], base[USER_PTRS_PER_PGD]); +#endif +} + +/* + * Build a proper pagetable for the kernel mappings. Up until this + * point, we've been running on some set of pagetables constructed by + * the boot process. + * + * If we're booting on native hardware, this will be a pagetable + * constructed in arch/i386/kernel/head.S, and not running in PAE mode + * (even if we'll end up running in PAE). The root of the pagetable + * will be swapper_pg_dir. + * + * If we're booting paravirtualized under a hypervisor, then there are + * more options: we may already be running PAE, and the pagetable may + * or may not be based in swapper_pg_dir. In any case, + * paravirt_pagetable_setup_start() will set up swapper_pg_dir + * appropriately for the rest of the initialization to work. + * + * In general, pagetable_init() assumes that the pagetable may already + * be partially populated, and so it avoids stomping on any existing + * mappings. + */ +static void __init pagetable_init (void) +{ + unsigned long vaddr, end; + pgd_t *pgd_base = swapper_pg_dir; + + paravirt_pagetable_setup_start(pgd_base); + + /* Enable PSE if available */ + if (cpu_has_pse) + set_in_cr4(X86_CR4_PSE); + + /* Enable PGE if available */ + if (cpu_has_pge) { + set_in_cr4(X86_CR4_PGE); + __PAGE_KERNEL |= _PAGE_GLOBAL; + __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL; + } + + kernel_physical_mapping_init(pgd_base); + remap_numa_kva(); + + /* + * Fixed mappings, only the page table structure has to be + * created - mappings will be set by set_fixmap(): + */ + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; + end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; + page_table_range_init(vaddr, end, pgd_base); + + permanent_kmaps_init(pgd_base); + + paravirt_pagetable_setup_done(pgd_base); +} + +#if defined(CONFIG_HIBERNATION) || defined(CONFIG_ACPI) +/* + * Swap suspend & friends need this for resume because things like the intel-agp + * driver might have split up a kernel 4MB mapping. + */ +char __nosavedata swsusp_pg_dir[PAGE_SIZE] + __attribute__ ((aligned (PAGE_SIZE))); + +static inline void save_pg_dir(void) +{ + memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE); +} +#else +static inline void save_pg_dir(void) +{ +} +#endif + +void zap_low_mappings (void) +{ + int i; + + save_pg_dir(); + + /* + * Zap initial low-memory mappings. + * + * Note that "pgd_clear()" doesn't do it for + * us, because pgd_clear() is a no-op on i386. + */ + for (i = 0; i < USER_PTRS_PER_PGD; i++) +#ifdef CONFIG_X86_PAE + set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); +#else + set_pgd(swapper_pg_dir+i, __pgd(0)); +#endif + flush_tlb_all(); +} + +int nx_enabled = 0; + +#ifdef CONFIG_X86_PAE + +static int disable_nx __initdata = 0; +u64 __supported_pte_mask __read_mostly = ~_PAGE_NX; +EXPORT_SYMBOL_GPL(__supported_pte_mask); + +/* + * noexec = on|off + * + * Control non executable mappings. + * + * on Enable + * off Disable + */ +static int __init noexec_setup(char *str) +{ + if (!str || !strcmp(str, "on")) { + if (cpu_has_nx) { + __supported_pte_mask |= _PAGE_NX; + disable_nx = 0; + } + } else if (!strcmp(str,"off")) { + disable_nx = 1; + __supported_pte_mask &= ~_PAGE_NX; + } else + return -EINVAL; + + return 0; +} +early_param("noexec", noexec_setup); + +static void __init set_nx(void) +{ + unsigned int v[4], l, h; + + if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { + cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); + if ((v[3] & (1 << 20)) && !disable_nx) { + rdmsr(MSR_EFER, l, h); + l |= EFER_NX; + wrmsr(MSR_EFER, l, h); + nx_enabled = 1; + __supported_pte_mask |= _PAGE_NX; + } + } +} + +/* + * Enables/disables executability of a given kernel page and + * returns the previous setting. + */ +int __init set_kernel_exec(unsigned long vaddr, int enable) +{ + pte_t *pte; + int ret = 1; + + if (!nx_enabled) + goto out; + + pte = lookup_address(vaddr); + BUG_ON(!pte); + + if (!pte_exec_kernel(*pte)) + ret = 0; + + if (enable) + pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32)); + else + pte->pte_high |= 1 << (_PAGE_BIT_NX - 32); + pte_update_defer(&init_mm, vaddr, pte); + __flush_tlb_all(); +out: + return ret; +} + +#endif + +/* + * paging_init() sets up the page tables - note that the first 8MB are + * already mapped by head.S. + * + * This routines also unmaps the page at virtual kernel address 0, so + * that we can trap those pesky NULL-reference errors in the kernel. + */ +void __init paging_init(void) +{ +#ifdef CONFIG_X86_PAE + set_nx(); + if (nx_enabled) + printk("NX (Execute Disable) protection: active\n"); +#endif + + pagetable_init(); + + load_cr3(swapper_pg_dir); + +#ifdef CONFIG_X86_PAE + /* + * We will bail out later - printk doesn't work right now so + * the user would just see a hanging kernel. + */ + if (cpu_has_pae) + set_in_cr4(X86_CR4_PAE); +#endif + __flush_tlb_all(); + + kmap_init(); +} + +/* + * Test if the WP bit works in supervisor mode. It isn't supported on 386's + * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This + * used to involve black magic jumps to work around some nasty CPU bugs, + * but fortunately the switch to using exceptions got rid of all that. + */ + +static void __init test_wp_bit(void) +{ + printk("Checking if this processor honours the WP bit even in supervisor mode... "); + + /* Any page-aligned address will do, the test is non-destructive */ + __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY); + boot_cpu_data.wp_works_ok = do_test_wp_bit(); + clear_fixmap(FIX_WP_TEST); + + if (!boot_cpu_data.wp_works_ok) { + printk("No.\n"); +#ifdef CONFIG_X86_WP_WORKS_OK + panic("This kernel doesn't support CPU's with broken WP. Recompile it for a 386!"); +#endif + } else { + printk("Ok.\n"); + } +} + +static struct kcore_list kcore_mem, kcore_vmalloc; + +void __init mem_init(void) +{ + extern int ppro_with_ram_bug(void); + int codesize, reservedpages, datasize, initsize; + int tmp; + int bad_ppro; + +#ifdef CONFIG_FLATMEM + BUG_ON(!mem_map); +#endif + + bad_ppro = ppro_with_ram_bug(); + +#ifdef CONFIG_HIGHMEM + /* check that fixmap and pkmap do not overlap */ + if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { + printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n"); + printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", + PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START); + BUG(); + } +#endif + + /* this will put all low memory onto the freelists */ + totalram_pages += free_all_bootmem(); + + reservedpages = 0; + for (tmp = 0; tmp < max_low_pfn; tmp++) + /* + * Only count reserved RAM pages + */ + if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) + reservedpages++; + + set_highmem_pages_init(bad_ppro); + + codesize = (unsigned long) &_etext - (unsigned long) &_text; + datasize = (unsigned long) &_edata - (unsigned long) &_etext; + initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; + + kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); + kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, + VMALLOC_END-VMALLOC_START); + + printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", + (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), + num_physpages << (PAGE_SHIFT-10), + codesize >> 10, + reservedpages << (PAGE_SHIFT-10), + datasize >> 10, + initsize >> 10, + (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) + ); + +#if 1 /* double-sanity-check paranoia */ + printk("virtual kernel memory layout:\n" + " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" +#ifdef CONFIG_HIGHMEM + " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" +#endif + " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" + " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" + " .init : 0x%08lx - 0x%08lx (%4ld kB)\n" + " .data : 0x%08lx - 0x%08lx (%4ld kB)\n" + " .text : 0x%08lx - 0x%08lx (%4ld kB)\n", + FIXADDR_START, FIXADDR_TOP, + (FIXADDR_TOP - FIXADDR_START) >> 10, + +#ifdef CONFIG_HIGHMEM + PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, + (LAST_PKMAP*PAGE_SIZE) >> 10, +#endif + + VMALLOC_START, VMALLOC_END, + (VMALLOC_END - VMALLOC_START) >> 20, + + (unsigned long)__va(0), (unsigned long)high_memory, + ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20, + + (unsigned long)&__init_begin, (unsigned long)&__init_end, + ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10, + + (unsigned long)&_etext, (unsigned long)&_edata, + ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, + + (unsigned long)&_text, (unsigned long)&_etext, + ((unsigned long)&_etext - (unsigned long)&_text) >> 10); + +#ifdef CONFIG_HIGHMEM + BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START); + BUG_ON(VMALLOC_END > PKMAP_BASE); +#endif + BUG_ON(VMALLOC_START > VMALLOC_END); + BUG_ON((unsigned long)high_memory > VMALLOC_START); +#endif /* double-sanity-check paranoia */ + +#ifdef CONFIG_X86_PAE + if (!cpu_has_pae) + panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); +#endif + if (boot_cpu_data.wp_works_ok < 0) + test_wp_bit(); + + /* + * Subtle. SMP is doing it's boot stuff late (because it has to + * fork idle threads) - but it also needs low mappings for the + * protected-mode entry to work. We zap these entries only after + * the WP-bit has been tested. + */ +#ifndef CONFIG_SMP + zap_low_mappings(); +#endif +} + +#ifdef CONFIG_MEMORY_HOTPLUG +int arch_add_memory(int nid, u64 start, u64 size) +{ + struct pglist_data *pgdata = NODE_DATA(nid); + struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + + return __add_pages(zone, start_pfn, nr_pages); +} + +int remove_memory(u64 start, u64 size) +{ + return -EINVAL; +} +EXPORT_SYMBOL_GPL(remove_memory); +#endif + +struct kmem_cache *pmd_cache; + +void __init pgtable_cache_init(void) +{ + size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t); + + if (PTRS_PER_PMD > 1) { + pmd_cache = kmem_cache_create("pmd", + PTRS_PER_PMD*sizeof(pmd_t), + PTRS_PER_PMD*sizeof(pmd_t), + SLAB_PANIC, + pmd_ctor); + if (!SHARED_KERNEL_PMD) { + /* If we're in PAE mode and have a non-shared + kernel pmd, then the pgd size must be a + page size. This is because the pgd_list + links through the page structure, so there + can only be one pgd per page for this to + work. */ + pgd_size = PAGE_SIZE; + } + } +} + +/* + * This function cannot be __init, since exceptions don't work in that + * section. Put this after the callers, so that it cannot be inlined. + */ +static int noinline do_test_wp_bit(void) +{ + char tmp_reg; + int flag; + + __asm__ __volatile__( + " movb %0,%1 \n" + "1: movb %1,%0 \n" + " xorl %2,%2 \n" + "2: \n" + ".section __ex_table,\"a\"\n" + " .align 4 \n" + " .long 1b,2b \n" + ".previous \n" + :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)), + "=q" (tmp_reg), + "=r" (flag) + :"2" (1) + :"memory"); + + return flag; +} + +#ifdef CONFIG_DEBUG_RODATA + +void mark_rodata_ro(void) +{ + unsigned long start = PFN_ALIGN(_text); + unsigned long size = PFN_ALIGN(_etext) - start; + +#ifndef CONFIG_KPROBES +#ifdef CONFIG_HOTPLUG_CPU + /* It must still be possible to apply SMP alternatives. */ + if (num_possible_cpus() <= 1) +#endif + { + change_page_attr(virt_to_page(start), + size >> PAGE_SHIFT, PAGE_KERNEL_RX); + printk("Write protecting the kernel text: %luk\n", size >> 10); + } +#endif + start += size; + size = (unsigned long)__end_rodata - start; + change_page_attr(virt_to_page(start), + size >> PAGE_SHIFT, PAGE_KERNEL_RO); + printk("Write protecting the kernel read-only data: %luk\n", + size >> 10); + + /* + * change_page_attr() requires a global_flush_tlb() call after it. + * We do this after the printk so that if something went wrong in the + * change, the printk gets out at least to give a better debug hint + * of who is the culprit. + */ + global_flush_tlb(); +} +#endif + +void free_init_pages(char *what, unsigned long begin, unsigned long end) +{ + unsigned long addr; + + for (addr = begin; addr < end; addr += PAGE_SIZE) { + ClearPageReserved(virt_to_page(addr)); + init_page_count(virt_to_page(addr)); + memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); + free_page(addr); + totalram_pages++; + } + printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); +} + +void free_initmem(void) +{ + free_init_pages("unused kernel memory", + (unsigned long)(&__init_begin), + (unsigned long)(&__init_end)); +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + free_init_pages("initrd memory", start, end); +} +#endif + diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c new file mode 100644 index 00000000000..0b278315d73 --- /dev/null +++ b/arch/x86/mm/ioremap_32.c @@ -0,0 +1,274 @@ +/* + * arch/i386/mm/ioremap.c + * + * Re-map IO memory to kernel address space so that we can access it. + * This is needed for high PCI addresses that aren't mapped in the + * 640k-1MB IO memory area on PC's + * + * (C) Copyright 1995 1996 Linus Torvalds + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ISA_START_ADDRESS 0xa0000 +#define ISA_END_ADDRESS 0x100000 + +/* + * Generic mapping function (not visible outside): + */ + +/* + * Remap an arbitrary physical address space into the kernel virtual + * address space. Needed when the kernel wants to access high addresses + * directly. + * + * NOTE! We need to allow non-page-aligned mappings too: we will obviously + * have to convert them into an offset in a page-aligned mapping, but the + * caller shouldn't need to know that small detail. + */ +void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags) +{ + void __iomem * addr; + struct vm_struct * area; + unsigned long offset, last_addr; + pgprot_t prot; + + /* Don't allow wraparound or zero size */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) + return NULL; + + /* + * Don't remap the low PCI/ISA area, it's always mapped.. + */ + if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) + return (void __iomem *) phys_to_virt(phys_addr); + + /* + * Don't allow anybody to remap normal RAM that we're using.. + */ + if (phys_addr <= virt_to_phys(high_memory - 1)) { + char *t_addr, *t_end; + struct page *page; + + t_addr = __va(phys_addr); + t_end = t_addr + (size - 1); + + for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++) + if(!PageReserved(page)) + return NULL; + } + + prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY + | _PAGE_ACCESSED | flags); + + /* + * Mappings have to be page-aligned + */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr+1) - phys_addr; + + /* + * Ok, go for it.. + */ + area = get_vm_area(size, VM_IOREMAP | (flags << 20)); + if (!area) + return NULL; + area->phys_addr = phys_addr; + addr = (void __iomem *) area->addr; + if (ioremap_page_range((unsigned long) addr, + (unsigned long) addr + size, phys_addr, prot)) { + vunmap((void __force *) addr); + return NULL; + } + return (void __iomem *) (offset + (char __iomem *)addr); +} +EXPORT_SYMBOL(__ioremap); + +/** + * ioremap_nocache - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap_nocache performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * This version of ioremap ensures that the memory is marked uncachable + * on the CPU as well as honouring existing caching rules from things like + * the PCI bus. Note that there are other caches and buffers on many + * busses. In particular driver authors should read up on PCI writes + * + * It's useful if some control registers are in such an area and + * write combining or read caching is not desirable: + * + * Must be freed with iounmap. + */ + +void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) +{ + unsigned long last_addr; + void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD); + if (!p) + return p; + + /* Guaranteed to be > phys_addr, as per __ioremap() */ + last_addr = phys_addr + size - 1; + + if (last_addr < virt_to_phys(high_memory) - 1) { + struct page *ppage = virt_to_page(__va(phys_addr)); + unsigned long npages; + + phys_addr &= PAGE_MASK; + + /* This might overflow and become zero.. */ + last_addr = PAGE_ALIGN(last_addr); + + /* .. but that's ok, because modulo-2**n arithmetic will make + * the page-aligned "last - first" come out right. + */ + npages = (last_addr - phys_addr) >> PAGE_SHIFT; + + if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) { + iounmap(p); + p = NULL; + } + global_flush_tlb(); + } + + return p; +} +EXPORT_SYMBOL(ioremap_nocache); + +/** + * iounmap - Free a IO remapping + * @addr: virtual address from ioremap_* + * + * Caller must ensure there is only one unmapping for the same pointer. + */ +void iounmap(volatile void __iomem *addr) +{ + struct vm_struct *p, *o; + + if ((void __force *)addr <= high_memory) + return; + + /* + * __ioremap special-cases the PCI/ISA range by not instantiating a + * vm_area and by simply returning an address into the kernel mapping + * of ISA space. So handle that here. + */ + if (addr >= phys_to_virt(ISA_START_ADDRESS) && + addr < phys_to_virt(ISA_END_ADDRESS)) + return; + + addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long __force)addr); + + /* Use the vm area unlocked, assuming the caller + ensures there isn't another iounmap for the same address + in parallel. Reuse of the virtual address is prevented by + leaving it in the global lists until we're done with it. + cpa takes care of the direct mappings. */ + read_lock(&vmlist_lock); + for (p = vmlist; p; p = p->next) { + if (p->addr == addr) + break; + } + read_unlock(&vmlist_lock); + + if (!p) { + printk("iounmap: bad address %p\n", addr); + dump_stack(); + return; + } + + /* Reset the direct mapping. Can block */ + if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) { + change_page_attr(virt_to_page(__va(p->phys_addr)), + get_vm_area_size(p) >> PAGE_SHIFT, + PAGE_KERNEL); + global_flush_tlb(); + } + + /* Finally remove it */ + o = remove_vm_area((void *)addr); + BUG_ON(p != o || o == NULL); + kfree(p); +} +EXPORT_SYMBOL(iounmap); + +void __init *bt_ioremap(unsigned long phys_addr, unsigned long size) +{ + unsigned long offset, last_addr; + unsigned int nrpages; + enum fixed_addresses idx; + + /* Don't allow wraparound or zero size */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) + return NULL; + + /* + * Don't remap the low PCI/ISA area, it's always mapped.. + */ + if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) + return phys_to_virt(phys_addr); + + /* + * Mappings have to be page-aligned + */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr) - phys_addr; + + /* + * Mappings have to fit in the FIX_BTMAP area. + */ + nrpages = size >> PAGE_SHIFT; + if (nrpages > NR_FIX_BTMAPS) + return NULL; + + /* + * Ok, go for it.. + */ + idx = FIX_BTMAP_BEGIN; + while (nrpages > 0) { + set_fixmap(idx, phys_addr); + phys_addr += PAGE_SIZE; + --idx; + --nrpages; + } + return (void*) (offset + fix_to_virt(FIX_BTMAP_BEGIN)); +} + +void __init bt_iounmap(void *addr, unsigned long size) +{ + unsigned long virt_addr; + unsigned long offset; + unsigned int nrpages; + enum fixed_addresses idx; + + virt_addr = (unsigned long)addr; + if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) + return; + offset = virt_addr & ~PAGE_MASK; + nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT; + + idx = FIX_BTMAP_BEGIN; + while (nrpages > 0) { + clear_fixmap(idx); + --idx; + --nrpages; + } +} diff --git a/arch/x86/mm/mmap_32.c b/arch/x86/mm/mmap_32.c new file mode 100644 index 00000000000..552e0847375 --- /dev/null +++ b/arch/x86/mm/mmap_32.c @@ -0,0 +1,77 @@ +/* + * linux/arch/i386/mm/mmap.c + * + * flexible mmap layout support + * + * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * Started by Ingo Molnar + */ + +#include +#include +#include +#include + +/* + * Top of mmap area (just below the process stack). + * + * Leave an at least ~128 MB hole. + */ +#define MIN_GAP (128*1024*1024) +#define MAX_GAP (TASK_SIZE/6*5) + +static inline unsigned long mmap_base(struct mm_struct *mm) +{ + unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; + unsigned long random_factor = 0; + + if (current->flags & PF_RANDOMIZE) + random_factor = get_random_int() % (1024*1024); + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + + return PAGE_ALIGN(TASK_SIZE - gap - random_factor); +} + +/* + * This function, called very early during the creation of a new + * process VM image, sets up which VM layout function to use: + */ +void arch_pick_mmap_layout(struct mm_struct *mm) +{ + /* + * Fall back to the standard layout if the personality + * bit is set, or if the expected stack growth is unlimited: + */ + if (sysctl_legacy_va_layout || + (current->personality & ADDR_COMPAT_LAYOUT) || + current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) { + mm->mmap_base = TASK_UNMAPPED_BASE; + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { + mm->mmap_base = mmap_base(mm); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +} diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c new file mode 100644 index 00000000000..4241a74d16c --- /dev/null +++ b/arch/x86/mm/pageattr_32.c @@ -0,0 +1,278 @@ +/* + * Copyright 2002 Andi Kleen, SuSE Labs. + * Thanks to Ben LaHaise for precious feedback. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_SPINLOCK(cpa_lock); +static struct list_head df_list = LIST_HEAD_INIT(df_list); + + +pte_t *lookup_address(unsigned long address) +{ + pgd_t *pgd = pgd_offset_k(address); + pud_t *pud; + pmd_t *pmd; + if (pgd_none(*pgd)) + return NULL; + pud = pud_offset(pgd, address); + if (pud_none(*pud)) + return NULL; + pmd = pmd_offset(pud, address); + if (pmd_none(*pmd)) + return NULL; + if (pmd_large(*pmd)) + return (pte_t *)pmd; + return pte_offset_kernel(pmd, address); +} + +static struct page *split_large_page(unsigned long address, pgprot_t prot, + pgprot_t ref_prot) +{ + int i; + unsigned long addr; + struct page *base; + pte_t *pbase; + + spin_unlock_irq(&cpa_lock); + base = alloc_pages(GFP_KERNEL, 0); + spin_lock_irq(&cpa_lock); + if (!base) + return NULL; + + /* + * page_private is used to track the number of entries in + * the page table page that have non standard attributes. + */ + SetPagePrivate(base); + page_private(base) = 0; + + address = __pa(address); + addr = address & LARGE_PAGE_MASK; + pbase = (pte_t *)page_address(base); + paravirt_alloc_pt(&init_mm, page_to_pfn(base)); + for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { + set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, + addr == address ? prot : ref_prot)); + } + return base; +} + +static void cache_flush_page(struct page *p) +{ + unsigned long adr = (unsigned long)page_address(p); + int i; + for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) + asm volatile("clflush (%0)" :: "r" (adr + i)); +} + +static void flush_kernel_map(void *arg) +{ + struct list_head *lh = (struct list_head *)arg; + struct page *p; + + /* High level code is not ready for clflush yet */ + if (0 && cpu_has_clflush) { + list_for_each_entry (p, lh, lru) + cache_flush_page(p); + } else if (boot_cpu_data.x86_model >= 4) + wbinvd(); + + /* Flush all to work around Errata in early athlons regarding + * large page flushing. + */ + __flush_tlb_all(); +} + +static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) +{ + struct page *page; + unsigned long flags; + + set_pte_atomic(kpte, pte); /* change init_mm */ + if (SHARED_KERNEL_PMD) + return; + + spin_lock_irqsave(&pgd_lock, flags); + for (page = pgd_list; page; page = (struct page *)page->index) { + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pgd = (pgd_t *)page_address(page) + pgd_index(address); + pud = pud_offset(pgd, address); + pmd = pmd_offset(pud, address); + set_pte_atomic((pte_t *)pmd, pte); + } + spin_unlock_irqrestore(&pgd_lock, flags); +} + +/* + * No more special protections in this 2/4MB area - revert to a + * large page again. + */ +static inline void revert_page(struct page *kpte_page, unsigned long address) +{ + pgprot_t ref_prot; + pte_t *linear; + + ref_prot = + ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext) + ? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE; + + linear = (pte_t *) + pmd_offset(pud_offset(pgd_offset_k(address), address), address); + set_pmd_pte(linear, address, + pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT, + ref_prot)); +} + +static inline void save_page(struct page *kpte_page) +{ + if (!test_and_set_bit(PG_arch_1, &kpte_page->flags)) + list_add(&kpte_page->lru, &df_list); +} + +static int +__change_page_attr(struct page *page, pgprot_t prot) +{ + pte_t *kpte; + unsigned long address; + struct page *kpte_page; + + BUG_ON(PageHighMem(page)); + address = (unsigned long)page_address(page); + + kpte = lookup_address(address); + if (!kpte) + return -EINVAL; + kpte_page = virt_to_page(kpte); + BUG_ON(PageLRU(kpte_page)); + BUG_ON(PageCompound(kpte_page)); + + if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { + if (!pte_huge(*kpte)) { + set_pte_atomic(kpte, mk_pte(page, prot)); + } else { + pgprot_t ref_prot; + struct page *split; + + ref_prot = + ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext) + ? PAGE_KERNEL_EXEC : PAGE_KERNEL; + split = split_large_page(address, prot, ref_prot); + if (!split) + return -ENOMEM; + set_pmd_pte(kpte,address,mk_pte(split, ref_prot)); + kpte_page = split; + } + page_private(kpte_page)++; + } else if (!pte_huge(*kpte)) { + set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL)); + BUG_ON(page_private(kpte_page) == 0); + page_private(kpte_page)--; + } else + BUG(); + + /* + * If the pte was reserved, it means it was created at boot + * time (not via split_large_page) and in turn we must not + * replace it with a largepage. + */ + + save_page(kpte_page); + if (!PageReserved(kpte_page)) { + if (cpu_has_pse && (page_private(kpte_page) == 0)) { + paravirt_release_pt(page_to_pfn(kpte_page)); + revert_page(kpte_page, address); + } + } + return 0; +} + +static inline void flush_map(struct list_head *l) +{ + on_each_cpu(flush_kernel_map, l, 1, 1); +} + +/* + * Change the page attributes of an page in the linear mapping. + * + * This should be used when a page is mapped with a different caching policy + * than write-back somewhere - some CPUs do not like it when mappings with + * different caching policies exist. This changes the page attributes of the + * in kernel linear mapping too. + * + * The caller needs to ensure that there are no conflicting mappings elsewhere. + * This function only deals with the kernel linear map. + * + * Caller must call global_flush_tlb() after this. + */ +int change_page_attr(struct page *page, int numpages, pgprot_t prot) +{ + int err = 0; + int i; + unsigned long flags; + + spin_lock_irqsave(&cpa_lock, flags); + for (i = 0; i < numpages; i++, page++) { + err = __change_page_attr(page, prot); + if (err) + break; + } + spin_unlock_irqrestore(&cpa_lock, flags); + return err; +} + +void global_flush_tlb(void) +{ + struct list_head l; + struct page *pg, *next; + + BUG_ON(irqs_disabled()); + + spin_lock_irq(&cpa_lock); + list_replace_init(&df_list, &l); + spin_unlock_irq(&cpa_lock); + flush_map(&l); + list_for_each_entry_safe(pg, next, &l, lru) { + list_del(&pg->lru); + clear_bit(PG_arch_1, &pg->flags); + if (PageReserved(pg) || !cpu_has_pse || page_private(pg) != 0) + continue; + ClearPagePrivate(pg); + __free_page(pg); + } +} + +#ifdef CONFIG_DEBUG_PAGEALLOC +void kernel_map_pages(struct page *page, int numpages, int enable) +{ + if (PageHighMem(page)) + return; + if (!enable) + debug_check_no_locks_freed(page_address(page), + numpages * PAGE_SIZE); + + /* the return value is ignored - the calls cannot fail, + * large pages are disabled at boot time. + */ + change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); + /* we should perform an IPI and flush all tlbs, + * but that can deadlock->flush only current cpu. + */ + __flush_tlb_all(); +} +#endif + +EXPORT_SYMBOL(change_page_attr); +EXPORT_SYMBOL(global_flush_tlb); diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c new file mode 100644 index 00000000000..01437c46baa --- /dev/null +++ b/arch/x86/mm/pgtable_32.c @@ -0,0 +1,373 @@ +/* + * linux/arch/i386/mm/pgtable.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +void show_mem(void) +{ + int total = 0, reserved = 0; + int shared = 0, cached = 0; + int highmem = 0; + struct page *page; + pg_data_t *pgdat; + unsigned long i; + unsigned long flags; + + printk(KERN_INFO "Mem-info:\n"); + show_free_areas(); + printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + for_each_online_pgdat(pgdat) { + pgdat_resize_lock(pgdat, &flags); + for (i = 0; i < pgdat->node_spanned_pages; ++i) { + page = pgdat_page_nr(pgdat, i); + total++; + if (PageHighMem(page)) + highmem++; + if (PageReserved(page)) + reserved++; + else if (PageSwapCache(page)) + cached++; + else if (page_count(page)) + shared += page_count(page) - 1; + } + pgdat_resize_unlock(pgdat, &flags); + } + printk(KERN_INFO "%d pages of RAM\n", total); + printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); + printk(KERN_INFO "%d reserved pages\n", reserved); + printk(KERN_INFO "%d pages shared\n", shared); + printk(KERN_INFO "%d pages swap cached\n", cached); + + printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY)); + printk(KERN_INFO "%lu pages writeback\n", + global_page_state(NR_WRITEBACK)); + printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED)); + printk(KERN_INFO "%lu pages slab\n", + global_page_state(NR_SLAB_RECLAIMABLE) + + global_page_state(NR_SLAB_UNRECLAIMABLE)); + printk(KERN_INFO "%lu pages pagetables\n", + global_page_state(NR_PAGETABLE)); +} + +/* + * Associate a virtual page frame with a given physical page frame + * and protection flags for that frame. + */ +static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = swapper_pg_dir + pgd_index(vaddr); + if (pgd_none(*pgd)) { + BUG(); + return; + } + pud = pud_offset(pgd, vaddr); + if (pud_none(*pud)) { + BUG(); + return; + } + pmd = pmd_offset(pud, vaddr); + if (pmd_none(*pmd)) { + BUG(); + return; + } + pte = pte_offset_kernel(pmd, vaddr); + if (pgprot_val(flags)) + /* stored as-is, to permit clearing entries */ + set_pte(pte, pfn_pte(pfn, flags)); + else + pte_clear(&init_mm, vaddr, pte); + + /* + * It's enough to flush this one mapping. + * (PGE mappings get flushed as well) + */ + __flush_tlb_one(vaddr); +} + +/* + * Associate a large virtual page frame with a given physical page frame + * and protection flags for that frame. pfn is for the base of the page, + * vaddr is what the page gets mapped to - both must be properly aligned. + * The pmd must already be instantiated. Assumes PAE mode. + */ +void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + if (vaddr & (PMD_SIZE-1)) { /* vaddr is misaligned */ + printk(KERN_WARNING "set_pmd_pfn: vaddr misaligned\n"); + return; /* BUG(); */ + } + if (pfn & (PTRS_PER_PTE-1)) { /* pfn is misaligned */ + printk(KERN_WARNING "set_pmd_pfn: pfn misaligned\n"); + return; /* BUG(); */ + } + pgd = swapper_pg_dir + pgd_index(vaddr); + if (pgd_none(*pgd)) { + printk(KERN_WARNING "set_pmd_pfn: pgd_none\n"); + return; /* BUG(); */ + } + pud = pud_offset(pgd, vaddr); + pmd = pmd_offset(pud, vaddr); + set_pmd(pmd, pfn_pmd(pfn, flags)); + /* + * It's enough to flush this one mapping. + * (PGE mappings get flushed as well) + */ + __flush_tlb_one(vaddr); +} + +static int fixmaps; +unsigned long __FIXADDR_TOP = 0xfffff000; +EXPORT_SYMBOL(__FIXADDR_TOP); + +void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) +{ + unsigned long address = __fix_to_virt(idx); + + if (idx >= __end_of_fixed_addresses) { + BUG(); + return; + } + set_pte_pfn(address, phys >> PAGE_SHIFT, flags); + fixmaps++; +} + +/** + * reserve_top_address - reserves a hole in the top of kernel address space + * @reserve - size of hole to reserve + * + * Can be used to relocate the fixmap area and poke a hole in the top + * of kernel address space to make room for a hypervisor. + */ +void reserve_top_address(unsigned long reserve) +{ + BUG_ON(fixmaps > 0); + printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", + (int)-reserve); + __FIXADDR_TOP = -reserve - PAGE_SIZE; + __VMALLOC_RESERVE += reserve; +} + +pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +{ + return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); +} + +struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + struct page *pte; + +#ifdef CONFIG_HIGHPTE + pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); +#else + pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); +#endif + return pte; +} + +void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags) +{ + memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); +} + +/* + * List of all pgd's needed for non-PAE so it can invalidate entries + * in both cached and uncached pgd's; not needed for PAE since the + * kernel pmd is shared. If PAE were not to share the pmd a similar + * tactic would be needed. This is essentially codepath-based locking + * against pageattr.c; it is the unique case in which a valid change + * of kernel pagetables can't be lazily synchronized by vmalloc faults. + * vmalloc faults work because attached pagetables are never freed. + * -- wli + */ +DEFINE_SPINLOCK(pgd_lock); +struct page *pgd_list; + +static inline void pgd_list_add(pgd_t *pgd) +{ + struct page *page = virt_to_page(pgd); + page->index = (unsigned long)pgd_list; + if (pgd_list) + set_page_private(pgd_list, (unsigned long)&page->index); + pgd_list = page; + set_page_private(page, (unsigned long)&pgd_list); +} + +static inline void pgd_list_del(pgd_t *pgd) +{ + struct page *next, **pprev, *page = virt_to_page(pgd); + next = (struct page *)page->index; + pprev = (struct page **)page_private(page); + *pprev = next; + if (next) + set_page_private(next, (unsigned long)pprev); +} + + + +#if (PTRS_PER_PMD == 1) +/* Non-PAE pgd constructor */ +static void pgd_ctor(void *pgd) +{ + unsigned long flags; + + /* !PAE, no pagetable sharing */ + memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); + + spin_lock_irqsave(&pgd_lock, flags); + + /* must happen under lock */ + clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, + swapper_pg_dir + USER_PTRS_PER_PGD, + KERNEL_PGD_PTRS); + paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, + __pa(swapper_pg_dir) >> PAGE_SHIFT, + USER_PTRS_PER_PGD, + KERNEL_PGD_PTRS); + pgd_list_add(pgd); + spin_unlock_irqrestore(&pgd_lock, flags); +} +#else /* PTRS_PER_PMD > 1 */ +/* PAE pgd constructor */ +static void pgd_ctor(void *pgd) +{ + /* PAE, kernel PMD may be shared */ + + if (SHARED_KERNEL_PMD) { + clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, + swapper_pg_dir + USER_PTRS_PER_PGD, + KERNEL_PGD_PTRS); + } else { + unsigned long flags; + + memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); + spin_lock_irqsave(&pgd_lock, flags); + pgd_list_add(pgd); + spin_unlock_irqrestore(&pgd_lock, flags); + } +} +#endif /* PTRS_PER_PMD */ + +static void pgd_dtor(void *pgd) +{ + unsigned long flags; /* can be called from interrupt context */ + + if (SHARED_KERNEL_PMD) + return; + + paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); + spin_lock_irqsave(&pgd_lock, flags); + pgd_list_del(pgd); + spin_unlock_irqrestore(&pgd_lock, flags); +} + +#define UNSHARED_PTRS_PER_PGD \ + (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD) + +/* If we allocate a pmd for part of the kernel address space, then + make sure its initialized with the appropriate kernel mappings. + Otherwise use a cached zeroed pmd. */ +static pmd_t *pmd_cache_alloc(int idx) +{ + pmd_t *pmd; + + if (idx >= USER_PTRS_PER_PGD) { + pmd = (pmd_t *)__get_free_page(GFP_KERNEL); + + if (pmd) + memcpy(pmd, + (void *)pgd_page_vaddr(swapper_pg_dir[idx]), + sizeof(pmd_t) * PTRS_PER_PMD); + } else + pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); + + return pmd; +} + +static void pmd_cache_free(pmd_t *pmd, int idx) +{ + if (idx >= USER_PTRS_PER_PGD) + free_page((unsigned long)pmd); + else + kmem_cache_free(pmd_cache, pmd); +} + +pgd_t *pgd_alloc(struct mm_struct *mm) +{ + int i; + pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor); + + if (PTRS_PER_PMD == 1 || !pgd) + return pgd; + + for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { + pmd_t *pmd = pmd_cache_alloc(i); + + if (!pmd) + goto out_oom; + + paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); + set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); + } + return pgd; + +out_oom: + for (i--; i >= 0; i--) { + pgd_t pgdent = pgd[i]; + void* pmd = (void *)__va(pgd_val(pgdent)-1); + paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); + pmd_cache_free(pmd, i); + } + quicklist_free(0, pgd_dtor, pgd); + return NULL; +} + +void pgd_free(pgd_t *pgd) +{ + int i; + + /* in the PAE case user pgd entries are overwritten before usage */ + if (PTRS_PER_PMD > 1) + for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { + pgd_t pgdent = pgd[i]; + void* pmd = (void *)__va(pgd_val(pgdent)-1); + paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); + pmd_cache_free(pmd, i); + } + /* in the non-PAE case, free_pgtables() clears user pgd entries */ + quicklist_free(0, pgd_dtor, pgd); +} + +void check_pgt_cache(void) +{ + quicklist_trim(0, pgd_dtor, 25, 16); +} + diff --git a/arch/x86_64/mm/Makefile b/arch/x86_64/mm/Makefile index 4042f8563a1..7317648e658 100644 --- a/arch/x86_64/mm/Makefile +++ b/arch/x86_64/mm/Makefile @@ -1,5 +1,5 @@ ifeq ($(CONFIG_X86_32),y) -include ${srctree}/arch/i386/mm/Makefile_32 +include ${srctree}/arch/x86/mm/Makefile_32 else include ${srctree}/arch/x86_64/mm/Makefile_64 endif diff --git a/arch/x86_64/mm/Makefile_64 b/arch/x86_64/mm/Makefile_64 index 2e209885fce..5c2883cad11 100644 --- a/arch/x86_64/mm/Makefile_64 +++ b/arch/x86_64/mm/Makefile_64 @@ -8,4 +8,4 @@ obj-$(CONFIG_NUMA) += numa_64.o obj-$(CONFIG_K8_NUMA) += k8topology_64.o obj-$(CONFIG_ACPI_NUMA) += srat_64.o -hugetlbpage-y = ../../i386/mm/hugetlbpage.o +hugetlbpage-y = ../../x86/mm/hugetlbpage.o -- cgit v1.2.3-70-g09d2 From d629e03b64c469245ceb07d05460e1e2eefa7585 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:48 +0200 Subject: i386: move mach-default Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Makefile | 12 +-- arch/i386/mach-default/Makefile | 5 -- arch/i386/mach-default/setup.c | 180 ---------------------------------------- arch/x86/mach-default/Makefile | 5 ++ arch/x86/mach-default/setup.c | 180 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 191 insertions(+), 191 deletions(-) delete mode 100644 arch/i386/mach-default/Makefile delete mode 100644 arch/i386/mach-default/setup.c create mode 100644 arch/x86/mach-default/Makefile create mode 100644 arch/x86/mach-default/setup.c (limited to 'arch/x86') diff --git a/arch/i386/Makefile b/arch/i386/Makefile index cbdc14fddc3..223c5e252ec 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -61,7 +61,7 @@ AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONF CFLAGS += $(cflags-y) # Default subarch .c files -mcore-y := arch/i386/mach-default +mcore-y := arch/x86/mach-default # Voyager subarch support mflags-$(CONFIG_X86_VOYAGER) := -Iinclude/asm-i386/mach-voyager @@ -73,24 +73,24 @@ mcore-$(CONFIG_X86_VISWS) := arch/x86/mach-visws # NUMAQ subarch support mflags-$(CONFIG_X86_NUMAQ) := -Iinclude/asm-i386/mach-numaq -mcore-$(CONFIG_X86_NUMAQ) := arch/i386/mach-default +mcore-$(CONFIG_X86_NUMAQ) := arch/x86/mach-default # BIGSMP subarch support mflags-$(CONFIG_X86_BIGSMP) := -Iinclude/asm-i386/mach-bigsmp -mcore-$(CONFIG_X86_BIGSMP) := arch/i386/mach-default +mcore-$(CONFIG_X86_BIGSMP) := arch/x86/mach-default #Summit subarch support mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-i386/mach-summit -mcore-$(CONFIG_X86_SUMMIT) := arch/i386/mach-default +mcore-$(CONFIG_X86_SUMMIT) := arch/x86/mach-default # generic subarchitecture mflags-$(CONFIG_X86_GENERICARCH) := -Iinclude/asm-i386/mach-generic -mcore-$(CONFIG_X86_GENERICARCH) := arch/i386/mach-default +mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default core-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ # ES7000 subarch support mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-i386/mach-es7000 -mcore-$(CONFIG_X86_ES7000) := arch/i386/mach-default +mcore-$(CONFIG_X86_ES7000) := arch/x86/mach-default core-$(CONFIG_X86_ES7000) := arch/i386/mach-es7000/ # Xen paravirtualization support diff --git a/arch/i386/mach-default/Makefile b/arch/i386/mach-default/Makefile deleted file mode 100644 index 012fe34459e..00000000000 --- a/arch/i386/mach-default/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# -# Makefile for the linux kernel. -# - -obj-y := setup.o diff --git a/arch/i386/mach-default/setup.c b/arch/i386/mach-default/setup.c deleted file mode 100644 index 7f635c7a238..00000000000 --- a/arch/i386/mach-default/setup.c +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Machine specific setup for generic - */ - -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_HOTPLUG_CPU -#define DEFAULT_SEND_IPI (1) -#else -#define DEFAULT_SEND_IPI (0) -#endif - -int no_broadcast=DEFAULT_SEND_IPI; - -/** - * pre_intr_init_hook - initialisation prior to setting up interrupt vectors - * - * Description: - * Perform any necessary interrupt initialisation prior to setting up - * the "ordinary" interrupt call gates. For legacy reasons, the ISA - * interrupts should be initialised here if the machine emulates a PC - * in any way. - **/ -void __init pre_intr_init_hook(void) -{ - init_ISA_irqs(); -} - -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; - -/** - * intr_init_hook - post gate setup interrupt initialisation - * - * Description: - * Fill in any interrupts that may have been left out by the general - * init_IRQ() routine. interrupts having to do with the machine rather - * than the devices on the I/O bus (like APIC interrupts in intel MP - * systems) are started here. - **/ -void __init intr_init_hook(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - apic_intr_init(); -#endif - - if (!acpi_ioapic) - setup_irq(2, &irq2); -} - -/** - * pre_setup_arch_hook - hook called prior to any setup_arch() execution - * - * Description: - * generally used to activate any machine specific identification - * routines that may be needed before setup_arch() runs. On VISWS - * this is used to get the board revision and type. - **/ -void __init pre_setup_arch_hook(void) -{ -} - -/** - * trap_init_hook - initialise system specific traps - * - * Description: - * Called as the final act of trap_init(). Used in VISWS to initialise - * the various board specific APIC traps. - **/ -void __init trap_init_hook(void) -{ -} - -static struct irqaction irq0 = { - .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL, - .mask = CPU_MASK_NONE, - .name = "timer" -}; - -/** - * time_init_hook - do any specific initialisations for the system timer. - * - * Description: - * Must plug the system timer interrupt source at HZ into the IRQ listed - * in irq_vectors.h:TIMER_IRQ - **/ -void __init time_init_hook(void) -{ - irq0.mask = cpumask_of_cpu(0); - setup_irq(0, &irq0); -} - -#ifdef CONFIG_MCA -/** - * mca_nmi_hook - hook into MCA specific NMI chain - * - * Description: - * The MCA (Microchannel Arcitecture) has an NMI chain for NMI sources - * along the MCA bus. Use this to hook into that chain if you will need - * it. - **/ -void mca_nmi_hook(void) -{ - /* If I recall correctly, there's a whole bunch of other things that - * we can do to check for NMI problems, but that's all I know about - * at the moment. - */ - - printk("NMI generated from unknown source!\n"); -} -#endif - -static __init int no_ipi_broadcast(char *str) -{ - get_option(&str, &no_broadcast); - printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" : - "IPI Broadcast"); - return 1; -} - -__setup("no_ipi_broadcast", no_ipi_broadcast); - -static int __init print_ipi_mode(void) -{ - printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" : - "Shortcut"); - return 0; -} - -late_initcall(print_ipi_mode); - -/** - * machine_specific_memory_setup - Hook for machine specific memory setup. - * - * Description: - * This is included late in kernel/setup.c so that it can make - * use of all of the static functions. - **/ - -char * __init machine_specific_memory_setup(void) -{ - char *who; - - - who = "BIOS-e820"; - - /* - * Try to copy the BIOS-supplied E820-map. - * - * Otherwise fake a memory map; one section from 0k->640k, - * the next section from 1mb->appropriate_mem_k - */ - sanitize_e820_map(E820_MAP, &E820_MAP_NR); - if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) { - unsigned long mem_size; - - /* compare results from other methods and take the greater */ - if (ALT_MEM_K < EXT_MEM_K) { - mem_size = EXT_MEM_K; - who = "BIOS-88"; - } else { - mem_size = ALT_MEM_K; - who = "BIOS-e801"; - } - - e820.nr_map = 0; - add_memory_region(0, LOWMEMSIZE(), E820_RAM); - add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM); - } - return who; -} diff --git a/arch/x86/mach-default/Makefile b/arch/x86/mach-default/Makefile new file mode 100644 index 00000000000..012fe34459e --- /dev/null +++ b/arch/x86/mach-default/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the linux kernel. +# + +obj-y := setup.o diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c new file mode 100644 index 00000000000..7f635c7a238 --- /dev/null +++ b/arch/x86/mach-default/setup.c @@ -0,0 +1,180 @@ +/* + * Machine specific setup for generic + */ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_HOTPLUG_CPU +#define DEFAULT_SEND_IPI (1) +#else +#define DEFAULT_SEND_IPI (0) +#endif + +int no_broadcast=DEFAULT_SEND_IPI; + +/** + * pre_intr_init_hook - initialisation prior to setting up interrupt vectors + * + * Description: + * Perform any necessary interrupt initialisation prior to setting up + * the "ordinary" interrupt call gates. For legacy reasons, the ISA + * interrupts should be initialised here if the machine emulates a PC + * in any way. + **/ +void __init pre_intr_init_hook(void) +{ + init_ISA_irqs(); +} + +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ +static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; + +/** + * intr_init_hook - post gate setup interrupt initialisation + * + * Description: + * Fill in any interrupts that may have been left out by the general + * init_IRQ() routine. interrupts having to do with the machine rather + * than the devices on the I/O bus (like APIC interrupts in intel MP + * systems) are started here. + **/ +void __init intr_init_hook(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + apic_intr_init(); +#endif + + if (!acpi_ioapic) + setup_irq(2, &irq2); +} + +/** + * pre_setup_arch_hook - hook called prior to any setup_arch() execution + * + * Description: + * generally used to activate any machine specific identification + * routines that may be needed before setup_arch() runs. On VISWS + * this is used to get the board revision and type. + **/ +void __init pre_setup_arch_hook(void) +{ +} + +/** + * trap_init_hook - initialise system specific traps + * + * Description: + * Called as the final act of trap_init(). Used in VISWS to initialise + * the various board specific APIC traps. + **/ +void __init trap_init_hook(void) +{ +} + +static struct irqaction irq0 = { + .handler = timer_interrupt, + .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL, + .mask = CPU_MASK_NONE, + .name = "timer" +}; + +/** + * time_init_hook - do any specific initialisations for the system timer. + * + * Description: + * Must plug the system timer interrupt source at HZ into the IRQ listed + * in irq_vectors.h:TIMER_IRQ + **/ +void __init time_init_hook(void) +{ + irq0.mask = cpumask_of_cpu(0); + setup_irq(0, &irq0); +} + +#ifdef CONFIG_MCA +/** + * mca_nmi_hook - hook into MCA specific NMI chain + * + * Description: + * The MCA (Microchannel Arcitecture) has an NMI chain for NMI sources + * along the MCA bus. Use this to hook into that chain if you will need + * it. + **/ +void mca_nmi_hook(void) +{ + /* If I recall correctly, there's a whole bunch of other things that + * we can do to check for NMI problems, but that's all I know about + * at the moment. + */ + + printk("NMI generated from unknown source!\n"); +} +#endif + +static __init int no_ipi_broadcast(char *str) +{ + get_option(&str, &no_broadcast); + printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" : + "IPI Broadcast"); + return 1; +} + +__setup("no_ipi_broadcast", no_ipi_broadcast); + +static int __init print_ipi_mode(void) +{ + printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" : + "Shortcut"); + return 0; +} + +late_initcall(print_ipi_mode); + +/** + * machine_specific_memory_setup - Hook for machine specific memory setup. + * + * Description: + * This is included late in kernel/setup.c so that it can make + * use of all of the static functions. + **/ + +char * __init machine_specific_memory_setup(void) +{ + char *who; + + + who = "BIOS-e820"; + + /* + * Try to copy the BIOS-supplied E820-map. + * + * Otherwise fake a memory map; one section from 0k->640k, + * the next section from 1mb->appropriate_mem_k + */ + sanitize_e820_map(E820_MAP, &E820_MAP_NR); + if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) { + unsigned long mem_size; + + /* compare results from other methods and take the greater */ + if (ALT_MEM_K < EXT_MEM_K) { + mem_size = EXT_MEM_K; + who = "BIOS-88"; + } else { + mem_size = ALT_MEM_K; + who = "BIOS-e801"; + } + + e820.nr_map = 0; + add_memory_region(0, LOWMEMSIZE(), E820_RAM); + add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM); + } + return who; +} -- cgit v1.2.3-70-g09d2 From 334e621a01f86d5bc25e4f742e1eaae6e2d2a97a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:50 +0200 Subject: i386: move mach-es7000 Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Makefile | 2 +- arch/i386/mach-es7000/Makefile | 6 - arch/i386/mach-es7000/es7000.h | 114 ------------- arch/i386/mach-es7000/es7000plat.c | 327 ------------------------------------- arch/x86/mach-es7000/Makefile | 6 + arch/x86/mach-es7000/es7000.h | 114 +++++++++++++ arch/x86/mach-es7000/es7000plat.c | 327 +++++++++++++++++++++++++++++++++++++ arch/x86/mach-generic/Makefile | 2 +- 8 files changed, 449 insertions(+), 449 deletions(-) delete mode 100644 arch/i386/mach-es7000/Makefile delete mode 100644 arch/i386/mach-es7000/es7000.h delete mode 100644 arch/i386/mach-es7000/es7000plat.c create mode 100644 arch/x86/mach-es7000/Makefile create mode 100644 arch/x86/mach-es7000/es7000.h create mode 100644 arch/x86/mach-es7000/es7000plat.c (limited to 'arch/x86') diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 223c5e252ec..c462803a4db 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -91,7 +91,7 @@ core-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ # ES7000 subarch support mflags-$(CONFIG_X86_ES7000) := -Iinclude/asm-i386/mach-es7000 mcore-$(CONFIG_X86_ES7000) := arch/x86/mach-default -core-$(CONFIG_X86_ES7000) := arch/i386/mach-es7000/ +core-$(CONFIG_X86_ES7000) := arch/x86/mach-es7000/ # Xen paravirtualization support core-$(CONFIG_XEN) += arch/i386/xen/ diff --git a/arch/i386/mach-es7000/Makefile b/arch/i386/mach-es7000/Makefile deleted file mode 100644 index 69dd4da218d..00000000000 --- a/arch/i386/mach-es7000/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# -# Makefile for the linux kernel. -# - -obj-$(CONFIG_X86_ES7000) := es7000plat.o -obj-$(CONFIG_X86_GENERICARCH) := es7000plat.o diff --git a/arch/i386/mach-es7000/es7000.h b/arch/i386/mach-es7000/es7000.h deleted file mode 100644 index c8d5aa132fa..00000000000 --- a/arch/i386/mach-es7000/es7000.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Written by: Garry Forsgren, Unisys Corporation - * Natalie Protasevich, Unisys Corporation - * This file contains the code to configure and interface - * with Unisys ES7000 series hardware system manager. - * - * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Unisys Corporation, Township Line & Union Meeting - * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: - * - * http://www.unisys.com - */ - -/* - * ES7000 chipsets - */ - -#define NON_UNISYS 0 -#define ES7000_CLASSIC 1 -#define ES7000_ZORRO 2 - - -#define MIP_REG 1 -#define MIP_PSAI_REG 4 - -#define MIP_BUSY 1 -#define MIP_SPIN 0xf0000 -#define MIP_VALID 0x0100000000000000ULL -#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff) - -#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) - -struct mip_reg_info { - unsigned long long mip_info; - unsigned long long delivery_info; - unsigned long long host_reg; - unsigned long long mip_reg; -}; - -struct part_info { - unsigned char type; - unsigned char length; - unsigned char part_id; - unsigned char apic_mode; - unsigned long snum; - char ptype[16]; - char sname[64]; - char pname[64]; -}; - -struct psai { - unsigned long long entry_type; - unsigned long long addr; - unsigned long long bep_addr; -}; - -struct es7000_mem_info { - unsigned char type; - unsigned char length; - unsigned char resv[6]; - unsigned long long start; - unsigned long long size; -}; - -struct es7000_oem_table { - unsigned long long hdr; - struct mip_reg_info mip; - struct part_info pif; - struct es7000_mem_info shm; - struct psai psai; -}; - -#ifdef CONFIG_ACPI - -struct oem_table { - struct acpi_table_header Header; - u32 OEMTableAddr; - u32 OEMTableSize; -}; - -extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); -#endif - -struct mip_reg { - unsigned long long off_0; - unsigned long long off_8; - unsigned long long off_10; - unsigned long long off_18; - unsigned long long off_20; - unsigned long long off_28; - unsigned long long off_30; - unsigned long long off_38; -}; - -#define MIP_SW_APIC 0x1020b -#define MIP_FUNC(VALUE) (VALUE & 0xff) - -extern int parse_unisys_oem (char *oemptr); -extern void setup_unisys(void); -extern int es7000_start_cpu(int cpu, unsigned long eip); -extern void es7000_sw_apic(void); diff --git a/arch/i386/mach-es7000/es7000plat.c b/arch/i386/mach-es7000/es7000plat.c deleted file mode 100644 index ab99072d3f9..00000000000 --- a/arch/i386/mach-es7000/es7000plat.c +++ /dev/null @@ -1,327 +0,0 @@ -/* - * Written by: Garry Forsgren, Unisys Corporation - * Natalie Protasevich, Unisys Corporation - * This file contains the code to configure and interface - * with Unisys ES7000 series hardware system manager. - * - * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Unisys Corporation, Township Line & Union Meeting - * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: - * - * http://www.unisys.com - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "es7000.h" -#include - -/* - * ES7000 Globals - */ - -volatile unsigned long *psai = NULL; -struct mip_reg *mip_reg; -struct mip_reg *host_reg; -int mip_port; -unsigned long mip_addr, host_addr; - -/* - * GSI override for ES7000 platforms. - */ - -static unsigned int base; - -static int -es7000_rename_gsi(int ioapic, int gsi) -{ - if (es7000_plat == ES7000_ZORRO) - return gsi; - - if (!base) { - int i; - for (i = 0; i < nr_ioapics; i++) - base += nr_ioapic_registers[i]; - } - - if (!ioapic && (gsi < 16)) - gsi += base; - return gsi; -} - -void __init -setup_unisys(void) -{ - /* - * Determine the generation of the ES7000 currently running. - * - * es7000_plat = 1 if the machine is a 5xx ES7000 box - * es7000_plat = 2 if the machine is a x86_64 ES7000 box - * - */ - if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2)) - es7000_plat = ES7000_ZORRO; - else - es7000_plat = ES7000_CLASSIC; - ioapic_renumber_irq = es7000_rename_gsi; -} - -/* - * Parse the OEM Table - */ - -int __init -parse_unisys_oem (char *oemptr) -{ - int i; - int success = 0; - unsigned char type, size; - unsigned long val; - char *tp = NULL; - struct psai *psaip = NULL; - struct mip_reg_info *mi; - struct mip_reg *host, *mip; - - tp = oemptr; - - tp += 8; - - for (i=0; i <= 6; i++) { - type = *tp++; - size = *tp++; - tp -= 2; - switch (type) { - case MIP_REG: - mi = (struct mip_reg_info *)tp; - val = MIP_RD_LO(mi->host_reg); - host_addr = val; - host = (struct mip_reg *)val; - host_reg = __va(host); - val = MIP_RD_LO(mi->mip_reg); - mip_port = MIP_PORT(mi->mip_info); - mip_addr = val; - mip = (struct mip_reg *)val; - mip_reg = __va(mip); - Dprintk("es7000_mipcfg: host_reg = 0x%lx \n", - (unsigned long)host_reg); - Dprintk("es7000_mipcfg: mip_reg = 0x%lx \n", - (unsigned long)mip_reg); - success++; - break; - case MIP_PSAI_REG: - psaip = (struct psai *)tp; - if (tp != NULL) { - if (psaip->addr) - psai = __va(psaip->addr); - else - psai = NULL; - success++; - } - break; - default: - break; - } - tp += size; - } - - if (success < 2) { - es7000_plat = NON_UNISYS; - } else - setup_unisys(); - return es7000_plat; -} - -#ifdef CONFIG_ACPI -int __init -find_unisys_acpi_oem_table(unsigned long *oem_addr) -{ - struct acpi_table_header *header = NULL; - int i = 0; - while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { - if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { - struct oem_table *t = (struct oem_table *)header; - *oem_addr = (unsigned long)__acpi_map_table(t->OEMTableAddr, - t->OEMTableSize); - return 0; - } - } - return -1; -} -#endif - -/* - * This file also gets compiled if CONFIG_X86_GENERICARCH is set. Generic - * arch already has got following function definitions (asm-generic/es7000.c) - * hence no need to define these for that case. - */ -#ifndef CONFIG_X86_GENERICARCH -void es7000_sw_apic(void); -void __init enable_apic_mode(void) -{ - es7000_sw_apic(); - return; -} - -__init int mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid) -{ - if (mpc->mpc_oemptr) { - struct mp_config_oemtable *oem_table = - (struct mp_config_oemtable *)mpc->mpc_oemptr; - if (!strncmp(oem, "UNISYS", 6)) - return parse_unisys_oem((char *)oem_table); - } - return 0; -} -#ifdef CONFIG_ACPI -/* Hook from generic ACPI tables.c */ -int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - unsigned long oem_addr; - if (!find_unisys_acpi_oem_table(&oem_addr)) { - if (es7000_check_dsdt()) - return parse_unisys_oem((char *)oem_addr); - else { - setup_unisys(); - return 1; - } - } - return 0; -} -#else -int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 0; -} -#endif -#endif /* COFIG_X86_GENERICARCH */ - -static void -es7000_spin(int n) -{ - int i = 0; - - while (i++ < n) - rep_nop(); -} - -static int __init -es7000_mip_write(struct mip_reg *mip_reg) -{ - int status = 0; - int spin; - - spin = MIP_SPIN; - while (((unsigned long long)host_reg->off_38 & - (unsigned long long)MIP_VALID) != 0) { - if (--spin <= 0) { - printk("es7000_mip_write: Timeout waiting for Host Valid Flag"); - return -1; - } - es7000_spin(MIP_SPIN); - } - - memcpy(host_reg, mip_reg, sizeof(struct mip_reg)); - outb(1, mip_port); - - spin = MIP_SPIN; - - while (((unsigned long long)mip_reg->off_38 & - (unsigned long long)MIP_VALID) == 0) { - if (--spin <= 0) { - printk("es7000_mip_write: Timeout waiting for MIP Valid Flag"); - return -1; - } - es7000_spin(MIP_SPIN); - } - - status = ((unsigned long long)mip_reg->off_0 & - (unsigned long long)0xffff0000000000ULL) >> 48; - mip_reg->off_38 = ((unsigned long long)mip_reg->off_38 & - (unsigned long long)~MIP_VALID); - return status; -} - -int -es7000_start_cpu(int cpu, unsigned long eip) -{ - unsigned long vect = 0, psaival = 0; - - if (psai == NULL) - return -1; - - vect = ((unsigned long)__pa(eip)/0x1000) << 16; - psaival = (0x1000000 | vect | cpu); - - while (*psai & 0x1000000) - ; - - *psai = psaival; - - return 0; - -} - -int -es7000_stop_cpu(int cpu) -{ - int startup; - - if (psai == NULL) - return -1; - - startup= (0x1000000 | cpu); - - while ((*psai & 0xff00ffff) != startup) - ; - - startup = (*psai & 0xff0000) >> 16; - *psai &= 0xffffff; - - return 0; - -} - -void __init -es7000_sw_apic() -{ - if (es7000_plat) { - int mip_status; - struct mip_reg es7000_mip_reg; - - printk("ES7000: Enabling APIC mode.\n"); - memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); - es7000_mip_reg.off_0 = MIP_SW_APIC; - es7000_mip_reg.off_38 = (MIP_VALID); - while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) - printk("es7000_sw_apic: command failed, status = %x\n", - mip_status); - return; - } -} diff --git a/arch/x86/mach-es7000/Makefile b/arch/x86/mach-es7000/Makefile new file mode 100644 index 00000000000..69dd4da218d --- /dev/null +++ b/arch/x86/mach-es7000/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for the linux kernel. +# + +obj-$(CONFIG_X86_ES7000) := es7000plat.o +obj-$(CONFIG_X86_GENERICARCH) := es7000plat.o diff --git a/arch/x86/mach-es7000/es7000.h b/arch/x86/mach-es7000/es7000.h new file mode 100644 index 00000000000..c8d5aa132fa --- /dev/null +++ b/arch/x86/mach-es7000/es7000.h @@ -0,0 +1,114 @@ +/* + * Written by: Garry Forsgren, Unisys Corporation + * Natalie Protasevich, Unisys Corporation + * This file contains the code to configure and interface + * with Unisys ES7000 series hardware system manager. + * + * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Unisys Corporation, Township Line & Union Meeting + * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: + * + * http://www.unisys.com + */ + +/* + * ES7000 chipsets + */ + +#define NON_UNISYS 0 +#define ES7000_CLASSIC 1 +#define ES7000_ZORRO 2 + + +#define MIP_REG 1 +#define MIP_PSAI_REG 4 + +#define MIP_BUSY 1 +#define MIP_SPIN 0xf0000 +#define MIP_VALID 0x0100000000000000ULL +#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff) + +#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) + +struct mip_reg_info { + unsigned long long mip_info; + unsigned long long delivery_info; + unsigned long long host_reg; + unsigned long long mip_reg; +}; + +struct part_info { + unsigned char type; + unsigned char length; + unsigned char part_id; + unsigned char apic_mode; + unsigned long snum; + char ptype[16]; + char sname[64]; + char pname[64]; +}; + +struct psai { + unsigned long long entry_type; + unsigned long long addr; + unsigned long long bep_addr; +}; + +struct es7000_mem_info { + unsigned char type; + unsigned char length; + unsigned char resv[6]; + unsigned long long start; + unsigned long long size; +}; + +struct es7000_oem_table { + unsigned long long hdr; + struct mip_reg_info mip; + struct part_info pif; + struct es7000_mem_info shm; + struct psai psai; +}; + +#ifdef CONFIG_ACPI + +struct oem_table { + struct acpi_table_header Header; + u32 OEMTableAddr; + u32 OEMTableSize; +}; + +extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); +#endif + +struct mip_reg { + unsigned long long off_0; + unsigned long long off_8; + unsigned long long off_10; + unsigned long long off_18; + unsigned long long off_20; + unsigned long long off_28; + unsigned long long off_30; + unsigned long long off_38; +}; + +#define MIP_SW_APIC 0x1020b +#define MIP_FUNC(VALUE) (VALUE & 0xff) + +extern int parse_unisys_oem (char *oemptr); +extern void setup_unisys(void); +extern int es7000_start_cpu(int cpu, unsigned long eip); +extern void es7000_sw_apic(void); diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/mach-es7000/es7000plat.c new file mode 100644 index 00000000000..ab99072d3f9 --- /dev/null +++ b/arch/x86/mach-es7000/es7000plat.c @@ -0,0 +1,327 @@ +/* + * Written by: Garry Forsgren, Unisys Corporation + * Natalie Protasevich, Unisys Corporation + * This file contains the code to configure and interface + * with Unisys ES7000 series hardware system manager. + * + * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Unisys Corporation, Township Line & Union Meeting + * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: + * + * http://www.unisys.com + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "es7000.h" +#include + +/* + * ES7000 Globals + */ + +volatile unsigned long *psai = NULL; +struct mip_reg *mip_reg; +struct mip_reg *host_reg; +int mip_port; +unsigned long mip_addr, host_addr; + +/* + * GSI override for ES7000 platforms. + */ + +static unsigned int base; + +static int +es7000_rename_gsi(int ioapic, int gsi) +{ + if (es7000_plat == ES7000_ZORRO) + return gsi; + + if (!base) { + int i; + for (i = 0; i < nr_ioapics; i++) + base += nr_ioapic_registers[i]; + } + + if (!ioapic && (gsi < 16)) + gsi += base; + return gsi; +} + +void __init +setup_unisys(void) +{ + /* + * Determine the generation of the ES7000 currently running. + * + * es7000_plat = 1 if the machine is a 5xx ES7000 box + * es7000_plat = 2 if the machine is a x86_64 ES7000 box + * + */ + if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2)) + es7000_plat = ES7000_ZORRO; + else + es7000_plat = ES7000_CLASSIC; + ioapic_renumber_irq = es7000_rename_gsi; +} + +/* + * Parse the OEM Table + */ + +int __init +parse_unisys_oem (char *oemptr) +{ + int i; + int success = 0; + unsigned char type, size; + unsigned long val; + char *tp = NULL; + struct psai *psaip = NULL; + struct mip_reg_info *mi; + struct mip_reg *host, *mip; + + tp = oemptr; + + tp += 8; + + for (i=0; i <= 6; i++) { + type = *tp++; + size = *tp++; + tp -= 2; + switch (type) { + case MIP_REG: + mi = (struct mip_reg_info *)tp; + val = MIP_RD_LO(mi->host_reg); + host_addr = val; + host = (struct mip_reg *)val; + host_reg = __va(host); + val = MIP_RD_LO(mi->mip_reg); + mip_port = MIP_PORT(mi->mip_info); + mip_addr = val; + mip = (struct mip_reg *)val; + mip_reg = __va(mip); + Dprintk("es7000_mipcfg: host_reg = 0x%lx \n", + (unsigned long)host_reg); + Dprintk("es7000_mipcfg: mip_reg = 0x%lx \n", + (unsigned long)mip_reg); + success++; + break; + case MIP_PSAI_REG: + psaip = (struct psai *)tp; + if (tp != NULL) { + if (psaip->addr) + psai = __va(psaip->addr); + else + psai = NULL; + success++; + } + break; + default: + break; + } + tp += size; + } + + if (success < 2) { + es7000_plat = NON_UNISYS; + } else + setup_unisys(); + return es7000_plat; +} + +#ifdef CONFIG_ACPI +int __init +find_unisys_acpi_oem_table(unsigned long *oem_addr) +{ + struct acpi_table_header *header = NULL; + int i = 0; + while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { + if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { + struct oem_table *t = (struct oem_table *)header; + *oem_addr = (unsigned long)__acpi_map_table(t->OEMTableAddr, + t->OEMTableSize); + return 0; + } + } + return -1; +} +#endif + +/* + * This file also gets compiled if CONFIG_X86_GENERICARCH is set. Generic + * arch already has got following function definitions (asm-generic/es7000.c) + * hence no need to define these for that case. + */ +#ifndef CONFIG_X86_GENERICARCH +void es7000_sw_apic(void); +void __init enable_apic_mode(void) +{ + es7000_sw_apic(); + return; +} + +__init int mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid) +{ + if (mpc->mpc_oemptr) { + struct mp_config_oemtable *oem_table = + (struct mp_config_oemtable *)mpc->mpc_oemptr; + if (!strncmp(oem, "UNISYS", 6)) + return parse_unisys_oem((char *)oem_table); + } + return 0; +} +#ifdef CONFIG_ACPI +/* Hook from generic ACPI tables.c */ +int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + unsigned long oem_addr; + if (!find_unisys_acpi_oem_table(&oem_addr)) { + if (es7000_check_dsdt()) + return parse_unisys_oem((char *)oem_addr); + else { + setup_unisys(); + return 1; + } + } + return 0; +} +#else +int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return 0; +} +#endif +#endif /* COFIG_X86_GENERICARCH */ + +static void +es7000_spin(int n) +{ + int i = 0; + + while (i++ < n) + rep_nop(); +} + +static int __init +es7000_mip_write(struct mip_reg *mip_reg) +{ + int status = 0; + int spin; + + spin = MIP_SPIN; + while (((unsigned long long)host_reg->off_38 & + (unsigned long long)MIP_VALID) != 0) { + if (--spin <= 0) { + printk("es7000_mip_write: Timeout waiting for Host Valid Flag"); + return -1; + } + es7000_spin(MIP_SPIN); + } + + memcpy(host_reg, mip_reg, sizeof(struct mip_reg)); + outb(1, mip_port); + + spin = MIP_SPIN; + + while (((unsigned long long)mip_reg->off_38 & + (unsigned long long)MIP_VALID) == 0) { + if (--spin <= 0) { + printk("es7000_mip_write: Timeout waiting for MIP Valid Flag"); + return -1; + } + es7000_spin(MIP_SPIN); + } + + status = ((unsigned long long)mip_reg->off_0 & + (unsigned long long)0xffff0000000000ULL) >> 48; + mip_reg->off_38 = ((unsigned long long)mip_reg->off_38 & + (unsigned long long)~MIP_VALID); + return status; +} + +int +es7000_start_cpu(int cpu, unsigned long eip) +{ + unsigned long vect = 0, psaival = 0; + + if (psai == NULL) + return -1; + + vect = ((unsigned long)__pa(eip)/0x1000) << 16; + psaival = (0x1000000 | vect | cpu); + + while (*psai & 0x1000000) + ; + + *psai = psaival; + + return 0; + +} + +int +es7000_stop_cpu(int cpu) +{ + int startup; + + if (psai == NULL) + return -1; + + startup= (0x1000000 | cpu); + + while ((*psai & 0xff00ffff) != startup) + ; + + startup = (*psai & 0xff0000) >> 16; + *psai &= 0xffffff; + + return 0; + +} + +void __init +es7000_sw_apic() +{ + if (es7000_plat) { + int mip_status; + struct mip_reg es7000_mip_reg; + + printk("ES7000: Enabling APIC mode.\n"); + memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); + es7000_mip_reg.off_0 = MIP_SW_APIC; + es7000_mip_reg.off_38 = (MIP_VALID); + while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) + printk("es7000_sw_apic: command failed, status = %x\n", + mip_status); + return; + } +} diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile index 2f216f55b2d..08f489a7f1f 100644 --- a/arch/x86/mach-generic/Makefile +++ b/arch/x86/mach-generic/Makefile @@ -5,4 +5,4 @@ EXTRA_CFLAGS := -Iarch/i386/kernel obj-y := probe.o summit.o bigsmp.o es7000.o default.o -obj-y += ../../i386/mach-es7000/ +obj-y += ../../x86/mach-es7000/ -- cgit v1.2.3-70-g09d2 From 9702785a747aa27baf46ff504beab6528f21f2dd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:51 +0200 Subject: i386: move xen Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Kconfig | 2 +- arch/i386/Makefile | 2 +- arch/i386/kernel/head_32.S | 2 +- arch/i386/kernel/vsyscall-note_32.S | 2 +- arch/i386/xen/Kconfig | 11 - arch/i386/xen/Makefile | 4 - arch/i386/xen/enlighten.c | 1146 ----------------------------------- arch/i386/xen/events.c | 591 ------------------ arch/i386/xen/features.c | 29 - arch/i386/xen/manage.c | 143 ----- arch/i386/xen/mmu.c | 567 ----------------- arch/i386/xen/mmu.h | 60 -- arch/i386/xen/multicalls.c | 90 --- arch/i386/xen/multicalls.h | 45 -- arch/i386/xen/setup.c | 111 ---- arch/i386/xen/smp.c | 404 ------------ arch/i386/xen/time.c | 593 ------------------ arch/i386/xen/vdso.h | 4 - arch/i386/xen/xen-asm.S | 291 --------- arch/i386/xen/xen-head.S | 38 -- arch/i386/xen/xen-ops.h | 71 --- arch/x86/xen/Kconfig | 11 + arch/x86/xen/Makefile | 4 + arch/x86/xen/enlighten.c | 1146 +++++++++++++++++++++++++++++++++++ arch/x86/xen/events.c | 591 ++++++++++++++++++ arch/x86/xen/features.c | 29 + arch/x86/xen/manage.c | 143 +++++ arch/x86/xen/mmu.c | 567 +++++++++++++++++ arch/x86/xen/mmu.h | 60 ++ arch/x86/xen/multicalls.c | 90 +++ arch/x86/xen/multicalls.h | 45 ++ arch/x86/xen/setup.c | 111 ++++ arch/x86/xen/smp.c | 404 ++++++++++++ arch/x86/xen/time.c | 593 ++++++++++++++++++ arch/x86/xen/vdso.h | 4 + arch/x86/xen/xen-asm.S | 291 +++++++++ arch/x86/xen/xen-head.S | 38 ++ arch/x86/xen/xen-ops.h | 71 +++ 38 files changed, 4202 insertions(+), 4202 deletions(-) delete mode 100644 arch/i386/xen/Kconfig delete mode 100644 arch/i386/xen/Makefile delete mode 100644 arch/i386/xen/enlighten.c delete mode 100644 arch/i386/xen/events.c delete mode 100644 arch/i386/xen/features.c delete mode 100644 arch/i386/xen/manage.c delete mode 100644 arch/i386/xen/mmu.c delete mode 100644 arch/i386/xen/mmu.h delete mode 100644 arch/i386/xen/multicalls.c delete mode 100644 arch/i386/xen/multicalls.h delete mode 100644 arch/i386/xen/setup.c delete mode 100644 arch/i386/xen/smp.c delete mode 100644 arch/i386/xen/time.c delete mode 100644 arch/i386/xen/vdso.h delete mode 100644 arch/i386/xen/xen-asm.S delete mode 100644 arch/i386/xen/xen-head.S delete mode 100644 arch/i386/xen/xen-ops.h create mode 100644 arch/x86/xen/Kconfig create mode 100644 arch/x86/xen/Makefile create mode 100644 arch/x86/xen/enlighten.c create mode 100644 arch/x86/xen/events.c create mode 100644 arch/x86/xen/features.c create mode 100644 arch/x86/xen/manage.c create mode 100644 arch/x86/xen/mmu.c create mode 100644 arch/x86/xen/mmu.h create mode 100644 arch/x86/xen/multicalls.c create mode 100644 arch/x86/xen/multicalls.h create mode 100644 arch/x86/xen/setup.c create mode 100644 arch/x86/xen/smp.c create mode 100644 arch/x86/xen/time.c create mode 100644 arch/x86/xen/vdso.h create mode 100644 arch/x86/xen/xen-asm.S create mode 100644 arch/x86/xen/xen-head.S create mode 100644 arch/x86/xen/xen-ops.h (limited to 'arch/x86') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 561cc21914b..0f6186f0303 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -226,7 +226,7 @@ config PARAVIRT However, when run without a hypervisor the kernel is theoretically slower. If in doubt, say N. -source "arch/i386/xen/Kconfig" +source "arch/x86/xen/Kconfig" config VMI bool "VMI Paravirt-ops support" diff --git a/arch/i386/Makefile b/arch/i386/Makefile index c462803a4db..fc85d6674fc 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -94,7 +94,7 @@ mcore-$(CONFIG_X86_ES7000) := arch/x86/mach-default core-$(CONFIG_X86_ES7000) := arch/x86/mach-es7000/ # Xen paravirtualization support -core-$(CONFIG_XEN) += arch/i386/xen/ +core-$(CONFIG_XEN) += arch/x86/xen/ # default subarch .h files mflags-y += -Iinclude/asm-i386/mach-default diff --git a/arch/i386/kernel/head_32.S b/arch/i386/kernel/head_32.S index 8f0382161c9..9150ca9b5f8 100644 --- a/arch/i386/kernel/head_32.S +++ b/arch/i386/kernel/head_32.S @@ -537,7 +537,7 @@ fault_msg: .ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n" .asciz "Stack: %p %p %p %p %p %p %p %p\n" -#include "../xen/xen-head.S" +#include "../../x86/xen/xen-head.S" /* * The IDT and GDT 'descriptors' are a strange 48-bit object diff --git a/arch/i386/kernel/vsyscall-note_32.S b/arch/i386/kernel/vsyscall-note_32.S index 07c0daf7823..fcf376a37f7 100644 --- a/arch/i386/kernel/vsyscall-note_32.S +++ b/arch/i386/kernel/vsyscall-note_32.S @@ -33,7 +33,7 @@ ELFNOTE_END * at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen. */ -#include "../xen/vdso.h" /* Defines VDSO_NOTE_NONEGSEG_BIT. */ +#include "../../x86/xen/vdso.h" /* Defines VDSO_NOTE_NONEGSEG_BIT. */ .globl VDSO_NOTE_MASK ELFNOTE_START(GNU, 2, "a") diff --git a/arch/i386/xen/Kconfig b/arch/i386/xen/Kconfig deleted file mode 100644 index 9df99e1885a..00000000000 --- a/arch/i386/xen/Kconfig +++ /dev/null @@ -1,11 +0,0 @@ -# -# This Kconfig describes xen options -# - -config XEN - bool "Enable support for Xen hypervisor" - depends on PARAVIRT && X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES - help - This is the Linux Xen port. Enabling this will allow the - kernel to boot in a paravirtualized environment under the - Xen hypervisor. diff --git a/arch/i386/xen/Makefile b/arch/i386/xen/Makefile deleted file mode 100644 index 343df246bd3..00000000000 --- a/arch/i386/xen/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -obj-y := enlighten.o setup.o features.o multicalls.o mmu.o \ - events.o time.o manage.o xen-asm.o - -obj-$(CONFIG_SMP) += smp.o diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c deleted file mode 100644 index f01bfcd4bde..00000000000 --- a/arch/i386/xen/enlighten.c +++ /dev/null @@ -1,1146 +0,0 @@ -/* - * Core of Xen paravirt_ops implementation. - * - * This file contains the xen_paravirt_ops structure itself, and the - * implementations for: - * - privileged instructions - * - interrupt flags - * - segment operations - * - booting and setup - * - * Jeremy Fitzhardinge , XenSource Inc, 2007 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xen-ops.h" -#include "mmu.h" -#include "multicalls.h" - -EXPORT_SYMBOL_GPL(hypercall_page); - -DEFINE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode); - -DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); -DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); -DEFINE_PER_CPU(unsigned long, xen_cr3); - -struct start_info *xen_start_info; -EXPORT_SYMBOL_GPL(xen_start_info); - -static /* __initdata */ struct shared_info dummy_shared_info; - -/* - * Point at some empty memory to start with. We map the real shared_info - * page as soon as fixmap is up and running. - */ -struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info; - -/* - * Flag to determine whether vcpu info placement is available on all - * VCPUs. We assume it is to start with, and then set it to zero on - * the first failure. This is because it can succeed on some VCPUs - * and not others, since it can involve hypervisor memory allocation, - * or because the guest failed to guarantee all the appropriate - * constraints on all VCPUs (ie buffer can't cross a page boundary). - * - * Note that any particular CPU may be using a placed vcpu structure, - * but we can only optimise if the all are. - * - * 0: not available, 1: available - */ -static int have_vcpu_info_placement = 1; - -static void __init xen_vcpu_setup(int cpu) -{ - struct vcpu_register_vcpu_info info; - int err; - struct vcpu_info *vcpup; - - per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; - - if (!have_vcpu_info_placement) - return; /* already tested, not available */ - - vcpup = &per_cpu(xen_vcpu_info, cpu); - - info.mfn = virt_to_mfn(vcpup); - info.offset = offset_in_page(vcpup); - - printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %x, offset %d\n", - cpu, vcpup, info.mfn, info.offset); - - /* Check to see if the hypervisor will put the vcpu_info - structure where we want it, which allows direct access via - a percpu-variable. */ - err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); - - if (err) { - printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err); - have_vcpu_info_placement = 0; - } else { - /* This cpu is using the registered vcpu info, even if - later ones fail to. */ - per_cpu(xen_vcpu, cpu) = vcpup; - - printk(KERN_DEBUG "cpu %d using vcpu_info at %p\n", - cpu, vcpup); - } -} - -static void __init xen_banner(void) -{ - printk(KERN_INFO "Booting paravirtualized kernel on %s\n", - paravirt_ops.name); - printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic); -} - -static void xen_cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - unsigned maskedx = ~0; - - /* - * Mask out inconvenient features, to try and disable as many - * unsupported kernel subsystems as possible. - */ - if (*eax == 1) - maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ - (1 << X86_FEATURE_ACPI) | /* disable ACPI */ - (1 << X86_FEATURE_ACC)); /* thermal monitoring */ - - asm(XEN_EMULATE_PREFIX "cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx)); - *edx &= maskedx; -} - -static void xen_set_debugreg(int reg, unsigned long val) -{ - HYPERVISOR_set_debugreg(reg, val); -} - -static unsigned long xen_get_debugreg(int reg) -{ - return HYPERVISOR_get_debugreg(reg); -} - -static unsigned long xen_save_fl(void) -{ - struct vcpu_info *vcpu; - unsigned long flags; - - vcpu = x86_read_percpu(xen_vcpu); - - /* flag has opposite sense of mask */ - flags = !vcpu->evtchn_upcall_mask; - - /* convert to IF type flag - -0 -> 0x00000000 - -1 -> 0xffffffff - */ - return (-flags) & X86_EFLAGS_IF; -} - -static void xen_restore_fl(unsigned long flags) -{ - struct vcpu_info *vcpu; - - /* convert from IF type flag */ - flags = !(flags & X86_EFLAGS_IF); - - /* There's a one instruction preempt window here. We need to - make sure we're don't switch CPUs between getting the vcpu - pointer and updating the mask. */ - preempt_disable(); - vcpu = x86_read_percpu(xen_vcpu); - vcpu->evtchn_upcall_mask = flags; - preempt_enable_no_resched(); - - /* Doesn't matter if we get preempted here, because any - pending event will get dealt with anyway. */ - - if (flags == 0) { - preempt_check_resched(); - barrier(); /* unmask then check (avoid races) */ - if (unlikely(vcpu->evtchn_upcall_pending)) - force_evtchn_callback(); - } -} - -static void xen_irq_disable(void) -{ - /* There's a one instruction preempt window here. We need to - make sure we're don't switch CPUs between getting the vcpu - pointer and updating the mask. */ - preempt_disable(); - x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; - preempt_enable_no_resched(); -} - -static void xen_irq_enable(void) -{ - struct vcpu_info *vcpu; - - /* There's a one instruction preempt window here. We need to - make sure we're don't switch CPUs between getting the vcpu - pointer and updating the mask. */ - preempt_disable(); - vcpu = x86_read_percpu(xen_vcpu); - vcpu->evtchn_upcall_mask = 0; - preempt_enable_no_resched(); - - /* Doesn't matter if we get preempted here, because any - pending event will get dealt with anyway. */ - - barrier(); /* unmask then check (avoid races) */ - if (unlikely(vcpu->evtchn_upcall_pending)) - force_evtchn_callback(); -} - -static void xen_safe_halt(void) -{ - /* Blocking includes an implicit local_irq_enable(). */ - if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0) - BUG(); -} - -static void xen_halt(void) -{ - if (irqs_disabled()) - HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); - else - xen_safe_halt(); -} - -static void xen_set_lazy_mode(enum paravirt_lazy_mode mode) -{ - BUG_ON(preemptible()); - - switch (mode) { - case PARAVIRT_LAZY_NONE: - BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE); - break; - - case PARAVIRT_LAZY_MMU: - case PARAVIRT_LAZY_CPU: - BUG_ON(x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE); - break; - - case PARAVIRT_LAZY_FLUSH: - /* flush if necessary, but don't change state */ - if (x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE) - xen_mc_flush(); - return; - } - - xen_mc_flush(); - x86_write_percpu(xen_lazy_mode, mode); -} - -static unsigned long xen_store_tr(void) -{ - return 0; -} - -static void xen_set_ldt(const void *addr, unsigned entries) -{ - unsigned long linear_addr = (unsigned long)addr; - struct mmuext_op *op; - struct multicall_space mcs = xen_mc_entry(sizeof(*op)); - - op = mcs.args; - op->cmd = MMUEXT_SET_LDT; - if (linear_addr) { - /* ldt my be vmalloced, use arbitrary_virt_to_machine */ - xmaddr_t maddr; - maddr = arbitrary_virt_to_machine((unsigned long)addr); - linear_addr = (unsigned long)maddr.maddr; - } - op->arg1.linear_addr = linear_addr; - op->arg2.nr_ents = entries; - - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_CPU); -} - -static void xen_load_gdt(const struct Xgt_desc_struct *dtr) -{ - unsigned long *frames; - unsigned long va = dtr->address; - unsigned int size = dtr->size + 1; - unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; - int f; - struct multicall_space mcs; - - /* A GDT can be up to 64k in size, which corresponds to 8192 - 8-byte entries, or 16 4k pages.. */ - - BUG_ON(size > 65536); - BUG_ON(va & ~PAGE_MASK); - - mcs = xen_mc_entry(sizeof(*frames) * pages); - frames = mcs.args; - - for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { - frames[f] = virt_to_mfn(va); - make_lowmem_page_readonly((void *)va); - } - - MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); - - xen_mc_issue(PARAVIRT_LAZY_CPU); -} - -static void load_TLS_descriptor(struct thread_struct *t, - unsigned int cpu, unsigned int i) -{ - struct desc_struct *gdt = get_cpu_gdt_table(cpu); - xmaddr_t maddr = virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); - struct multicall_space mc = __xen_mc_entry(0); - - MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); -} - -static void xen_load_tls(struct thread_struct *t, unsigned int cpu) -{ - xen_mc_batch(); - - load_TLS_descriptor(t, cpu, 0); - load_TLS_descriptor(t, cpu, 1); - load_TLS_descriptor(t, cpu, 2); - - xen_mc_issue(PARAVIRT_LAZY_CPU); - - /* - * XXX sleazy hack: If we're being called in a lazy-cpu zone, - * it means we're in a context switch, and %gs has just been - * saved. This means we can zero it out to prevent faults on - * exit from the hypervisor if the next process has no %gs. - * Either way, it has been saved, and the new value will get - * loaded properly. This will go away as soon as Xen has been - * modified to not save/restore %gs for normal hypercalls. - */ - if (xen_get_lazy_mode() == PARAVIRT_LAZY_CPU) - loadsegment(gs, 0); -} - -static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, - u32 low, u32 high) -{ - unsigned long lp = (unsigned long)&dt[entrynum]; - xmaddr_t mach_lp = virt_to_machine(lp); - u64 entry = (u64)high << 32 | low; - - preempt_disable(); - - xen_mc_flush(); - if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry)) - BUG(); - - preempt_enable(); -} - -static int cvt_gate_to_trap(int vector, u32 low, u32 high, - struct trap_info *info) -{ - u8 type, dpl; - - type = (high >> 8) & 0x1f; - dpl = (high >> 13) & 3; - - if (type != 0xf && type != 0xe) - return 0; - - info->vector = vector; - info->address = (high & 0xffff0000) | (low & 0x0000ffff); - info->cs = low >> 16; - info->flags = dpl; - /* interrupt gates clear IF */ - if (type == 0xe) - info->flags |= 4; - - return 1; -} - -/* Locations of each CPU's IDT */ -static DEFINE_PER_CPU(struct Xgt_desc_struct, idt_desc); - -/* Set an IDT entry. If the entry is part of the current IDT, then - also update Xen. */ -static void xen_write_idt_entry(struct desc_struct *dt, int entrynum, - u32 low, u32 high) -{ - unsigned long p = (unsigned long)&dt[entrynum]; - unsigned long start, end; - - preempt_disable(); - - start = __get_cpu_var(idt_desc).address; - end = start + __get_cpu_var(idt_desc).size + 1; - - xen_mc_flush(); - - write_dt_entry(dt, entrynum, low, high); - - if (p >= start && (p + 8) <= end) { - struct trap_info info[2]; - - info[1].address = 0; - - if (cvt_gate_to_trap(entrynum, low, high, &info[0])) - if (HYPERVISOR_set_trap_table(info)) - BUG(); - } - - preempt_enable(); -} - -static void xen_convert_trap_info(const struct Xgt_desc_struct *desc, - struct trap_info *traps) -{ - unsigned in, out, count; - - count = (desc->size+1) / 8; - BUG_ON(count > 256); - - for (in = out = 0; in < count; in++) { - const u32 *entry = (u32 *)(desc->address + in * 8); - - if (cvt_gate_to_trap(in, entry[0], entry[1], &traps[out])) - out++; - } - traps[out].address = 0; -} - -void xen_copy_trap_info(struct trap_info *traps) -{ - const struct Xgt_desc_struct *desc = &__get_cpu_var(idt_desc); - - xen_convert_trap_info(desc, traps); -} - -/* Load a new IDT into Xen. In principle this can be per-CPU, so we - hold a spinlock to protect the static traps[] array (static because - it avoids allocation, and saves stack space). */ -static void xen_load_idt(const struct Xgt_desc_struct *desc) -{ - static DEFINE_SPINLOCK(lock); - static struct trap_info traps[257]; - - spin_lock(&lock); - - __get_cpu_var(idt_desc) = *desc; - - xen_convert_trap_info(desc, traps); - - xen_mc_flush(); - if (HYPERVISOR_set_trap_table(traps)) - BUG(); - - spin_unlock(&lock); -} - -/* Write a GDT descriptor entry. Ignore LDT descriptors, since - they're handled differently. */ -static void xen_write_gdt_entry(struct desc_struct *dt, int entry, - u32 low, u32 high) -{ - preempt_disable(); - - switch ((high >> 8) & 0xff) { - case DESCTYPE_LDT: - case DESCTYPE_TSS: - /* ignore */ - break; - - default: { - xmaddr_t maddr = virt_to_machine(&dt[entry]); - u64 desc = (u64)high << 32 | low; - - xen_mc_flush(); - if (HYPERVISOR_update_descriptor(maddr.maddr, desc)) - BUG(); - } - - } - - preempt_enable(); -} - -static void xen_load_esp0(struct tss_struct *tss, - struct thread_struct *thread) -{ - struct multicall_space mcs = xen_mc_entry(0); - MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->esp0); - xen_mc_issue(PARAVIRT_LAZY_CPU); -} - -static void xen_set_iopl_mask(unsigned mask) -{ - struct physdev_set_iopl set_iopl; - - /* Force the change at ring 0. */ - set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; - HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); -} - -static void xen_io_delay(void) -{ -} - -#ifdef CONFIG_X86_LOCAL_APIC -static unsigned long xen_apic_read(unsigned long reg) -{ - return 0; -} - -static void xen_apic_write(unsigned long reg, unsigned long val) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} -#endif - -static void xen_flush_tlb(void) -{ - struct mmuext_op *op; - struct multicall_space mcs = xen_mc_entry(sizeof(*op)); - - op = mcs.args; - op->cmd = MMUEXT_TLB_FLUSH_LOCAL; - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_MMU); -} - -static void xen_flush_tlb_single(unsigned long addr) -{ - struct mmuext_op *op; - struct multicall_space mcs = xen_mc_entry(sizeof(*op)); - - op = mcs.args; - op->cmd = MMUEXT_INVLPG_LOCAL; - op->arg1.linear_addr = addr & PAGE_MASK; - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_MMU); -} - -static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, - unsigned long va) -{ - struct { - struct mmuext_op op; - cpumask_t mask; - } *args; - cpumask_t cpumask = *cpus; - struct multicall_space mcs; - - /* - * A couple of (to be removed) sanity checks: - * - * - current CPU must not be in mask - * - mask must exist :) - */ - BUG_ON(cpus_empty(cpumask)); - BUG_ON(cpu_isset(smp_processor_id(), cpumask)); - BUG_ON(!mm); - - /* If a CPU which we ran on has gone down, OK. */ - cpus_and(cpumask, cpumask, cpu_online_map); - if (cpus_empty(cpumask)) - return; - - mcs = xen_mc_entry(sizeof(*args)); - args = mcs.args; - args->mask = cpumask; - args->op.arg2.vcpumask = &args->mask; - - if (va == TLB_FLUSH_ALL) { - args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; - } else { - args->op.cmd = MMUEXT_INVLPG_MULTI; - args->op.arg1.linear_addr = va; - } - - MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_MMU); -} - -static void xen_write_cr2(unsigned long cr2) -{ - x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; -} - -static unsigned long xen_read_cr2(void) -{ - return x86_read_percpu(xen_vcpu)->arch.cr2; -} - -static unsigned long xen_read_cr2_direct(void) -{ - return x86_read_percpu(xen_vcpu_info.arch.cr2); -} - -static void xen_write_cr4(unsigned long cr4) -{ - /* Just ignore cr4 changes; Xen doesn't allow us to do - anything anyway. */ -} - -static unsigned long xen_read_cr3(void) -{ - return x86_read_percpu(xen_cr3); -} - -static void xen_write_cr3(unsigned long cr3) -{ - BUG_ON(preemptible()); - - if (cr3 == x86_read_percpu(xen_cr3)) { - /* just a simple tlb flush */ - xen_flush_tlb(); - return; - } - - x86_write_percpu(xen_cr3, cr3); - - - { - struct mmuext_op *op; - struct multicall_space mcs = xen_mc_entry(sizeof(*op)); - unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3)); - - op = mcs.args; - op->cmd = MMUEXT_NEW_BASEPTR; - op->arg1.mfn = mfn; - - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_CPU); - } -} - -/* Early in boot, while setting up the initial pagetable, assume - everything is pinned. */ -static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn) -{ - BUG_ON(mem_map); /* should only be used early */ - make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); -} - -/* This needs to make sure the new pte page is pinned iff its being - attached to a pinned pagetable. */ -static void xen_alloc_pt(struct mm_struct *mm, u32 pfn) -{ - struct page *page = pfn_to_page(pfn); - - if (PagePinned(virt_to_page(mm->pgd))) { - SetPagePinned(page); - - if (!PageHighMem(page)) - make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); - else - /* make sure there are no stray mappings of - this page */ - kmap_flush_unused(); - } -} - -/* This should never happen until we're OK to use struct page */ -static void xen_release_pt(u32 pfn) -{ - struct page *page = pfn_to_page(pfn); - - if (PagePinned(page)) { - if (!PageHighMem(page)) - make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); - } -} - -#ifdef CONFIG_HIGHPTE -static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) -{ - pgprot_t prot = PAGE_KERNEL; - - if (PagePinned(page)) - prot = PAGE_KERNEL_RO; - - if (0 && PageHighMem(page)) - printk("mapping highpte %lx type %d prot %s\n", - page_to_pfn(page), type, - (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ"); - - return kmap_atomic_prot(page, type, prot); -} -#endif - -static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) -{ - /* If there's an existing pte, then don't allow _PAGE_RW to be set */ - if (pte_val_ma(*ptep) & _PAGE_PRESENT) - pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & - pte_val_ma(pte)); - - return pte; -} - -/* Init-time set_pte while constructing initial pagetables, which - doesn't allow RO pagetable pages to be remapped RW */ -static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) -{ - pte = mask_rw_pte(ptep, pte); - - xen_set_pte(ptep, pte); -} - -static __init void xen_pagetable_setup_start(pgd_t *base) -{ - pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; - - /* special set_pte for pagetable initialization */ - paravirt_ops.set_pte = xen_set_pte_init; - - init_mm.pgd = base; - /* - * copy top-level of Xen-supplied pagetable into place. For - * !PAE we can use this as-is, but for PAE it is a stand-in - * while we copy the pmd pages. - */ - memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); - - if (PTRS_PER_PMD > 1) { - int i; - /* - * For PAE, need to allocate new pmds, rather than - * share Xen's, since Xen doesn't like pmd's being - * shared between address spaces. - */ - for (i = 0; i < PTRS_PER_PGD; i++) { - if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { - pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); - - memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), - PAGE_SIZE); - - make_lowmem_page_readonly(pmd); - - set_pgd(&base[i], __pgd(1 + __pa(pmd))); - } else - pgd_clear(&base[i]); - } - } - - /* make sure zero_page is mapped RO so we can use it in pagetables */ - make_lowmem_page_readonly(empty_zero_page); - make_lowmem_page_readonly(base); - /* - * Switch to new pagetable. This is done before - * pagetable_init has done anything so that the new pages - * added to the table can be prepared properly for Xen. - */ - xen_write_cr3(__pa(base)); -} - -static __init void xen_pagetable_setup_done(pgd_t *base) -{ - /* This will work as long as patching hasn't happened yet - (which it hasn't) */ - paravirt_ops.alloc_pt = xen_alloc_pt; - paravirt_ops.set_pte = xen_set_pte; - - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - /* - * Create a mapping for the shared info page. - * Should be set_fixmap(), but shared_info is a machine - * address with no corresponding pseudo-phys address. - */ - set_pte_mfn(fix_to_virt(FIX_PARAVIRT_BOOTMAP), - PFN_DOWN(xen_start_info->shared_info), - PAGE_KERNEL); - - HYPERVISOR_shared_info = - (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); - - } else - HYPERVISOR_shared_info = - (struct shared_info *)__va(xen_start_info->shared_info); - - /* Actually pin the pagetable down, but we can't set PG_pinned - yet because the page structures don't exist yet. */ - { - struct mmuext_op op; -#ifdef CONFIG_X86_PAE - op.cmd = MMUEXT_PIN_L3_TABLE; -#else - op.cmd = MMUEXT_PIN_L3_TABLE; -#endif - op.arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(base))); - if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) - BUG(); - } -} - -/* This is called once we have the cpu_possible_map */ -void __init xen_setup_vcpu_info_placement(void) -{ - int cpu; - - for_each_possible_cpu(cpu) - xen_vcpu_setup(cpu); - - /* xen_vcpu_setup managed to place the vcpu_info within the - percpu area for all cpus, so make use of it */ - if (have_vcpu_info_placement) { - printk(KERN_INFO "Xen: using vcpu_info placement\n"); - - paravirt_ops.save_fl = xen_save_fl_direct; - paravirt_ops.restore_fl = xen_restore_fl_direct; - paravirt_ops.irq_disable = xen_irq_disable_direct; - paravirt_ops.irq_enable = xen_irq_enable_direct; - paravirt_ops.read_cr2 = xen_read_cr2_direct; - paravirt_ops.iret = xen_iret_direct; - } -} - -static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, - unsigned long addr, unsigned len) -{ - char *start, *end, *reloc; - unsigned ret; - - start = end = reloc = NULL; - -#define SITE(x) \ - case PARAVIRT_PATCH(x): \ - if (have_vcpu_info_placement) { \ - start = (char *)xen_##x##_direct; \ - end = xen_##x##_direct_end; \ - reloc = xen_##x##_direct_reloc; \ - } \ - goto patch_site - - switch (type) { - SITE(irq_enable); - SITE(irq_disable); - SITE(save_fl); - SITE(restore_fl); -#undef SITE - - patch_site: - if (start == NULL || (end-start) > len) - goto default_patch; - - ret = paravirt_patch_insns(insnbuf, len, start, end); - - /* Note: because reloc is assigned from something that - appears to be an array, gcc assumes it's non-null, - but doesn't know its relationship with start and - end. */ - if (reloc > start && reloc < end) { - int reloc_off = reloc - start; - long *relocp = (long *)(insnbuf + reloc_off); - long delta = start - (char *)addr; - - *relocp += delta; - } - break; - - default_patch: - default: - ret = paravirt_patch_default(type, clobbers, insnbuf, - addr, len); - break; - } - - return ret; -} - -static const struct paravirt_ops xen_paravirt_ops __initdata = { - .paravirt_enabled = 1, - .shared_kernel_pmd = 0, - - .name = "Xen", - .banner = xen_banner, - - .patch = xen_patch, - - .memory_setup = xen_memory_setup, - .arch_setup = xen_arch_setup, - .init_IRQ = xen_init_IRQ, - .post_allocator_init = xen_mark_init_mm_pinned, - - .time_init = xen_time_init, - .set_wallclock = xen_set_wallclock, - .get_wallclock = xen_get_wallclock, - .get_cpu_khz = xen_cpu_khz, - .sched_clock = xen_sched_clock, - - .cpuid = xen_cpuid, - - .set_debugreg = xen_set_debugreg, - .get_debugreg = xen_get_debugreg, - - .clts = native_clts, - - .read_cr0 = native_read_cr0, - .write_cr0 = native_write_cr0, - - .read_cr2 = xen_read_cr2, - .write_cr2 = xen_write_cr2, - - .read_cr3 = xen_read_cr3, - .write_cr3 = xen_write_cr3, - - .read_cr4 = native_read_cr4, - .read_cr4_safe = native_read_cr4_safe, - .write_cr4 = xen_write_cr4, - - .save_fl = xen_save_fl, - .restore_fl = xen_restore_fl, - .irq_disable = xen_irq_disable, - .irq_enable = xen_irq_enable, - .safe_halt = xen_safe_halt, - .halt = xen_halt, - .wbinvd = native_wbinvd, - - .read_msr = native_read_msr_safe, - .write_msr = native_write_msr_safe, - .read_tsc = native_read_tsc, - .read_pmc = native_read_pmc, - - .iret = (void *)&hypercall_page[__HYPERVISOR_iret], - .irq_enable_sysexit = NULL, /* never called */ - - .load_tr_desc = paravirt_nop, - .set_ldt = xen_set_ldt, - .load_gdt = xen_load_gdt, - .load_idt = xen_load_idt, - .load_tls = xen_load_tls, - - .store_gdt = native_store_gdt, - .store_idt = native_store_idt, - .store_tr = xen_store_tr, - - .write_ldt_entry = xen_write_ldt_entry, - .write_gdt_entry = xen_write_gdt_entry, - .write_idt_entry = xen_write_idt_entry, - .load_esp0 = xen_load_esp0, - - .set_iopl_mask = xen_set_iopl_mask, - .io_delay = xen_io_delay, - -#ifdef CONFIG_X86_LOCAL_APIC - .apic_write = xen_apic_write, - .apic_write_atomic = xen_apic_write, - .apic_read = xen_apic_read, - .setup_boot_clock = paravirt_nop, - .setup_secondary_clock = paravirt_nop, - .startup_ipi_hook = paravirt_nop, -#endif - - .flush_tlb_user = xen_flush_tlb, - .flush_tlb_kernel = xen_flush_tlb, - .flush_tlb_single = xen_flush_tlb_single, - .flush_tlb_others = xen_flush_tlb_others, - - .pte_update = paravirt_nop, - .pte_update_defer = paravirt_nop, - - .pagetable_setup_start = xen_pagetable_setup_start, - .pagetable_setup_done = xen_pagetable_setup_done, - - .alloc_pt = xen_alloc_pt_init, - .release_pt = xen_release_pt, - .alloc_pd = paravirt_nop, - .alloc_pd_clone = paravirt_nop, - .release_pd = paravirt_nop, - -#ifdef CONFIG_HIGHPTE - .kmap_atomic_pte = xen_kmap_atomic_pte, -#endif - - .set_pte = NULL, /* see xen_pagetable_setup_* */ - .set_pte_at = xen_set_pte_at, - .set_pmd = xen_set_pmd, - - .pte_val = xen_pte_val, - .pgd_val = xen_pgd_val, - - .make_pte = xen_make_pte, - .make_pgd = xen_make_pgd, - -#ifdef CONFIG_X86_PAE - .set_pte_atomic = xen_set_pte_atomic, - .set_pte_present = xen_set_pte_at, - .set_pud = xen_set_pud, - .pte_clear = xen_pte_clear, - .pmd_clear = xen_pmd_clear, - - .make_pmd = xen_make_pmd, - .pmd_val = xen_pmd_val, -#endif /* PAE */ - - .activate_mm = xen_activate_mm, - .dup_mmap = xen_dup_mmap, - .exit_mmap = xen_exit_mmap, - - .set_lazy_mode = xen_set_lazy_mode, -}; - -#ifdef CONFIG_SMP -static const struct smp_ops xen_smp_ops __initdata = { - .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, - .smp_prepare_cpus = xen_smp_prepare_cpus, - .cpu_up = xen_cpu_up, - .smp_cpus_done = xen_smp_cpus_done, - - .smp_send_stop = xen_smp_send_stop, - .smp_send_reschedule = xen_smp_send_reschedule, - .smp_call_function_mask = xen_smp_call_function_mask, -}; -#endif /* CONFIG_SMP */ - -static void xen_reboot(int reason) -{ -#ifdef CONFIG_SMP - smp_send_stop(); -#endif - - if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason)) - BUG(); -} - -static void xen_restart(char *msg) -{ - xen_reboot(SHUTDOWN_reboot); -} - -static void xen_emergency_restart(void) -{ - xen_reboot(SHUTDOWN_reboot); -} - -static void xen_machine_halt(void) -{ - xen_reboot(SHUTDOWN_poweroff); -} - -static void xen_crash_shutdown(struct pt_regs *regs) -{ - xen_reboot(SHUTDOWN_crash); -} - -static const struct machine_ops __initdata xen_machine_ops = { - .restart = xen_restart, - .halt = xen_machine_halt, - .power_off = xen_machine_halt, - .shutdown = xen_machine_halt, - .crash_shutdown = xen_crash_shutdown, - .emergency_restart = xen_emergency_restart, -}; - - -/* First C function to be called on Xen boot */ -asmlinkage void __init xen_start_kernel(void) -{ - pgd_t *pgd; - - if (!xen_start_info) - return; - - BUG_ON(memcmp(xen_start_info->magic, "xen-3.0", 7) != 0); - - /* Install Xen paravirt ops */ - paravirt_ops = xen_paravirt_ops; - machine_ops = xen_machine_ops; - -#ifdef CONFIG_SMP - smp_ops = xen_smp_ops; -#endif - - xen_setup_features(); - - /* Get mfn list */ - if (!xen_feature(XENFEAT_auto_translated_physmap)) - phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list; - - pgd = (pgd_t *)xen_start_info->pt_base; - - init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; - - init_mm.pgd = pgd; /* use the Xen pagetables to start */ - - /* keep using Xen gdt for now; no urgent need to change it */ - - x86_write_percpu(xen_cr3, __pa(pgd)); - -#ifdef CONFIG_SMP - /* Don't do the full vcpu_info placement stuff until we have a - possible map. */ - per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; -#else - /* May as well do it now, since there's no good time to call - it later on UP. */ - xen_setup_vcpu_info_placement(); -#endif - - paravirt_ops.kernel_rpl = 1; - if (xen_feature(XENFEAT_supervisor_mode_kernel)) - paravirt_ops.kernel_rpl = 0; - - /* set the limit of our address space */ - reserve_top_address(-HYPERVISOR_VIRT_START + 2 * PAGE_SIZE); - - /* set up basic CPUID stuff */ - cpu_detect(&new_cpu_data); - new_cpu_data.hard_math = 1; - new_cpu_data.x86_capability[0] = cpuid_edx(1); - - /* Poke various useful things into boot_params */ - LOADER_TYPE = (9 << 4) | 0; - INITRD_START = xen_start_info->mod_start ? __pa(xen_start_info->mod_start) : 0; - INITRD_SIZE = xen_start_info->mod_len; - - /* Start the world */ - start_kernel(); -} diff --git a/arch/i386/xen/events.c b/arch/i386/xen/events.c deleted file mode 100644 index da1b173547a..00000000000 --- a/arch/i386/xen/events.c +++ /dev/null @@ -1,591 +0,0 @@ -/* - * Xen event channels - * - * Xen models interrupts with abstract event channels. Because each - * domain gets 1024 event channels, but NR_IRQ is not that large, we - * must dynamically map irqs<->event channels. The event channels - * interface with the rest of the kernel by defining a xen interrupt - * chip. When an event is recieved, it is mapped to an irq and sent - * through the normal interrupt processing path. - * - * There are four kinds of events which can be mapped to an event - * channel: - * - * 1. Inter-domain notifications. This includes all the virtual - * device events, since they're driven by front-ends in another domain - * (typically dom0). - * 2. VIRQs, typically used for timers. These are per-cpu events. - * 3. IPIs. - * 4. Hardware interrupts. Not supported at present. - * - * Jeremy Fitzhardinge , XenSource Inc, 2007 - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "xen-ops.h" - -/* - * This lock protects updates to the following mapping and reference-count - * arrays. The lock does not need to be acquired to read the mapping tables. - */ -static DEFINE_SPINLOCK(irq_mapping_update_lock); - -/* IRQ <-> VIRQ mapping. */ -static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1}; - -/* IRQ <-> IPI mapping */ -static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1}; - -/* Packed IRQ information: binding type, sub-type index, and event channel. */ -struct packed_irq -{ - unsigned short evtchn; - unsigned char index; - unsigned char type; -}; - -static struct packed_irq irq_info[NR_IRQS]; - -/* Binding types. */ -enum { - IRQT_UNBOUND, - IRQT_PIRQ, - IRQT_VIRQ, - IRQT_IPI, - IRQT_EVTCHN -}; - -/* Convenient shorthand for packed representation of an unbound IRQ. */ -#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0) - -static int evtchn_to_irq[NR_EVENT_CHANNELS] = { - [0 ... NR_EVENT_CHANNELS-1] = -1 -}; -static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG]; -static u8 cpu_evtchn[NR_EVENT_CHANNELS]; - -/* Reference counts for bindings to IRQs. */ -static int irq_bindcount[NR_IRQS]; - -/* Xen will never allocate port zero for any purpose. */ -#define VALID_EVTCHN(chn) ((chn) != 0) - -/* - * Force a proper event-channel callback from Xen after clearing the - * callback mask. We do this in a very simple manner, by making a call - * down into Xen. The pending flag will be checked by Xen on return. - */ -void force_evtchn_callback(void) -{ - (void)HYPERVISOR_xen_version(0, NULL); -} -EXPORT_SYMBOL_GPL(force_evtchn_callback); - -static struct irq_chip xen_dynamic_chip; - -/* Constructor for packed IRQ information. */ -static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn) -{ - return (struct packed_irq) { evtchn, index, type }; -} - -/* - * Accessors for packed IRQ information. - */ -static inline unsigned int evtchn_from_irq(int irq) -{ - return irq_info[irq].evtchn; -} - -static inline unsigned int index_from_irq(int irq) -{ - return irq_info[irq].index; -} - -static inline unsigned int type_from_irq(int irq) -{ - return irq_info[irq].type; -} - -static inline unsigned long active_evtchns(unsigned int cpu, - struct shared_info *sh, - unsigned int idx) -{ - return (sh->evtchn_pending[idx] & - cpu_evtchn_mask[cpu][idx] & - ~sh->evtchn_mask[idx]); -} - -static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) -{ - int irq = evtchn_to_irq[chn]; - - BUG_ON(irq == -1); -#ifdef CONFIG_SMP - irq_desc[irq].affinity = cpumask_of_cpu(cpu); -#endif - - __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]); - __set_bit(chn, cpu_evtchn_mask[cpu]); - - cpu_evtchn[chn] = cpu; -} - -static void init_evtchn_cpu_bindings(void) -{ -#ifdef CONFIG_SMP - int i; - /* By default all event channels notify CPU#0. */ - for (i = 0; i < NR_IRQS; i++) - irq_desc[i].affinity = cpumask_of_cpu(0); -#endif - - memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); - memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); -} - -static inline unsigned int cpu_from_evtchn(unsigned int evtchn) -{ - return cpu_evtchn[evtchn]; -} - -static inline void clear_evtchn(int port) -{ - struct shared_info *s = HYPERVISOR_shared_info; - sync_clear_bit(port, &s->evtchn_pending[0]); -} - -static inline void set_evtchn(int port) -{ - struct shared_info *s = HYPERVISOR_shared_info; - sync_set_bit(port, &s->evtchn_pending[0]); -} - - -/** - * notify_remote_via_irq - send event to remote end of event channel via irq - * @irq: irq of event channel to send event to - * - * Unlike notify_remote_via_evtchn(), this is safe to use across - * save/restore. Notifications on a broken connection are silently - * dropped. - */ -void notify_remote_via_irq(int irq) -{ - int evtchn = evtchn_from_irq(irq); - - if (VALID_EVTCHN(evtchn)) - notify_remote_via_evtchn(evtchn); -} -EXPORT_SYMBOL_GPL(notify_remote_via_irq); - -static void mask_evtchn(int port) -{ - struct shared_info *s = HYPERVISOR_shared_info; - sync_set_bit(port, &s->evtchn_mask[0]); -} - -static void unmask_evtchn(int port) -{ - struct shared_info *s = HYPERVISOR_shared_info; - unsigned int cpu = get_cpu(); - - BUG_ON(!irqs_disabled()); - - /* Slow path (hypercall) if this is a non-local port. */ - if (unlikely(cpu != cpu_from_evtchn(port))) { - struct evtchn_unmask unmask = { .port = port }; - (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); - } else { - struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); - - sync_clear_bit(port, &s->evtchn_mask[0]); - - /* - * The following is basically the equivalent of - * 'hw_resend_irq'. Just like a real IO-APIC we 'lose - * the interrupt edge' if the channel is masked. - */ - if (sync_test_bit(port, &s->evtchn_pending[0]) && - !sync_test_and_set_bit(port / BITS_PER_LONG, - &vcpu_info->evtchn_pending_sel)) - vcpu_info->evtchn_upcall_pending = 1; - } - - put_cpu(); -} - -static int find_unbound_irq(void) -{ - int irq; - - /* Only allocate from dynirq range */ - for (irq = 0; irq < NR_IRQS; irq++) - if (irq_bindcount[irq] == 0) - break; - - if (irq == NR_IRQS) - panic("No available IRQ to bind to: increase NR_IRQS!\n"); - - return irq; -} - -int bind_evtchn_to_irq(unsigned int evtchn) -{ - int irq; - - spin_lock(&irq_mapping_update_lock); - - irq = evtchn_to_irq[evtchn]; - - if (irq == -1) { - irq = find_unbound_irq(); - - dynamic_irq_init(irq); - set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, - handle_level_irq, "event"); - - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); - } - - irq_bindcount[irq]++; - - spin_unlock(&irq_mapping_update_lock); - - return irq; -} -EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); - -static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) -{ - struct evtchn_bind_ipi bind_ipi; - int evtchn, irq; - - spin_lock(&irq_mapping_update_lock); - - irq = per_cpu(ipi_to_irq, cpu)[ipi]; - if (irq == -1) { - irq = find_unbound_irq(); - if (irq < 0) - goto out; - - dynamic_irq_init(irq); - set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, - handle_level_irq, "ipi"); - - bind_ipi.vcpu = cpu; - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, - &bind_ipi) != 0) - BUG(); - evtchn = bind_ipi.port; - - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); - - per_cpu(ipi_to_irq, cpu)[ipi] = irq; - - bind_evtchn_to_cpu(evtchn, cpu); - } - - irq_bindcount[irq]++; - - out: - spin_unlock(&irq_mapping_update_lock); - return irq; -} - - -static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) -{ - struct evtchn_bind_virq bind_virq; - int evtchn, irq; - - spin_lock(&irq_mapping_update_lock); - - irq = per_cpu(virq_to_irq, cpu)[virq]; - - if (irq == -1) { - bind_virq.virq = virq; - bind_virq.vcpu = cpu; - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, - &bind_virq) != 0) - BUG(); - evtchn = bind_virq.port; - - irq = find_unbound_irq(); - - dynamic_irq_init(irq); - set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, - handle_level_irq, "virq"); - - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); - - per_cpu(virq_to_irq, cpu)[virq] = irq; - - bind_evtchn_to_cpu(evtchn, cpu); - } - - irq_bindcount[irq]++; - - spin_unlock(&irq_mapping_update_lock); - - return irq; -} - -static void unbind_from_irq(unsigned int irq) -{ - struct evtchn_close close; - int evtchn = evtchn_from_irq(irq); - - spin_lock(&irq_mapping_update_lock); - - if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) { - close.port = evtchn; - if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) - BUG(); - - switch (type_from_irq(irq)) { - case IRQT_VIRQ: - per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) - [index_from_irq(irq)] = -1; - break; - default: - break; - } - - /* Closed ports are implicitly re-bound to VCPU0. */ - bind_evtchn_to_cpu(evtchn, 0); - - evtchn_to_irq[evtchn] = -1; - irq_info[irq] = IRQ_UNBOUND; - - dynamic_irq_init(irq); - } - - spin_unlock(&irq_mapping_update_lock); -} - -int bind_evtchn_to_irqhandler(unsigned int evtchn, - irqreturn_t (*handler)(int, void *), - unsigned long irqflags, - const char *devname, void *dev_id) -{ - unsigned int irq; - int retval; - - irq = bind_evtchn_to_irq(evtchn); - retval = request_irq(irq, handler, irqflags, devname, dev_id); - if (retval != 0) { - unbind_from_irq(irq); - return retval; - } - - return irq; -} -EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); - -int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, - irqreturn_t (*handler)(int, void *), - unsigned long irqflags, const char *devname, void *dev_id) -{ - unsigned int irq; - int retval; - - irq = bind_virq_to_irq(virq, cpu); - retval = request_irq(irq, handler, irqflags, devname, dev_id); - if (retval != 0) { - unbind_from_irq(irq); - return retval; - } - - return irq; -} -EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler); - -int bind_ipi_to_irqhandler(enum ipi_vector ipi, - unsigned int cpu, - irq_handler_t handler, - unsigned long irqflags, - const char *devname, - void *dev_id) -{ - int irq, retval; - - irq = bind_ipi_to_irq(ipi, cpu); - if (irq < 0) - return irq; - - retval = request_irq(irq, handler, irqflags, devname, dev_id); - if (retval != 0) { - unbind_from_irq(irq); - return retval; - } - - return irq; -} - -void unbind_from_irqhandler(unsigned int irq, void *dev_id) -{ - free_irq(irq, dev_id); - unbind_from_irq(irq); -} -EXPORT_SYMBOL_GPL(unbind_from_irqhandler); - -void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) -{ - int irq = per_cpu(ipi_to_irq, cpu)[vector]; - BUG_ON(irq < 0); - notify_remote_via_irq(irq); -} - - -/* - * Search the CPUs pending events bitmasks. For each one found, map - * the event number to an irq, and feed it into do_IRQ() for - * handling. - * - * Xen uses a two-level bitmap to speed searching. The first level is - * a bitset of words which contain pending event bits. The second - * level is a bitset of pending events themselves. - */ -fastcall void xen_evtchn_do_upcall(struct pt_regs *regs) -{ - int cpu = get_cpu(); - struct shared_info *s = HYPERVISOR_shared_info; - struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); - unsigned long pending_words; - - vcpu_info->evtchn_upcall_pending = 0; - - /* NB. No need for a barrier here -- XCHG is a barrier on x86. */ - pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); - while (pending_words != 0) { - unsigned long pending_bits; - int word_idx = __ffs(pending_words); - pending_words &= ~(1UL << word_idx); - - while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) { - int bit_idx = __ffs(pending_bits); - int port = (word_idx * BITS_PER_LONG) + bit_idx; - int irq = evtchn_to_irq[port]; - - if (irq != -1) { - regs->orig_eax = ~irq; - do_IRQ(regs); - } - } - } - - put_cpu(); -} - -/* Rebind an evtchn so that it gets delivered to a specific cpu */ -static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) -{ - struct evtchn_bind_vcpu bind_vcpu; - int evtchn = evtchn_from_irq(irq); - - if (!VALID_EVTCHN(evtchn)) - return; - - /* Send future instances of this interrupt to other vcpu. */ - bind_vcpu.port = evtchn; - bind_vcpu.vcpu = tcpu; - - /* - * If this fails, it usually just indicates that we're dealing with a - * virq or IPI channel, which don't actually need to be rebound. Ignore - * it, but don't do the xenlinux-level rebind in that case. - */ - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) - bind_evtchn_to_cpu(evtchn, tcpu); -} - - -static void set_affinity_irq(unsigned irq, cpumask_t dest) -{ - unsigned tcpu = first_cpu(dest); - rebind_irq_to_cpu(irq, tcpu); -} - -static void enable_dynirq(unsigned int irq) -{ - int evtchn = evtchn_from_irq(irq); - - if (VALID_EVTCHN(evtchn)) - unmask_evtchn(evtchn); -} - -static void disable_dynirq(unsigned int irq) -{ - int evtchn = evtchn_from_irq(irq); - - if (VALID_EVTCHN(evtchn)) - mask_evtchn(evtchn); -} - -static void ack_dynirq(unsigned int irq) -{ - int evtchn = evtchn_from_irq(irq); - - move_native_irq(irq); - - if (VALID_EVTCHN(evtchn)) - clear_evtchn(evtchn); -} - -static int retrigger_dynirq(unsigned int irq) -{ - int evtchn = evtchn_from_irq(irq); - int ret = 0; - - if (VALID_EVTCHN(evtchn)) { - set_evtchn(evtchn); - ret = 1; - } - - return ret; -} - -static struct irq_chip xen_dynamic_chip __read_mostly = { - .name = "xen-dyn", - .mask = disable_dynirq, - .unmask = enable_dynirq, - .ack = ack_dynirq, - .set_affinity = set_affinity_irq, - .retrigger = retrigger_dynirq, -}; - -void __init xen_init_IRQ(void) -{ - int i; - - init_evtchn_cpu_bindings(); - - /* No event channels are 'live' right now. */ - for (i = 0; i < NR_EVENT_CHANNELS; i++) - mask_evtchn(i); - - /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ - for (i = 0; i < NR_IRQS; i++) - irq_bindcount[i] = 0; - - irq_ctx_init(smp_processor_id()); -} diff --git a/arch/i386/xen/features.c b/arch/i386/xen/features.c deleted file mode 100644 index 0707714e40d..00000000000 --- a/arch/i386/xen/features.c +++ /dev/null @@ -1,29 +0,0 @@ -/****************************************************************************** - * features.c - * - * Xen feature flags. - * - * Copyright (c) 2006, Ian Campbell, XenSource Inc. - */ -#include -#include -#include -#include -#include - -u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly; -EXPORT_SYMBOL_GPL(xen_features); - -void xen_setup_features(void) -{ - struct xen_feature_info fi; - int i, j; - - for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) { - fi.submap_idx = i; - if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) - break; - for (j = 0; j < 32; j++) - xen_features[i * 32 + j] = !!(fi.submap & 1< -#include -#include -#include - -#include - -#define SHUTDOWN_INVALID -1 -#define SHUTDOWN_POWEROFF 0 -#define SHUTDOWN_SUSPEND 2 -/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only - * report a crash, not be instructed to crash! - * HALT is the same as POWEROFF, as far as we're concerned. The tools use - * the distinction when we return the reason code to them. - */ -#define SHUTDOWN_HALT 4 - -/* Ignore multiple shutdown requests. */ -static int shutting_down = SHUTDOWN_INVALID; - -static void shutdown_handler(struct xenbus_watch *watch, - const char **vec, unsigned int len) -{ - char *str; - struct xenbus_transaction xbt; - int err; - - if (shutting_down != SHUTDOWN_INVALID) - return; - - again: - err = xenbus_transaction_start(&xbt); - if (err) - return; - - str = (char *)xenbus_read(xbt, "control", "shutdown", NULL); - /* Ignore read errors and empty reads. */ - if (XENBUS_IS_ERR_READ(str)) { - xenbus_transaction_end(xbt, 1); - return; - } - - xenbus_write(xbt, "control", "shutdown", ""); - - err = xenbus_transaction_end(xbt, 0); - if (err == -EAGAIN) { - kfree(str); - goto again; - } - - if (strcmp(str, "poweroff") == 0 || - strcmp(str, "halt") == 0) - orderly_poweroff(false); - else if (strcmp(str, "reboot") == 0) - ctrl_alt_del(); - else { - printk(KERN_INFO "Ignoring shutdown request: %s\n", str); - shutting_down = SHUTDOWN_INVALID; - } - - kfree(str); -} - -static void sysrq_handler(struct xenbus_watch *watch, const char **vec, - unsigned int len) -{ - char sysrq_key = '\0'; - struct xenbus_transaction xbt; - int err; - - again: - err = xenbus_transaction_start(&xbt); - if (err) - return; - if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) { - printk(KERN_ERR "Unable to read sysrq code in " - "control/sysrq\n"); - xenbus_transaction_end(xbt, 1); - return; - } - - if (sysrq_key != '\0') - xenbus_printf(xbt, "control", "sysrq", "%c", '\0'); - - err = xenbus_transaction_end(xbt, 0); - if (err == -EAGAIN) - goto again; - - if (sysrq_key != '\0') - handle_sysrq(sysrq_key, NULL); -} - -static struct xenbus_watch shutdown_watch = { - .node = "control/shutdown", - .callback = shutdown_handler -}; - -static struct xenbus_watch sysrq_watch = { - .node = "control/sysrq", - .callback = sysrq_handler -}; - -static int setup_shutdown_watcher(void) -{ - int err; - - err = register_xenbus_watch(&shutdown_watch); - if (err) { - printk(KERN_ERR "Failed to set shutdown watcher\n"); - return err; - } - - err = register_xenbus_watch(&sysrq_watch); - if (err) { - printk(KERN_ERR "Failed to set sysrq watcher\n"); - return err; - } - - return 0; -} - -static int shutdown_event(struct notifier_block *notifier, - unsigned long event, - void *data) -{ - setup_shutdown_watcher(); - return NOTIFY_DONE; -} - -static int __init setup_shutdown_event(void) -{ - static struct notifier_block xenstore_notifier = { - .notifier_call = shutdown_event - }; - register_xenstore_notifier(&xenstore_notifier); - - return 0; -} - -subsys_initcall(setup_shutdown_event); diff --git a/arch/i386/xen/mmu.c b/arch/i386/xen/mmu.c deleted file mode 100644 index 874db0cd1d2..00000000000 --- a/arch/i386/xen/mmu.c +++ /dev/null @@ -1,567 +0,0 @@ -/* - * Xen mmu operations - * - * This file contains the various mmu fetch and update operations. - * The most important job they must perform is the mapping between the - * domain's pfn and the overall machine mfns. - * - * Xen allows guests to directly update the pagetable, in a controlled - * fashion. In other words, the guest modifies the same pagetable - * that the CPU actually uses, which eliminates the overhead of having - * a separate shadow pagetable. - * - * In order to allow this, it falls on the guest domain to map its - * notion of a "physical" pfn - which is just a domain-local linear - * address - into a real "machine address" which the CPU's MMU can - * use. - * - * A pgd_t/pmd_t/pte_t will typically contain an mfn, and so can be - * inserted directly into the pagetable. When creating a new - * pte/pmd/pgd, it converts the passed pfn into an mfn. Conversely, - * when reading the content back with __(pgd|pmd|pte)_val, it converts - * the mfn back into a pfn. - * - * The other constraint is that all pages which make up a pagetable - * must be mapped read-only in the guest. This prevents uncontrolled - * guest updates to the pagetable. Xen strictly enforces this, and - * will disallow any pagetable update which will end up mapping a - * pagetable page RW, and will disallow using any writable page as a - * pagetable. - * - * Naively, when loading %cr3 with the base of a new pagetable, Xen - * would need to validate the whole pagetable before going on. - * Naturally, this is quite slow. The solution is to "pin" a - * pagetable, which enforces all the constraints on the pagetable even - * when it is not actively in use. This menas that Xen can be assured - * that it is still valid when you do load it into %cr3, and doesn't - * need to revalidate it. - * - * Jeremy Fitzhardinge , XenSource Inc, 2007 - */ -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include - -#include -#include - -#include "multicalls.h" -#include "mmu.h" - -xmaddr_t arbitrary_virt_to_machine(unsigned long address) -{ - pte_t *pte = lookup_address(address); - unsigned offset = address & PAGE_MASK; - - BUG_ON(pte == NULL); - - return XMADDR((pte_mfn(*pte) << PAGE_SHIFT) + offset); -} - -void make_lowmem_page_readonly(void *vaddr) -{ - pte_t *pte, ptev; - unsigned long address = (unsigned long)vaddr; - - pte = lookup_address(address); - BUG_ON(pte == NULL); - - ptev = pte_wrprotect(*pte); - - if (HYPERVISOR_update_va_mapping(address, ptev, 0)) - BUG(); -} - -void make_lowmem_page_readwrite(void *vaddr) -{ - pte_t *pte, ptev; - unsigned long address = (unsigned long)vaddr; - - pte = lookup_address(address); - BUG_ON(pte == NULL); - - ptev = pte_mkwrite(*pte); - - if (HYPERVISOR_update_va_mapping(address, ptev, 0)) - BUG(); -} - - -void xen_set_pmd(pmd_t *ptr, pmd_t val) -{ - struct multicall_space mcs; - struct mmu_update *u; - - preempt_disable(); - - mcs = xen_mc_entry(sizeof(*u)); - u = mcs.args; - u->ptr = virt_to_machine(ptr).maddr; - u->val = pmd_val_ma(val); - MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - preempt_enable(); -} - -/* - * Associate a virtual page frame with a given physical page frame - * and protection flags for that frame. - */ -void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - pgd = swapper_pg_dir + pgd_index(vaddr); - if (pgd_none(*pgd)) { - BUG(); - return; - } - pud = pud_offset(pgd, vaddr); - if (pud_none(*pud)) { - BUG(); - return; - } - pmd = pmd_offset(pud, vaddr); - if (pmd_none(*pmd)) { - BUG(); - return; - } - pte = pte_offset_kernel(pmd, vaddr); - /* stored as-is, to permit clearing entries */ - xen_set_pte(pte, mfn_pte(mfn, flags)); - - /* - * It's enough to flush this one mapping. - * (PGE mappings get flushed as well) - */ - __flush_tlb_one(vaddr); -} - -void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval) -{ - if (mm == current->mm || mm == &init_mm) { - if (xen_get_lazy_mode() == PARAVIRT_LAZY_MMU) { - struct multicall_space mcs; - mcs = xen_mc_entry(0); - - MULTI_update_va_mapping(mcs.mc, addr, pteval, 0); - xen_mc_issue(PARAVIRT_LAZY_MMU); - return; - } else - if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0) - return; - } - xen_set_pte(ptep, pteval); -} - -#ifdef CONFIG_X86_PAE -void xen_set_pud(pud_t *ptr, pud_t val) -{ - struct multicall_space mcs; - struct mmu_update *u; - - preempt_disable(); - - mcs = xen_mc_entry(sizeof(*u)); - u = mcs.args; - u->ptr = virt_to_machine(ptr).maddr; - u->val = pud_val_ma(val); - MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - preempt_enable(); -} - -void xen_set_pte(pte_t *ptep, pte_t pte) -{ - ptep->pte_high = pte.pte_high; - smp_wmb(); - ptep->pte_low = pte.pte_low; -} - -void xen_set_pte_atomic(pte_t *ptep, pte_t pte) -{ - set_64bit((u64 *)ptep, pte_val_ma(pte)); -} - -void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -{ - ptep->pte_low = 0; - smp_wmb(); /* make sure low gets written first */ - ptep->pte_high = 0; -} - -void xen_pmd_clear(pmd_t *pmdp) -{ - xen_set_pmd(pmdp, __pmd(0)); -} - -unsigned long long xen_pte_val(pte_t pte) -{ - unsigned long long ret = 0; - - if (pte.pte_low) { - ret = ((unsigned long long)pte.pte_high << 32) | pte.pte_low; - ret = machine_to_phys(XMADDR(ret)).paddr | 1; - } - - return ret; -} - -unsigned long long xen_pmd_val(pmd_t pmd) -{ - unsigned long long ret = pmd.pmd; - if (ret) - ret = machine_to_phys(XMADDR(ret)).paddr | 1; - return ret; -} - -unsigned long long xen_pgd_val(pgd_t pgd) -{ - unsigned long long ret = pgd.pgd; - if (ret) - ret = machine_to_phys(XMADDR(ret)).paddr | 1; - return ret; -} - -pte_t xen_make_pte(unsigned long long pte) -{ - if (pte & 1) - pte = phys_to_machine(XPADDR(pte)).maddr; - - return (pte_t){ pte, pte >> 32 }; -} - -pmd_t xen_make_pmd(unsigned long long pmd) -{ - if (pmd & 1) - pmd = phys_to_machine(XPADDR(pmd)).maddr; - - return (pmd_t){ pmd }; -} - -pgd_t xen_make_pgd(unsigned long long pgd) -{ - if (pgd & _PAGE_PRESENT) - pgd = phys_to_machine(XPADDR(pgd)).maddr; - - return (pgd_t){ pgd }; -} -#else /* !PAE */ -void xen_set_pte(pte_t *ptep, pte_t pte) -{ - *ptep = pte; -} - -unsigned long xen_pte_val(pte_t pte) -{ - unsigned long ret = pte.pte_low; - - if (ret & _PAGE_PRESENT) - ret = machine_to_phys(XMADDR(ret)).paddr; - - return ret; -} - -unsigned long xen_pgd_val(pgd_t pgd) -{ - unsigned long ret = pgd.pgd; - if (ret) - ret = machine_to_phys(XMADDR(ret)).paddr | 1; - return ret; -} - -pte_t xen_make_pte(unsigned long pte) -{ - if (pte & _PAGE_PRESENT) - pte = phys_to_machine(XPADDR(pte)).maddr; - - return (pte_t){ pte }; -} - -pgd_t xen_make_pgd(unsigned long pgd) -{ - if (pgd & _PAGE_PRESENT) - pgd = phys_to_machine(XPADDR(pgd)).maddr; - - return (pgd_t){ pgd }; -} -#endif /* CONFIG_X86_PAE */ - - - -/* - (Yet another) pagetable walker. This one is intended for pinning a - pagetable. This means that it walks a pagetable and calls the - callback function on each page it finds making up the page table, - at every level. It walks the entire pagetable, but it only bothers - pinning pte pages which are below pte_limit. In the normal case - this will be TASK_SIZE, but at boot we need to pin up to - FIXADDR_TOP. But the important bit is that we don't pin beyond - there, because then we start getting into Xen's ptes. -*/ -static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned), - unsigned long limit) -{ - pgd_t *pgd = pgd_base; - int flush = 0; - unsigned long addr = 0; - unsigned long pgd_next; - - BUG_ON(limit > FIXADDR_TOP); - - if (xen_feature(XENFEAT_auto_translated_physmap)) - return 0; - - for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) { - pud_t *pud; - unsigned long pud_limit, pud_next; - - pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP); - - if (!pgd_val(*pgd)) - continue; - - pud = pud_offset(pgd, 0); - - if (PTRS_PER_PUD > 1) /* not folded */ - flush |= (*func)(virt_to_page(pud), 0); - - for (; addr != pud_limit; pud++, addr = pud_next) { - pmd_t *pmd; - unsigned long pmd_limit; - - pud_next = pud_addr_end(addr, pud_limit); - - if (pud_next < limit) - pmd_limit = pud_next; - else - pmd_limit = limit; - - if (pud_none(*pud)) - continue; - - pmd = pmd_offset(pud, 0); - - if (PTRS_PER_PMD > 1) /* not folded */ - flush |= (*func)(virt_to_page(pmd), 0); - - for (; addr != pmd_limit; pmd++) { - addr += (PAGE_SIZE * PTRS_PER_PTE); - if ((pmd_limit-1) < (addr-1)) { - addr = pmd_limit; - break; - } - - if (pmd_none(*pmd)) - continue; - - flush |= (*func)(pmd_page(*pmd), 0); - } - } - } - - flush |= (*func)(virt_to_page(pgd_base), UVMF_TLB_FLUSH); - - return flush; -} - -static int pin_page(struct page *page, unsigned flags) -{ - unsigned pgfl = test_and_set_bit(PG_pinned, &page->flags); - int flush; - - if (pgfl) - flush = 0; /* already pinned */ - else if (PageHighMem(page)) - /* kmaps need flushing if we found an unpinned - highpage */ - flush = 1; - else { - void *pt = lowmem_page_address(page); - unsigned long pfn = page_to_pfn(page); - struct multicall_space mcs = __xen_mc_entry(0); - - flush = 0; - - MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, - pfn_pte(pfn, PAGE_KERNEL_RO), - flags); - } - - return flush; -} - -/* This is called just after a mm has been created, but it has not - been used yet. We need to make sure that its pagetable is all - read-only, and can be pinned. */ -void xen_pgd_pin(pgd_t *pgd) -{ - struct multicall_space mcs; - struct mmuext_op *op; - - xen_mc_batch(); - - if (pgd_walk(pgd, pin_page, TASK_SIZE)) { - /* re-enable interrupts for kmap_flush_unused */ - xen_mc_issue(0); - kmap_flush_unused(); - xen_mc_batch(); - } - - mcs = __xen_mc_entry(sizeof(*op)); - op = mcs.args; - -#ifdef CONFIG_X86_PAE - op->cmd = MMUEXT_PIN_L3_TABLE; -#else - op->cmd = MMUEXT_PIN_L2_TABLE; -#endif - op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd))); - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - xen_mc_issue(0); -} - -/* The init_mm pagetable is really pinned as soon as its created, but - that's before we have page structures to store the bits. So do all - the book-keeping now. */ -static __init int mark_pinned(struct page *page, unsigned flags) -{ - SetPagePinned(page); - return 0; -} - -void __init xen_mark_init_mm_pinned(void) -{ - pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP); -} - -static int unpin_page(struct page *page, unsigned flags) -{ - unsigned pgfl = test_and_clear_bit(PG_pinned, &page->flags); - - if (pgfl && !PageHighMem(page)) { - void *pt = lowmem_page_address(page); - unsigned long pfn = page_to_pfn(page); - struct multicall_space mcs = __xen_mc_entry(0); - - MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, - pfn_pte(pfn, PAGE_KERNEL), - flags); - } - - return 0; /* never need to flush on unpin */ -} - -/* Release a pagetables pages back as normal RW */ -static void xen_pgd_unpin(pgd_t *pgd) -{ - struct mmuext_op *op; - struct multicall_space mcs; - - xen_mc_batch(); - - mcs = __xen_mc_entry(sizeof(*op)); - - op = mcs.args; - op->cmd = MMUEXT_UNPIN_TABLE; - op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd))); - - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - pgd_walk(pgd, unpin_page, TASK_SIZE); - - xen_mc_issue(0); -} - -void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) -{ - spin_lock(&next->page_table_lock); - xen_pgd_pin(next->pgd); - spin_unlock(&next->page_table_lock); -} - -void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) -{ - spin_lock(&mm->page_table_lock); - xen_pgd_pin(mm->pgd); - spin_unlock(&mm->page_table_lock); -} - - -#ifdef CONFIG_SMP -/* Another cpu may still have their %cr3 pointing at the pagetable, so - we need to repoint it somewhere else before we can unpin it. */ -static void drop_other_mm_ref(void *info) -{ - struct mm_struct *mm = info; - - if (__get_cpu_var(cpu_tlbstate).active_mm == mm) - leave_mm(smp_processor_id()); -} - -static void drop_mm_ref(struct mm_struct *mm) -{ - if (current->active_mm == mm) { - if (current->mm == mm) - load_cr3(swapper_pg_dir); - else - leave_mm(smp_processor_id()); - } - - if (!cpus_empty(mm->cpu_vm_mask)) - xen_smp_call_function_mask(mm->cpu_vm_mask, drop_other_mm_ref, - mm, 1); -} -#else -static void drop_mm_ref(struct mm_struct *mm) -{ - if (current->active_mm == mm) - load_cr3(swapper_pg_dir); -} -#endif - -/* - * While a process runs, Xen pins its pagetables, which means that the - * hypervisor forces it to be read-only, and it controls all updates - * to it. This means that all pagetable updates have to go via the - * hypervisor, which is moderately expensive. - * - * Since we're pulling the pagetable down, we switch to use init_mm, - * unpin old process pagetable and mark it all read-write, which - * allows further operations on it to be simple memory accesses. - * - * The only subtle point is that another CPU may be still using the - * pagetable because of lazy tlb flushing. This means we need need to - * switch all CPUs off this pagetable before we can unpin it. - */ -void xen_exit_mmap(struct mm_struct *mm) -{ - get_cpu(); /* make sure we don't move around */ - drop_mm_ref(mm); - put_cpu(); - - spin_lock(&mm->page_table_lock); - - /* pgd may not be pinned in the error exit path of execve */ - if (PagePinned(virt_to_page(mm->pgd))) - xen_pgd_unpin(mm->pgd); - spin_unlock(&mm->page_table_lock); -} diff --git a/arch/i386/xen/mmu.h b/arch/i386/xen/mmu.h deleted file mode 100644 index c9ff27f3ac3..00000000000 --- a/arch/i386/xen/mmu.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef _XEN_MMU_H - -#include -#include - -/* - * Page-directory addresses above 4GB do not fit into architectural %cr3. - * When accessing %cr3, or equivalent field in vcpu_guest_context, guests - * must use the following accessor macros to pack/unpack valid MFNs. - * - * Note that Xen is using the fact that the pagetable base is always - * page-aligned, and putting the 12 MSB of the address into the 12 LSB - * of cr3. - */ -#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) -#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) - - -void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); - -void xen_set_pte(pte_t *ptep, pte_t pteval); -void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval); -void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval); - -void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next); -void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); -void xen_exit_mmap(struct mm_struct *mm); - -void xen_pgd_pin(pgd_t *pgd); -//void xen_pgd_unpin(pgd_t *pgd); - -#ifdef CONFIG_X86_PAE -unsigned long long xen_pte_val(pte_t); -unsigned long long xen_pmd_val(pmd_t); -unsigned long long xen_pgd_val(pgd_t); - -pte_t xen_make_pte(unsigned long long); -pmd_t xen_make_pmd(unsigned long long); -pgd_t xen_make_pgd(unsigned long long); - -void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pteval); -void xen_set_pte_atomic(pte_t *ptep, pte_t pte); -void xen_set_pud(pud_t *ptr, pud_t val); -void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); -void xen_pmd_clear(pmd_t *pmdp); - - -#else -unsigned long xen_pte_val(pte_t); -unsigned long xen_pmd_val(pmd_t); -unsigned long xen_pgd_val(pgd_t); - -pte_t xen_make_pte(unsigned long); -pmd_t xen_make_pmd(unsigned long); -pgd_t xen_make_pgd(unsigned long); -#endif - -#endif /* _XEN_MMU_H */ diff --git a/arch/i386/xen/multicalls.c b/arch/i386/xen/multicalls.c deleted file mode 100644 index c837e8e463d..00000000000 --- a/arch/i386/xen/multicalls.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Xen hypercall batching. - * - * Xen allows multiple hypercalls to be issued at once, using the - * multicall interface. This allows the cost of trapping into the - * hypervisor to be amortized over several calls. - * - * This file implements a simple interface for multicalls. There's a - * per-cpu buffer of outstanding multicalls. When you want to queue a - * multicall for issuing, you can allocate a multicall slot for the - * call and its arguments, along with storage for space which is - * pointed to by the arguments (for passing pointers to structures, - * etc). When the multicall is actually issued, all the space for the - * commands and allocated memory is freed for reuse. - * - * Multicalls are flushed whenever any of the buffers get full, or - * when explicitly requested. There's no way to get per-multicall - * return results back. It will BUG if any of the multicalls fail. - * - * Jeremy Fitzhardinge , XenSource Inc, 2007 - */ -#include -#include - -#include - -#include "multicalls.h" - -#define MC_BATCH 32 -#define MC_ARGS (MC_BATCH * 16 / sizeof(u64)) - -struct mc_buffer { - struct multicall_entry entries[MC_BATCH]; - u64 args[MC_ARGS]; - unsigned mcidx, argidx; -}; - -static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); -DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); - -void xen_mc_flush(void) -{ - struct mc_buffer *b = &__get_cpu_var(mc_buffer); - int ret = 0; - unsigned long flags; - - BUG_ON(preemptible()); - - /* Disable interrupts in case someone comes in and queues - something in the middle */ - local_irq_save(flags); - - if (b->mcidx) { - int i; - - if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0) - BUG(); - for (i = 0; i < b->mcidx; i++) - if (b->entries[i].result < 0) - ret++; - b->mcidx = 0; - b->argidx = 0; - } else - BUG_ON(b->argidx != 0); - - local_irq_restore(flags); - - BUG_ON(ret); -} - -struct multicall_space __xen_mc_entry(size_t args) -{ - struct mc_buffer *b = &__get_cpu_var(mc_buffer); - struct multicall_space ret; - unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64); - - BUG_ON(preemptible()); - BUG_ON(argspace > MC_ARGS); - - if (b->mcidx == MC_BATCH || - (b->argidx + argspace) > MC_ARGS) - xen_mc_flush(); - - ret.mc = &b->entries[b->mcidx]; - b->mcidx++; - ret.args = &b->args[b->argidx]; - b->argidx += argspace; - - return ret; -} diff --git a/arch/i386/xen/multicalls.h b/arch/i386/xen/multicalls.h deleted file mode 100644 index e6f7530b156..00000000000 --- a/arch/i386/xen/multicalls.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef _XEN_MULTICALLS_H -#define _XEN_MULTICALLS_H - -#include "xen-ops.h" - -/* Multicalls */ -struct multicall_space -{ - struct multicall_entry *mc; - void *args; -}; - -/* Allocate room for a multicall and its args */ -struct multicall_space __xen_mc_entry(size_t args); - -DECLARE_PER_CPU(unsigned long, xen_mc_irq_flags); - -/* Call to start a batch of multiple __xen_mc_entry()s. Must be - paired with xen_mc_issue() */ -static inline void xen_mc_batch(void) -{ - /* need to disable interrupts until this entry is complete */ - local_irq_save(__get_cpu_var(xen_mc_irq_flags)); -} - -static inline struct multicall_space xen_mc_entry(size_t args) -{ - xen_mc_batch(); - return __xen_mc_entry(args); -} - -/* Flush all pending multicalls */ -void xen_mc_flush(void); - -/* Issue a multicall if we're not in a lazy mode */ -static inline void xen_mc_issue(unsigned mode) -{ - if ((xen_get_lazy_mode() & mode) == 0) - xen_mc_flush(); - - /* restore flags saved in xen_mc_batch */ - local_irq_restore(x86_read_percpu(xen_mc_irq_flags)); -} - -#endif /* _XEN_MULTICALLS_H */ diff --git a/arch/i386/xen/setup.c b/arch/i386/xen/setup.c deleted file mode 100644 index f84e7722664..00000000000 --- a/arch/i386/xen/setup.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Machine specific setup for xen - * - * Jeremy Fitzhardinge , XenSource Inc, 2007 - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include "xen-ops.h" -#include "vdso.h" - -/* These are code, but not functions. Defined in entry.S */ -extern const char xen_hypervisor_callback[]; -extern const char xen_failsafe_callback[]; - -unsigned long *phys_to_machine_mapping; -EXPORT_SYMBOL(phys_to_machine_mapping); - -/** - * machine_specific_memory_setup - Hook for machine specific memory setup. - **/ - -char * __init xen_memory_setup(void) -{ - unsigned long max_pfn = xen_start_info->nr_pages; - - e820.nr_map = 0; - add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM); - - return "Xen"; -} - -static void xen_idle(void) -{ - local_irq_disable(); - - if (need_resched()) - local_irq_enable(); - else { - current_thread_info()->status &= ~TS_POLLING; - smp_mb__after_clear_bit(); - safe_halt(); - current_thread_info()->status |= TS_POLLING; - } -} - -/* - * Set the bit indicating "nosegneg" library variants should be used. - */ -static void fiddle_vdso(void) -{ - extern u32 VDSO_NOTE_MASK; /* See ../kernel/vsyscall-note.S. */ - extern char vsyscall_int80_start; - u32 *mask = (u32 *) ((unsigned long) &VDSO_NOTE_MASK - VDSO_PRELINK + - &vsyscall_int80_start); - *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; -} - -void __init xen_arch_setup(void) -{ - struct physdev_set_iopl set_iopl; - int rc; - - HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); - HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); - - if (!xen_feature(XENFEAT_auto_translated_physmap)) - HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3); - - HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback, - __KERNEL_CS, (unsigned long)xen_failsafe_callback); - - set_iopl.iopl = 1; - rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); - if (rc != 0) - printk(KERN_INFO "physdev_op failed %d\n", rc); - -#ifdef CONFIG_ACPI - if (!(xen_start_info->flags & SIF_INITDOMAIN)) { - printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); - disable_acpi(); - } -#endif - - memcpy(boot_command_line, xen_start_info->cmd_line, - MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ? - COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE); - - pm_idle = xen_idle; - -#ifdef CONFIG_SMP - /* fill cpus_possible with all available cpus */ - xen_fill_possible_map(); -#endif - - paravirt_disable_iospace(); - - fiddle_vdso(); -} diff --git a/arch/i386/xen/smp.c b/arch/i386/xen/smp.c deleted file mode 100644 index 557b8e24706..00000000000 --- a/arch/i386/xen/smp.c +++ /dev/null @@ -1,404 +0,0 @@ -/* - * Xen SMP support - * - * This file implements the Xen versions of smp_ops. SMP under Xen is - * very straightforward. Bringing a CPU up is simply a matter of - * loading its initial context and setting it running. - * - * IPIs are handled through the Xen event mechanism. - * - * Because virtual CPUs can be scheduled onto any real CPU, there's no - * useful topology information for the kernel to make use of. As a - * result, all CPUs are treated as if they're single-core and - * single-threaded. - * - * This does not handle HOTPLUG_CPU yet. - */ -#include -#include -#include - -#include -#include -#include -#include - -#include -#include - -#include -#include - -#include -#include - -#include "xen-ops.h" -#include "mmu.h" - -static cpumask_t cpu_initialized_map; -static DEFINE_PER_CPU(int, resched_irq); -static DEFINE_PER_CPU(int, callfunc_irq); - -/* - * Structure and data for smp_call_function(). This is designed to minimise - * static memory requirements. It also looks cleaner. - */ -static DEFINE_SPINLOCK(call_lock); - -struct call_data_struct { - void (*func) (void *info); - void *info; - atomic_t started; - atomic_t finished; - int wait; -}; - -static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); - -static struct call_data_struct *call_data; - -/* - * Reschedule call back. Nothing to do, - * all the work is done automatically when - * we return from the interrupt. - */ -static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) -{ - return IRQ_HANDLED; -} - -static __cpuinit void cpu_bringup_and_idle(void) -{ - int cpu = smp_processor_id(); - - cpu_init(); - - preempt_disable(); - per_cpu(cpu_state, cpu) = CPU_ONLINE; - - xen_setup_cpu_clockevents(); - - /* We can take interrupts now: we're officially "up". */ - local_irq_enable(); - - wmb(); /* make sure everything is out */ - cpu_idle(); -} - -static int xen_smp_intr_init(unsigned int cpu) -{ - int rc; - const char *resched_name, *callfunc_name; - - per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1; - - resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); - rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, - cpu, - xen_reschedule_interrupt, - IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, - resched_name, - NULL); - if (rc < 0) - goto fail; - per_cpu(resched_irq, cpu) = rc; - - callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu); - rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR, - cpu, - xen_call_function_interrupt, - IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, - callfunc_name, - NULL); - if (rc < 0) - goto fail; - per_cpu(callfunc_irq, cpu) = rc; - - return 0; - - fail: - if (per_cpu(resched_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); - if (per_cpu(callfunc_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); - return rc; -} - -void __init xen_fill_possible_map(void) -{ - int i, rc; - - for (i = 0; i < NR_CPUS; i++) { - rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); - if (rc >= 0) - cpu_set(i, cpu_possible_map); - } -} - -void __init xen_smp_prepare_boot_cpu(void) -{ - int cpu; - - BUG_ON(smp_processor_id() != 0); - native_smp_prepare_boot_cpu(); - - /* We've switched to the "real" per-cpu gdt, so make sure the - old memory can be recycled */ - make_lowmem_page_readwrite(&per_cpu__gdt_page); - - for (cpu = 0; cpu < NR_CPUS; cpu++) { - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); - } - - xen_setup_vcpu_info_placement(); -} - -void __init xen_smp_prepare_cpus(unsigned int max_cpus) -{ - unsigned cpu; - - for (cpu = 0; cpu < NR_CPUS; cpu++) { - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); - } - - smp_store_cpu_info(0); - set_cpu_sibling_map(0); - - if (xen_smp_intr_init(0)) - BUG(); - - cpu_initialized_map = cpumask_of_cpu(0); - - /* Restrict the possible_map according to max_cpus. */ - while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { - for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--) - continue; - cpu_clear(cpu, cpu_possible_map); - } - - for_each_possible_cpu (cpu) { - struct task_struct *idle; - - if (cpu == 0) - continue; - - idle = fork_idle(cpu); - if (IS_ERR(idle)) - panic("failed fork for CPU %d", cpu); - - cpu_set(cpu, cpu_present_map); - } - - //init_xenbus_allowed_cpumask(); -} - -static __cpuinit int -cpu_initialize_context(unsigned int cpu, struct task_struct *idle) -{ - struct vcpu_guest_context *ctxt; - struct gdt_page *gdt = &per_cpu(gdt_page, cpu); - - if (cpu_test_and_set(cpu, cpu_initialized_map)) - return 0; - - ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); - if (ctxt == NULL) - return -ENOMEM; - - ctxt->flags = VGCF_IN_KERNEL; - ctxt->user_regs.ds = __USER_DS; - ctxt->user_regs.es = __USER_DS; - ctxt->user_regs.fs = __KERNEL_PERCPU; - ctxt->user_regs.gs = 0; - ctxt->user_regs.ss = __KERNEL_DS; - ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; - ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ - - memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); - - xen_copy_trap_info(ctxt->trap_ctxt); - - ctxt->ldt_ents = 0; - - BUG_ON((unsigned long)gdt->gdt & ~PAGE_MASK); - make_lowmem_page_readonly(gdt->gdt); - - ctxt->gdt_frames[0] = virt_to_mfn(gdt->gdt); - ctxt->gdt_ents = ARRAY_SIZE(gdt->gdt); - - ctxt->user_regs.cs = __KERNEL_CS; - ctxt->user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs); - - ctxt->kernel_ss = __KERNEL_DS; - ctxt->kernel_sp = idle->thread.esp0; - - ctxt->event_callback_cs = __KERNEL_CS; - ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback; - ctxt->failsafe_callback_cs = __KERNEL_CS; - ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback; - - per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); - ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); - - if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) - BUG(); - - kfree(ctxt); - return 0; -} - -int __cpuinit xen_cpu_up(unsigned int cpu) -{ - struct task_struct *idle = idle_task(cpu); - int rc; - -#if 0 - rc = cpu_up_check(cpu); - if (rc) - return rc; -#endif - - init_gdt(cpu); - per_cpu(current_task, cpu) = idle; - irq_ctx_init(cpu); - xen_setup_timer(cpu); - - /* make sure interrupts start blocked */ - per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; - - rc = cpu_initialize_context(cpu, idle); - if (rc) - return rc; - - if (num_online_cpus() == 1) - alternatives_smp_switch(1); - - rc = xen_smp_intr_init(cpu); - if (rc) - return rc; - - smp_store_cpu_info(cpu); - set_cpu_sibling_map(cpu); - /* This must be done before setting cpu_online_map */ - wmb(); - - cpu_set(cpu, cpu_online_map); - - rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); - BUG_ON(rc); - - return 0; -} - -void xen_smp_cpus_done(unsigned int max_cpus) -{ -} - -static void stop_self(void *v) -{ - int cpu = smp_processor_id(); - - /* make sure we're not pinning something down */ - load_cr3(swapper_pg_dir); - /* should set up a minimal gdt */ - - HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL); - BUG(); -} - -void xen_smp_send_stop(void) -{ - smp_call_function(stop_self, NULL, 0, 0); -} - -void xen_smp_send_reschedule(int cpu) -{ - xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); -} - - -static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) -{ - unsigned cpu; - - cpus_and(mask, mask, cpu_online_map); - - for_each_cpu_mask(cpu, mask) - xen_send_IPI_one(cpu, vector); -} - -static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) -{ - void (*func) (void *info) = call_data->func; - void *info = call_data->info; - int wait = call_data->wait; - - /* - * Notify initiating CPU that I've grabbed the data and am - * about to execute the function - */ - mb(); - atomic_inc(&call_data->started); - /* - * At this point the info structure may be out of scope unless wait==1 - */ - irq_enter(); - (*func)(info); - irq_exit(); - - if (wait) { - mb(); /* commit everything before setting finished */ - atomic_inc(&call_data->finished); - } - - return IRQ_HANDLED; -} - -int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), - void *info, int wait) -{ - struct call_data_struct data; - int cpus; - - /* Holding any lock stops cpus from going down. */ - spin_lock(&call_lock); - - cpu_clear(smp_processor_id(), mask); - - cpus = cpus_weight(mask); - if (!cpus) { - spin_unlock(&call_lock); - return 0; - } - - /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); - - data.func = func; - data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); - - call_data = &data; - mb(); /* write everything before IPI */ - - /* Send a message to other CPUs and wait for them to respond */ - xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); - - /* Make sure other vcpus get a chance to run. - XXX too severe? Maybe we should check the other CPU's states? */ - HYPERVISOR_sched_op(SCHEDOP_yield, 0); - - /* Wait for response */ - while (atomic_read(&data.started) != cpus || - (wait && atomic_read(&data.finished) != cpus)) - cpu_relax(); - - spin_unlock(&call_lock); - - return 0; -} diff --git a/arch/i386/xen/time.c b/arch/i386/xen/time.c deleted file mode 100644 index dfd6db69ead..00000000000 --- a/arch/i386/xen/time.c +++ /dev/null @@ -1,593 +0,0 @@ -/* - * Xen time implementation. - * - * This is implemented in terms of a clocksource driver which uses - * the hypervisor clock as a nanosecond timebase, and a clockevent - * driver which uses the hypervisor's timer mechanism. - * - * Jeremy Fitzhardinge , XenSource Inc, 2007 - */ -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include "xen-ops.h" - -#define XEN_SHIFT 22 - -/* Xen may fire a timer up to this many ns early */ -#define TIMER_SLOP 100000 -#define NS_PER_TICK (1000000000LL / HZ) - -static cycle_t xen_clocksource_read(void); - -/* These are perodically updated in shared_info, and then copied here. */ -struct shadow_time_info { - u64 tsc_timestamp; /* TSC at last update of time vals. */ - u64 system_timestamp; /* Time, in nanosecs, since boot. */ - u32 tsc_to_nsec_mul; - int tsc_shift; - u32 version; -}; - -static DEFINE_PER_CPU(struct shadow_time_info, shadow_time); - -/* runstate info updated by Xen */ -static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); - -/* snapshots of runstate info */ -static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate_snapshot); - -/* unused ns of stolen and blocked time */ -static DEFINE_PER_CPU(u64, residual_stolen); -static DEFINE_PER_CPU(u64, residual_blocked); - -/* return an consistent snapshot of 64-bit time/counter value */ -static u64 get64(const u64 *p) -{ - u64 ret; - - if (BITS_PER_LONG < 64) { - u32 *p32 = (u32 *)p; - u32 h, l; - - /* - * Read high then low, and then make sure high is - * still the same; this will only loop if low wraps - * and carries into high. - * XXX some clean way to make this endian-proof? - */ - do { - h = p32[1]; - barrier(); - l = p32[0]; - barrier(); - } while (p32[1] != h); - - ret = (((u64)h) << 32) | l; - } else - ret = *p; - - return ret; -} - -/* - * Runstate accounting - */ -static void get_runstate_snapshot(struct vcpu_runstate_info *res) -{ - u64 state_time; - struct vcpu_runstate_info *state; - - BUG_ON(preemptible()); - - state = &__get_cpu_var(runstate); - - /* - * The runstate info is always updated by the hypervisor on - * the current CPU, so there's no need to use anything - * stronger than a compiler barrier when fetching it. - */ - do { - state_time = get64(&state->state_entry_time); - barrier(); - *res = *state; - barrier(); - } while (get64(&state->state_entry_time) != state_time); -} - -static void setup_runstate_info(int cpu) -{ - struct vcpu_register_runstate_memory_area area; - - area.addr.v = &per_cpu(runstate, cpu); - - if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, - cpu, &area)) - BUG(); -} - -static void do_stolen_accounting(void) -{ - struct vcpu_runstate_info state; - struct vcpu_runstate_info *snap; - s64 blocked, runnable, offline, stolen; - cputime_t ticks; - - get_runstate_snapshot(&state); - - WARN_ON(state.state != RUNSTATE_running); - - snap = &__get_cpu_var(runstate_snapshot); - - /* work out how much time the VCPU has not been runn*ing* */ - blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked]; - runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable]; - offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline]; - - *snap = state; - - /* Add the appropriate number of ticks of stolen time, - including any left-overs from last time. Passing NULL to - account_steal_time accounts the time as stolen. */ - stolen = runnable + offline + __get_cpu_var(residual_stolen); - - if (stolen < 0) - stolen = 0; - - ticks = 0; - while (stolen >= NS_PER_TICK) { - ticks++; - stolen -= NS_PER_TICK; - } - __get_cpu_var(residual_stolen) = stolen; - account_steal_time(NULL, ticks); - - /* Add the appropriate number of ticks of blocked time, - including any left-overs from last time. Passing idle to - account_steal_time accounts the time as idle/wait. */ - blocked += __get_cpu_var(residual_blocked); - - if (blocked < 0) - blocked = 0; - - ticks = 0; - while (blocked >= NS_PER_TICK) { - ticks++; - blocked -= NS_PER_TICK; - } - __get_cpu_var(residual_blocked) = blocked; - account_steal_time(idle_task(smp_processor_id()), ticks); -} - -/* - * Xen sched_clock implementation. Returns the number of unstolen - * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED - * states. - */ -unsigned long long xen_sched_clock(void) -{ - struct vcpu_runstate_info state; - cycle_t now; - u64 ret; - s64 offset; - - /* - * Ideally sched_clock should be called on a per-cpu basis - * anyway, so preempt should already be disabled, but that's - * not current practice at the moment. - */ - preempt_disable(); - - now = xen_clocksource_read(); - - get_runstate_snapshot(&state); - - WARN_ON(state.state != RUNSTATE_running); - - offset = now - state.state_entry_time; - if (offset < 0) - offset = 0; - - ret = state.time[RUNSTATE_blocked] + - state.time[RUNSTATE_running] + - offset; - - preempt_enable(); - - return ret; -} - - -/* Get the CPU speed from Xen */ -unsigned long xen_cpu_khz(void) -{ - u64 cpu_khz = 1000000ULL << 32; - const struct vcpu_time_info *info = - &HYPERVISOR_shared_info->vcpu_info[0].time; - - do_div(cpu_khz, info->tsc_to_system_mul); - if (info->tsc_shift < 0) - cpu_khz <<= -info->tsc_shift; - else - cpu_khz >>= info->tsc_shift; - - return cpu_khz; -} - -/* - * Reads a consistent set of time-base values from Xen, into a shadow data - * area. - */ -static unsigned get_time_values_from_xen(void) -{ - struct vcpu_time_info *src; - struct shadow_time_info *dst; - - /* src is shared memory with the hypervisor, so we need to - make sure we get a consistent snapshot, even in the face of - being preempted. */ - src = &__get_cpu_var(xen_vcpu)->time; - dst = &__get_cpu_var(shadow_time); - - do { - dst->version = src->version; - rmb(); /* fetch version before data */ - dst->tsc_timestamp = src->tsc_timestamp; - dst->system_timestamp = src->system_time; - dst->tsc_to_nsec_mul = src->tsc_to_system_mul; - dst->tsc_shift = src->tsc_shift; - rmb(); /* test version after fetching data */ - } while ((src->version & 1) | (dst->version ^ src->version)); - - return dst->version; -} - -/* - * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, - * yielding a 64-bit result. - */ -static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) -{ - u64 product; -#ifdef __i386__ - u32 tmp1, tmp2; -#endif - - if (shift < 0) - delta >>= -shift; - else - delta <<= shift; - -#ifdef __i386__ - __asm__ ( - "mul %5 ; " - "mov %4,%%eax ; " - "mov %%edx,%4 ; " - "mul %5 ; " - "xor %5,%5 ; " - "add %4,%%eax ; " - "adc %5,%%edx ; " - : "=A" (product), "=r" (tmp1), "=r" (tmp2) - : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); -#elif __x86_64__ - __asm__ ( - "mul %%rdx ; shrd $32,%%rdx,%%rax" - : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); -#else -#error implement me! -#endif - - return product; -} - -static u64 get_nsec_offset(struct shadow_time_info *shadow) -{ - u64 now, delta; - now = native_read_tsc(); - delta = now - shadow->tsc_timestamp; - return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); -} - -static cycle_t xen_clocksource_read(void) -{ - struct shadow_time_info *shadow = &get_cpu_var(shadow_time); - cycle_t ret; - unsigned version; - - do { - version = get_time_values_from_xen(); - barrier(); - ret = shadow->system_timestamp + get_nsec_offset(shadow); - barrier(); - } while (version != __get_cpu_var(xen_vcpu)->time.version); - - put_cpu_var(shadow_time); - - return ret; -} - -static void xen_read_wallclock(struct timespec *ts) -{ - const struct shared_info *s = HYPERVISOR_shared_info; - u32 version; - u64 delta; - struct timespec now; - - /* get wallclock at system boot */ - do { - version = s->wc_version; - rmb(); /* fetch version before time */ - now.tv_sec = s->wc_sec; - now.tv_nsec = s->wc_nsec; - rmb(); /* fetch time before checking version */ - } while ((s->wc_version & 1) | (version ^ s->wc_version)); - - delta = xen_clocksource_read(); /* time since system boot */ - delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; - - now.tv_nsec = do_div(delta, NSEC_PER_SEC); - now.tv_sec = delta; - - set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); -} - -unsigned long xen_get_wallclock(void) -{ - struct timespec ts; - - xen_read_wallclock(&ts); - - return ts.tv_sec; -} - -int xen_set_wallclock(unsigned long now) -{ - /* do nothing for domU */ - return -1; -} - -static struct clocksource xen_clocksource __read_mostly = { - .name = "xen", - .rating = 400, - .read = xen_clocksource_read, - .mask = ~0, - .mult = 1<mode != CLOCK_EVT_MODE_ONESHOT); - - if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0) - BUG(); - - /* We may have missed the deadline, but there's no real way of - knowing for sure. If the event was in the past, then we'll - get an immediate interrupt. */ - - return 0; -} - -static const struct clock_event_device xen_timerop_clockevent = { - .name = "xen", - .features = CLOCK_EVT_FEAT_ONESHOT, - - .max_delta_ns = 0xffffffff, - .min_delta_ns = TIMER_SLOP, - - .mult = 1, - .shift = 0, - .rating = 500, - - .set_mode = xen_timerop_set_mode, - .set_next_event = xen_timerop_set_next_event, -}; - - - -static void xen_vcpuop_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - int cpu = smp_processor_id(); - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - WARN_ON(1); /* unsupported */ - break; - - case CLOCK_EVT_MODE_ONESHOT: - if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) - BUG(); - break; - - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) || - HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) - BUG(); - break; - case CLOCK_EVT_MODE_RESUME: - break; - } -} - -static int xen_vcpuop_set_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - int cpu = smp_processor_id(); - struct vcpu_set_singleshot_timer single; - int ret; - - WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT); - - single.timeout_abs_ns = get_abs_timeout(delta); - single.flags = VCPU_SSHOTTMR_future; - - ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single); - - BUG_ON(ret != 0 && ret != -ETIME); - - return ret; -} - -static const struct clock_event_device xen_vcpuop_clockevent = { - .name = "xen", - .features = CLOCK_EVT_FEAT_ONESHOT, - - .max_delta_ns = 0xffffffff, - .min_delta_ns = TIMER_SLOP, - - .mult = 1, - .shift = 0, - .rating = 500, - - .set_mode = xen_vcpuop_set_mode, - .set_next_event = xen_vcpuop_set_next_event, -}; - -static const struct clock_event_device *xen_clockevent = - &xen_timerop_clockevent; -static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events); - -static irqreturn_t xen_timer_interrupt(int irq, void *dev_id) -{ - struct clock_event_device *evt = &__get_cpu_var(xen_clock_events); - irqreturn_t ret; - - ret = IRQ_NONE; - if (evt->event_handler) { - evt->event_handler(evt); - ret = IRQ_HANDLED; - } - - do_stolen_accounting(); - - return ret; -} - -void xen_setup_timer(int cpu) -{ - const char *name; - struct clock_event_device *evt; - int irq; - - printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu); - - name = kasprintf(GFP_KERNEL, "timer%d", cpu); - if (!name) - name = ""; - - irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, - IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, - name, NULL); - - evt = &per_cpu(xen_clock_events, cpu); - memcpy(evt, xen_clockevent, sizeof(*evt)); - - evt->cpumask = cpumask_of_cpu(cpu); - evt->irq = irq; - - setup_runstate_info(cpu); -} - -void xen_setup_cpu_clockevents(void) -{ - BUG_ON(preemptible()); - - clockevents_register_device(&__get_cpu_var(xen_clock_events)); -} - -__init void xen_time_init(void) -{ - int cpu = smp_processor_id(); - - get_time_values_from_xen(); - - clocksource_register(&xen_clocksource); - - if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { - /* Successfully turned off 100Hz tick, so we have the - vcpuop-based timer interface */ - printk(KERN_DEBUG "Xen: using vcpuop timer interface\n"); - xen_clockevent = &xen_vcpuop_clockevent; - } - - /* Set initial system time with full resolution */ - xen_read_wallclock(&xtime); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); - - tsc_disable = 0; - - xen_setup_timer(cpu); - xen_setup_cpu_clockevents(); -} diff --git a/arch/i386/xen/vdso.h b/arch/i386/xen/vdso.h deleted file mode 100644 index 861fedfe523..00000000000 --- a/arch/i386/xen/vdso.h +++ /dev/null @@ -1,4 +0,0 @@ -/* Bit used for the pseudo-hwcap for non-negative segments. We use - bit 1 to avoid bugs in some versions of glibc when bit 0 is - used; the choice is otherwise arbitrary. */ -#define VDSO_NOTE_NONEGSEG_BIT 1 diff --git a/arch/i386/xen/xen-asm.S b/arch/i386/xen/xen-asm.S deleted file mode 100644 index 1a43b60c0c6..00000000000 --- a/arch/i386/xen/xen-asm.S +++ /dev/null @@ -1,291 +0,0 @@ -/* - Asm versions of Xen pv-ops, suitable for either direct use or inlining. - The inline versions are the same as the direct-use versions, with the - pre- and post-amble chopped off. - - This code is encoded for size rather than absolute efficiency, - with a view to being able to inline as much as possible. - - We only bother with direct forms (ie, vcpu in pda) of the operations - here; the indirect forms are better handled in C, since they're - generally too large to inline anyway. - */ - -#include - -#include -#include -#include -#include -#include - -#include - -#define RELOC(x, v) .globl x##_reloc; x##_reloc=v -#define ENDPATCH(x) .globl x##_end; x##_end=. - -/* Pseudo-flag used for virtual NMI, which we don't implement yet */ -#define XEN_EFLAGS_NMI 0x80000000 - -/* - Enable events. This clears the event mask and tests the pending - event status with one and operation. If there are pending - events, then enter the hypervisor to get them handled. - */ -ENTRY(xen_irq_enable_direct) - /* Clear mask and test pending */ - andw $0x00ff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - jz 1f -2: call check_events -1: -ENDPATCH(xen_irq_enable_direct) - ret - ENDPROC(xen_irq_enable_direct) - RELOC(xen_irq_enable_direct, 2b+1) - - -/* - Disabling events is simply a matter of making the event mask - non-zero. - */ -ENTRY(xen_irq_disable_direct) - movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask -ENDPATCH(xen_irq_disable_direct) - ret - ENDPROC(xen_irq_disable_direct) - RELOC(xen_irq_disable_direct, 0) - -/* - (xen_)save_fl is used to get the current interrupt enable status. - Callers expect the status to be in X86_EFLAGS_IF, and other bits - may be set in the return value. We take advantage of this by - making sure that X86_EFLAGS_IF has the right value (and other bits - in that byte are 0), but other bits in the return value are - undefined. We need to toggle the state of the bit, because - Xen and x86 use opposite senses (mask vs enable). - */ -ENTRY(xen_save_fl_direct) - testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - setz %ah - addb %ah,%ah -ENDPATCH(xen_save_fl_direct) - ret - ENDPROC(xen_save_fl_direct) - RELOC(xen_save_fl_direct, 0) - - -/* - In principle the caller should be passing us a value return - from xen_save_fl_direct, but for robustness sake we test only - the X86_EFLAGS_IF flag rather than the whole byte. After - setting the interrupt mask state, it checks for unmasked - pending events and enters the hypervisor to get them delivered - if so. - */ -ENTRY(xen_restore_fl_direct) - testb $X86_EFLAGS_IF>>8, %ah - setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* check for unmasked and pending */ - cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending - jz 1f -2: call check_events -1: -ENDPATCH(xen_restore_fl_direct) - ret - ENDPROC(xen_restore_fl_direct) - RELOC(xen_restore_fl_direct, 2b+1) - -/* - This is run where a normal iret would be run, with the same stack setup: - 8: eflags - 4: cs - esp-> 0: eip - - This attempts to make sure that any pending events are dealt - with on return to usermode, but there is a small window in - which an event can happen just before entering usermode. If - the nested interrupt ends up setting one of the TIF_WORK_MASK - pending work flags, they will not be tested again before - returning to usermode. This means that a process can end up - with pending work, which will be unprocessed until the process - enters and leaves the kernel again, which could be an - unbounded amount of time. This means that a pending signal or - reschedule event could be indefinitely delayed. - - The fix is to notice a nested interrupt in the critical - window, and if one occurs, then fold the nested interrupt into - the current interrupt stack frame, and re-process it - iteratively rather than recursively. This means that it will - exit via the normal path, and all pending work will be dealt - with appropriately. - - Because the nested interrupt handler needs to deal with the - current stack state in whatever form its in, we keep things - simple by only using a single register which is pushed/popped - on the stack. - - Non-direct iret could be done in the same way, but it would - require an annoying amount of code duplication. We'll assume - that direct mode will be the common case once the hypervisor - support becomes commonplace. - */ -ENTRY(xen_iret_direct) - /* test eflags for special cases */ - testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp) - jnz hyper_iret - - push %eax - ESP_OFFSET=4 # bytes pushed onto stack - - /* Store vcpu_info pointer for easy access. Do it this - way to avoid having to reload %fs */ -#ifdef CONFIG_SMP - GET_THREAD_INFO(%eax) - movl TI_cpu(%eax),%eax - movl __per_cpu_offset(,%eax,4),%eax - lea per_cpu__xen_vcpu_info(%eax),%eax -#else - movl $per_cpu__xen_vcpu_info, %eax -#endif - - /* check IF state we're restoring */ - testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) - - /* Maybe enable events. Once this happens we could get a - recursive event, so the critical region starts immediately - afterwards. However, if that happens we don't end up - resuming the code, so we don't have to be worried about - being preempted to another CPU. */ - setz XEN_vcpu_info_mask(%eax) -xen_iret_start_crit: - - /* check for unmasked and pending */ - cmpw $0x0001, XEN_vcpu_info_pending(%eax) - - /* If there's something pending, mask events again so we - can jump back into xen_hypervisor_callback */ - sete XEN_vcpu_info_mask(%eax) - - popl %eax - - /* From this point on the registers are restored and the stack - updated, so we don't need to worry about it if we're preempted */ -iret_restore_end: - - /* Jump to hypervisor_callback after fixing up the stack. - Events are masked, so jumping out of the critical - region is OK. */ - je xen_hypervisor_callback - - iret -xen_iret_end_crit: - -hyper_iret: - /* put this out of line since its very rarely used */ - jmp hypercall_page + __HYPERVISOR_iret * 32 - - .globl xen_iret_start_crit, xen_iret_end_crit - -/* - This is called by xen_hypervisor_callback in entry.S when it sees - that the EIP at the time of interrupt was between xen_iret_start_crit - and xen_iret_end_crit. We're passed the EIP in %eax so we can do - a more refined determination of what to do. - - The stack format at this point is: - ---------------- - ss : (ss/esp may be present if we came from usermode) - esp : - eflags } outer exception info - cs } - eip } - ---------------- <- edi (copy dest) - eax : outer eax if it hasn't been restored - ---------------- - eflags } nested exception info - cs } (no ss/esp because we're nested - eip } from the same ring) - orig_eax }<- esi (copy src) - - - - - - - - - - fs } - es } - ds } SAVE_ALL state - eax } - : : - ebx } - ---------------- - return addr <- esp - ---------------- - - In order to deliver the nested exception properly, we need to shift - everything from the return addr up to the error code so it - sits just under the outer exception info. This means that when we - handle the exception, we do it in the context of the outer exception - rather than starting a new one. - - The only caveat is that if the outer eax hasn't been - restored yet (ie, it's still on stack), we need to insert - its value into the SAVE_ALL state before going on, since - it's usermode state which we eventually need to restore. - */ -ENTRY(xen_iret_crit_fixup) - /* offsets +4 for return address */ - - /* - Paranoia: Make sure we're really coming from userspace. - One could imagine a case where userspace jumps into the - critical range address, but just before the CPU delivers a GP, - it decides to deliver an interrupt instead. Unlikely? - Definitely. Easy to avoid? Yes. The Intel documents - explicitly say that the reported EIP for a bad jump is the - jump instruction itself, not the destination, but some virtual - environments get this wrong. - */ - movl PT_CS+4(%esp), %ecx - andl $SEGMENT_RPL_MASK, %ecx - cmpl $USER_RPL, %ecx - je 2f - - lea PT_ORIG_EAX+4(%esp), %esi - lea PT_EFLAGS+4(%esp), %edi - - /* If eip is before iret_restore_end then stack - hasn't been restored yet. */ - cmp $iret_restore_end, %eax - jae 1f - - movl 0+4(%edi),%eax /* copy EAX */ - movl %eax, PT_EAX+4(%esp) - - lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */ - - /* set up the copy */ -1: std - mov $(PT_EIP+4) / 4, %ecx /* copy ret+saved regs up to orig_eax */ - rep movsl - cld - - lea 4(%edi),%esp /* point esp to new frame */ -2: ret - - -/* - Force an event check by making a hypercall, - but preserve regs before making the call. - */ -check_events: - push %eax - push %ecx - push %edx - call force_evtchn_callback - pop %edx - pop %ecx - pop %eax - ret diff --git a/arch/i386/xen/xen-head.S b/arch/i386/xen/xen-head.S deleted file mode 100644 index f8d6937db2e..00000000000 --- a/arch/i386/xen/xen-head.S +++ /dev/null @@ -1,38 +0,0 @@ -/* Xen-specific pieces of head.S, intended to be included in the right - place in head.S */ - -#ifdef CONFIG_XEN - -#include -#include -#include - -.pushsection .init.text -ENTRY(startup_xen) - movl %esi,xen_start_info - cld - movl $(init_thread_union+THREAD_SIZE),%esp - jmp xen_start_kernel -.popsection - -.pushsection .bss.page_aligned - .align PAGE_SIZE_asm -ENTRY(hypercall_page) - .skip 0x1000 -.popsection - - ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") - ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6") - ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0") - ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long __PAGE_OFFSET) - ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen) - ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) - ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") -#ifdef CONFIG_X86_PAE - ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") -#else - ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "no") -#endif - ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") - -#endif /*CONFIG_XEN */ diff --git a/arch/i386/xen/xen-ops.h b/arch/i386/xen/xen-ops.h deleted file mode 100644 index b9aaea45f07..00000000000 --- a/arch/i386/xen/xen-ops.h +++ /dev/null @@ -1,71 +0,0 @@ -#ifndef XEN_OPS_H -#define XEN_OPS_H - -#include - -/* These are code, but not functions. Defined in entry.S */ -extern const char xen_hypervisor_callback[]; -extern const char xen_failsafe_callback[]; - -void xen_copy_trap_info(struct trap_info *traps); - -DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); -DECLARE_PER_CPU(unsigned long, xen_cr3); - -extern struct start_info *xen_start_info; -extern struct shared_info *HYPERVISOR_shared_info; - -char * __init xen_memory_setup(void); -void __init xen_arch_setup(void); -void __init xen_init_IRQ(void); - -void xen_setup_timer(int cpu); -void xen_setup_cpu_clockevents(void); -unsigned long xen_cpu_khz(void); -void __init xen_time_init(void); -unsigned long xen_get_wallclock(void); -int xen_set_wallclock(unsigned long time); -unsigned long long xen_sched_clock(void); - -void xen_mark_init_mm_pinned(void); - -DECLARE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode); - -static inline unsigned xen_get_lazy_mode(void) -{ - return x86_read_percpu(xen_lazy_mode); -} - -void __init xen_fill_possible_map(void); - -void __init xen_setup_vcpu_info_placement(void); -void xen_smp_prepare_boot_cpu(void); -void xen_smp_prepare_cpus(unsigned int max_cpus); -int xen_cpu_up(unsigned int cpu); -void xen_smp_cpus_done(unsigned int max_cpus); - -void xen_smp_send_stop(void); -void xen_smp_send_reschedule(int cpu); -int xen_smp_call_function (void (*func) (void *info), void *info, int nonatomic, - int wait); -int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info, - int nonatomic, int wait); - -int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), - void *info, int wait); - - -/* Declare an asm function, along with symbols needed to make it - inlineable */ -#define DECL_ASM(ret, name, ...) \ - ret name(__VA_ARGS__); \ - extern char name##_end[]; \ - extern char name##_reloc[] \ - -DECL_ASM(void, xen_irq_enable_direct, void); -DECL_ASM(void, xen_irq_disable_direct, void); -DECL_ASM(unsigned long, xen_save_fl_direct, void); -DECL_ASM(void, xen_restore_fl_direct, unsigned long); - -void xen_iret_direct(void); -#endif /* XEN_OPS_H */ diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig new file mode 100644 index 00000000000..9df99e1885a --- /dev/null +++ b/arch/x86/xen/Kconfig @@ -0,0 +1,11 @@ +# +# This Kconfig describes xen options +# + +config XEN + bool "Enable support for Xen hypervisor" + depends on PARAVIRT && X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES + help + This is the Linux Xen port. Enabling this will allow the + kernel to boot in a paravirtualized environment under the + Xen hypervisor. diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile new file mode 100644 index 00000000000..343df246bd3 --- /dev/null +++ b/arch/x86/xen/Makefile @@ -0,0 +1,4 @@ +obj-y := enlighten.o setup.o features.o multicalls.o mmu.o \ + events.o time.o manage.o xen-asm.o + +obj-$(CONFIG_SMP) += smp.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c new file mode 100644 index 00000000000..f01bfcd4bde --- /dev/null +++ b/arch/x86/xen/enlighten.c @@ -0,0 +1,1146 @@ +/* + * Core of Xen paravirt_ops implementation. + * + * This file contains the xen_paravirt_ops structure itself, and the + * implementations for: + * - privileged instructions + * - interrupt flags + * - segment operations + * - booting and setup + * + * Jeremy Fitzhardinge , XenSource Inc, 2007 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "xen-ops.h" +#include "mmu.h" +#include "multicalls.h" + +EXPORT_SYMBOL_GPL(hypercall_page); + +DEFINE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode); + +DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); +DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); +DEFINE_PER_CPU(unsigned long, xen_cr3); + +struct start_info *xen_start_info; +EXPORT_SYMBOL_GPL(xen_start_info); + +static /* __initdata */ struct shared_info dummy_shared_info; + +/* + * Point at some empty memory to start with. We map the real shared_info + * page as soon as fixmap is up and running. + */ +struct shared_info *HYPERVISOR_shared_info = (void *)&dummy_shared_info; + +/* + * Flag to determine whether vcpu info placement is available on all + * VCPUs. We assume it is to start with, and then set it to zero on + * the first failure. This is because it can succeed on some VCPUs + * and not others, since it can involve hypervisor memory allocation, + * or because the guest failed to guarantee all the appropriate + * constraints on all VCPUs (ie buffer can't cross a page boundary). + * + * Note that any particular CPU may be using a placed vcpu structure, + * but we can only optimise if the all are. + * + * 0: not available, 1: available + */ +static int have_vcpu_info_placement = 1; + +static void __init xen_vcpu_setup(int cpu) +{ + struct vcpu_register_vcpu_info info; + int err; + struct vcpu_info *vcpup; + + per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; + + if (!have_vcpu_info_placement) + return; /* already tested, not available */ + + vcpup = &per_cpu(xen_vcpu_info, cpu); + + info.mfn = virt_to_mfn(vcpup); + info.offset = offset_in_page(vcpup); + + printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %x, offset %d\n", + cpu, vcpup, info.mfn, info.offset); + + /* Check to see if the hypervisor will put the vcpu_info + structure where we want it, which allows direct access via + a percpu-variable. */ + err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); + + if (err) { + printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err); + have_vcpu_info_placement = 0; + } else { + /* This cpu is using the registered vcpu info, even if + later ones fail to. */ + per_cpu(xen_vcpu, cpu) = vcpup; + + printk(KERN_DEBUG "cpu %d using vcpu_info at %p\n", + cpu, vcpup); + } +} + +static void __init xen_banner(void) +{ + printk(KERN_INFO "Booting paravirtualized kernel on %s\n", + paravirt_ops.name); + printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic); +} + +static void xen_cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + unsigned maskedx = ~0; + + /* + * Mask out inconvenient features, to try and disable as many + * unsupported kernel subsystems as possible. + */ + if (*eax == 1) + maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ + (1 << X86_FEATURE_ACPI) | /* disable ACPI */ + (1 << X86_FEATURE_ACC)); /* thermal monitoring */ + + asm(XEN_EMULATE_PREFIX "cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); + *edx &= maskedx; +} + +static void xen_set_debugreg(int reg, unsigned long val) +{ + HYPERVISOR_set_debugreg(reg, val); +} + +static unsigned long xen_get_debugreg(int reg) +{ + return HYPERVISOR_get_debugreg(reg); +} + +static unsigned long xen_save_fl(void) +{ + struct vcpu_info *vcpu; + unsigned long flags; + + vcpu = x86_read_percpu(xen_vcpu); + + /* flag has opposite sense of mask */ + flags = !vcpu->evtchn_upcall_mask; + + /* convert to IF type flag + -0 -> 0x00000000 + -1 -> 0xffffffff + */ + return (-flags) & X86_EFLAGS_IF; +} + +static void xen_restore_fl(unsigned long flags) +{ + struct vcpu_info *vcpu; + + /* convert from IF type flag */ + flags = !(flags & X86_EFLAGS_IF); + + /* There's a one instruction preempt window here. We need to + make sure we're don't switch CPUs between getting the vcpu + pointer and updating the mask. */ + preempt_disable(); + vcpu = x86_read_percpu(xen_vcpu); + vcpu->evtchn_upcall_mask = flags; + preempt_enable_no_resched(); + + /* Doesn't matter if we get preempted here, because any + pending event will get dealt with anyway. */ + + if (flags == 0) { + preempt_check_resched(); + barrier(); /* unmask then check (avoid races) */ + if (unlikely(vcpu->evtchn_upcall_pending)) + force_evtchn_callback(); + } +} + +static void xen_irq_disable(void) +{ + /* There's a one instruction preempt window here. We need to + make sure we're don't switch CPUs between getting the vcpu + pointer and updating the mask. */ + preempt_disable(); + x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; + preempt_enable_no_resched(); +} + +static void xen_irq_enable(void) +{ + struct vcpu_info *vcpu; + + /* There's a one instruction preempt window here. We need to + make sure we're don't switch CPUs between getting the vcpu + pointer and updating the mask. */ + preempt_disable(); + vcpu = x86_read_percpu(xen_vcpu); + vcpu->evtchn_upcall_mask = 0; + preempt_enable_no_resched(); + + /* Doesn't matter if we get preempted here, because any + pending event will get dealt with anyway. */ + + barrier(); /* unmask then check (avoid races) */ + if (unlikely(vcpu->evtchn_upcall_pending)) + force_evtchn_callback(); +} + +static void xen_safe_halt(void) +{ + /* Blocking includes an implicit local_irq_enable(). */ + if (HYPERVISOR_sched_op(SCHEDOP_block, 0) != 0) + BUG(); +} + +static void xen_halt(void) +{ + if (irqs_disabled()) + HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); + else + xen_safe_halt(); +} + +static void xen_set_lazy_mode(enum paravirt_lazy_mode mode) +{ + BUG_ON(preemptible()); + + switch (mode) { + case PARAVIRT_LAZY_NONE: + BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE); + break; + + case PARAVIRT_LAZY_MMU: + case PARAVIRT_LAZY_CPU: + BUG_ON(x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE); + break; + + case PARAVIRT_LAZY_FLUSH: + /* flush if necessary, but don't change state */ + if (x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE) + xen_mc_flush(); + return; + } + + xen_mc_flush(); + x86_write_percpu(xen_lazy_mode, mode); +} + +static unsigned long xen_store_tr(void) +{ + return 0; +} + +static void xen_set_ldt(const void *addr, unsigned entries) +{ + unsigned long linear_addr = (unsigned long)addr; + struct mmuext_op *op; + struct multicall_space mcs = xen_mc_entry(sizeof(*op)); + + op = mcs.args; + op->cmd = MMUEXT_SET_LDT; + if (linear_addr) { + /* ldt my be vmalloced, use arbitrary_virt_to_machine */ + xmaddr_t maddr; + maddr = arbitrary_virt_to_machine((unsigned long)addr); + linear_addr = (unsigned long)maddr.maddr; + } + op->arg1.linear_addr = linear_addr; + op->arg2.nr_ents = entries; + + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_CPU); +} + +static void xen_load_gdt(const struct Xgt_desc_struct *dtr) +{ + unsigned long *frames; + unsigned long va = dtr->address; + unsigned int size = dtr->size + 1; + unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + int f; + struct multicall_space mcs; + + /* A GDT can be up to 64k in size, which corresponds to 8192 + 8-byte entries, or 16 4k pages.. */ + + BUG_ON(size > 65536); + BUG_ON(va & ~PAGE_MASK); + + mcs = xen_mc_entry(sizeof(*frames) * pages); + frames = mcs.args; + + for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { + frames[f] = virt_to_mfn(va); + make_lowmem_page_readonly((void *)va); + } + + MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); + + xen_mc_issue(PARAVIRT_LAZY_CPU); +} + +static void load_TLS_descriptor(struct thread_struct *t, + unsigned int cpu, unsigned int i) +{ + struct desc_struct *gdt = get_cpu_gdt_table(cpu); + xmaddr_t maddr = virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); + struct multicall_space mc = __xen_mc_entry(0); + + MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); +} + +static void xen_load_tls(struct thread_struct *t, unsigned int cpu) +{ + xen_mc_batch(); + + load_TLS_descriptor(t, cpu, 0); + load_TLS_descriptor(t, cpu, 1); + load_TLS_descriptor(t, cpu, 2); + + xen_mc_issue(PARAVIRT_LAZY_CPU); + + /* + * XXX sleazy hack: If we're being called in a lazy-cpu zone, + * it means we're in a context switch, and %gs has just been + * saved. This means we can zero it out to prevent faults on + * exit from the hypervisor if the next process has no %gs. + * Either way, it has been saved, and the new value will get + * loaded properly. This will go away as soon as Xen has been + * modified to not save/restore %gs for normal hypercalls. + */ + if (xen_get_lazy_mode() == PARAVIRT_LAZY_CPU) + loadsegment(gs, 0); +} + +static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, + u32 low, u32 high) +{ + unsigned long lp = (unsigned long)&dt[entrynum]; + xmaddr_t mach_lp = virt_to_machine(lp); + u64 entry = (u64)high << 32 | low; + + preempt_disable(); + + xen_mc_flush(); + if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry)) + BUG(); + + preempt_enable(); +} + +static int cvt_gate_to_trap(int vector, u32 low, u32 high, + struct trap_info *info) +{ + u8 type, dpl; + + type = (high >> 8) & 0x1f; + dpl = (high >> 13) & 3; + + if (type != 0xf && type != 0xe) + return 0; + + info->vector = vector; + info->address = (high & 0xffff0000) | (low & 0x0000ffff); + info->cs = low >> 16; + info->flags = dpl; + /* interrupt gates clear IF */ + if (type == 0xe) + info->flags |= 4; + + return 1; +} + +/* Locations of each CPU's IDT */ +static DEFINE_PER_CPU(struct Xgt_desc_struct, idt_desc); + +/* Set an IDT entry. If the entry is part of the current IDT, then + also update Xen. */ +static void xen_write_idt_entry(struct desc_struct *dt, int entrynum, + u32 low, u32 high) +{ + unsigned long p = (unsigned long)&dt[entrynum]; + unsigned long start, end; + + preempt_disable(); + + start = __get_cpu_var(idt_desc).address; + end = start + __get_cpu_var(idt_desc).size + 1; + + xen_mc_flush(); + + write_dt_entry(dt, entrynum, low, high); + + if (p >= start && (p + 8) <= end) { + struct trap_info info[2]; + + info[1].address = 0; + + if (cvt_gate_to_trap(entrynum, low, high, &info[0])) + if (HYPERVISOR_set_trap_table(info)) + BUG(); + } + + preempt_enable(); +} + +static void xen_convert_trap_info(const struct Xgt_desc_struct *desc, + struct trap_info *traps) +{ + unsigned in, out, count; + + count = (desc->size+1) / 8; + BUG_ON(count > 256); + + for (in = out = 0; in < count; in++) { + const u32 *entry = (u32 *)(desc->address + in * 8); + + if (cvt_gate_to_trap(in, entry[0], entry[1], &traps[out])) + out++; + } + traps[out].address = 0; +} + +void xen_copy_trap_info(struct trap_info *traps) +{ + const struct Xgt_desc_struct *desc = &__get_cpu_var(idt_desc); + + xen_convert_trap_info(desc, traps); +} + +/* Load a new IDT into Xen. In principle this can be per-CPU, so we + hold a spinlock to protect the static traps[] array (static because + it avoids allocation, and saves stack space). */ +static void xen_load_idt(const struct Xgt_desc_struct *desc) +{ + static DEFINE_SPINLOCK(lock); + static struct trap_info traps[257]; + + spin_lock(&lock); + + __get_cpu_var(idt_desc) = *desc; + + xen_convert_trap_info(desc, traps); + + xen_mc_flush(); + if (HYPERVISOR_set_trap_table(traps)) + BUG(); + + spin_unlock(&lock); +} + +/* Write a GDT descriptor entry. Ignore LDT descriptors, since + they're handled differently. */ +static void xen_write_gdt_entry(struct desc_struct *dt, int entry, + u32 low, u32 high) +{ + preempt_disable(); + + switch ((high >> 8) & 0xff) { + case DESCTYPE_LDT: + case DESCTYPE_TSS: + /* ignore */ + break; + + default: { + xmaddr_t maddr = virt_to_machine(&dt[entry]); + u64 desc = (u64)high << 32 | low; + + xen_mc_flush(); + if (HYPERVISOR_update_descriptor(maddr.maddr, desc)) + BUG(); + } + + } + + preempt_enable(); +} + +static void xen_load_esp0(struct tss_struct *tss, + struct thread_struct *thread) +{ + struct multicall_space mcs = xen_mc_entry(0); + MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->esp0); + xen_mc_issue(PARAVIRT_LAZY_CPU); +} + +static void xen_set_iopl_mask(unsigned mask) +{ + struct physdev_set_iopl set_iopl; + + /* Force the change at ring 0. */ + set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; + HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); +} + +static void xen_io_delay(void) +{ +} + +#ifdef CONFIG_X86_LOCAL_APIC +static unsigned long xen_apic_read(unsigned long reg) +{ + return 0; +} + +static void xen_apic_write(unsigned long reg, unsigned long val) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} +#endif + +static void xen_flush_tlb(void) +{ + struct mmuext_op *op; + struct multicall_space mcs = xen_mc_entry(sizeof(*op)); + + op = mcs.args; + op->cmd = MMUEXT_TLB_FLUSH_LOCAL; + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_MMU); +} + +static void xen_flush_tlb_single(unsigned long addr) +{ + struct mmuext_op *op; + struct multicall_space mcs = xen_mc_entry(sizeof(*op)); + + op = mcs.args; + op->cmd = MMUEXT_INVLPG_LOCAL; + op->arg1.linear_addr = addr & PAGE_MASK; + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_MMU); +} + +static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, + unsigned long va) +{ + struct { + struct mmuext_op op; + cpumask_t mask; + } *args; + cpumask_t cpumask = *cpus; + struct multicall_space mcs; + + /* + * A couple of (to be removed) sanity checks: + * + * - current CPU must not be in mask + * - mask must exist :) + */ + BUG_ON(cpus_empty(cpumask)); + BUG_ON(cpu_isset(smp_processor_id(), cpumask)); + BUG_ON(!mm); + + /* If a CPU which we ran on has gone down, OK. */ + cpus_and(cpumask, cpumask, cpu_online_map); + if (cpus_empty(cpumask)) + return; + + mcs = xen_mc_entry(sizeof(*args)); + args = mcs.args; + args->mask = cpumask; + args->op.arg2.vcpumask = &args->mask; + + if (va == TLB_FLUSH_ALL) { + args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; + } else { + args->op.cmd = MMUEXT_INVLPG_MULTI; + args->op.arg1.linear_addr = va; + } + + MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_MMU); +} + +static void xen_write_cr2(unsigned long cr2) +{ + x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; +} + +static unsigned long xen_read_cr2(void) +{ + return x86_read_percpu(xen_vcpu)->arch.cr2; +} + +static unsigned long xen_read_cr2_direct(void) +{ + return x86_read_percpu(xen_vcpu_info.arch.cr2); +} + +static void xen_write_cr4(unsigned long cr4) +{ + /* Just ignore cr4 changes; Xen doesn't allow us to do + anything anyway. */ +} + +static unsigned long xen_read_cr3(void) +{ + return x86_read_percpu(xen_cr3); +} + +static void xen_write_cr3(unsigned long cr3) +{ + BUG_ON(preemptible()); + + if (cr3 == x86_read_percpu(xen_cr3)) { + /* just a simple tlb flush */ + xen_flush_tlb(); + return; + } + + x86_write_percpu(xen_cr3, cr3); + + + { + struct mmuext_op *op; + struct multicall_space mcs = xen_mc_entry(sizeof(*op)); + unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3)); + + op = mcs.args; + op->cmd = MMUEXT_NEW_BASEPTR; + op->arg1.mfn = mfn; + + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_CPU); + } +} + +/* Early in boot, while setting up the initial pagetable, assume + everything is pinned. */ +static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn) +{ + BUG_ON(mem_map); /* should only be used early */ + make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); +} + +/* This needs to make sure the new pte page is pinned iff its being + attached to a pinned pagetable. */ +static void xen_alloc_pt(struct mm_struct *mm, u32 pfn) +{ + struct page *page = pfn_to_page(pfn); + + if (PagePinned(virt_to_page(mm->pgd))) { + SetPagePinned(page); + + if (!PageHighMem(page)) + make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); + else + /* make sure there are no stray mappings of + this page */ + kmap_flush_unused(); + } +} + +/* This should never happen until we're OK to use struct page */ +static void xen_release_pt(u32 pfn) +{ + struct page *page = pfn_to_page(pfn); + + if (PagePinned(page)) { + if (!PageHighMem(page)) + make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); + } +} + +#ifdef CONFIG_HIGHPTE +static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) +{ + pgprot_t prot = PAGE_KERNEL; + + if (PagePinned(page)) + prot = PAGE_KERNEL_RO; + + if (0 && PageHighMem(page)) + printk("mapping highpte %lx type %d prot %s\n", + page_to_pfn(page), type, + (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ"); + + return kmap_atomic_prot(page, type, prot); +} +#endif + +static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) +{ + /* If there's an existing pte, then don't allow _PAGE_RW to be set */ + if (pte_val_ma(*ptep) & _PAGE_PRESENT) + pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & + pte_val_ma(pte)); + + return pte; +} + +/* Init-time set_pte while constructing initial pagetables, which + doesn't allow RO pagetable pages to be remapped RW */ +static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) +{ + pte = mask_rw_pte(ptep, pte); + + xen_set_pte(ptep, pte); +} + +static __init void xen_pagetable_setup_start(pgd_t *base) +{ + pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; + + /* special set_pte for pagetable initialization */ + paravirt_ops.set_pte = xen_set_pte_init; + + init_mm.pgd = base; + /* + * copy top-level of Xen-supplied pagetable into place. For + * !PAE we can use this as-is, but for PAE it is a stand-in + * while we copy the pmd pages. + */ + memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); + + if (PTRS_PER_PMD > 1) { + int i; + /* + * For PAE, need to allocate new pmds, rather than + * share Xen's, since Xen doesn't like pmd's being + * shared between address spaces. + */ + for (i = 0; i < PTRS_PER_PGD; i++) { + if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { + pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); + + memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), + PAGE_SIZE); + + make_lowmem_page_readonly(pmd); + + set_pgd(&base[i], __pgd(1 + __pa(pmd))); + } else + pgd_clear(&base[i]); + } + } + + /* make sure zero_page is mapped RO so we can use it in pagetables */ + make_lowmem_page_readonly(empty_zero_page); + make_lowmem_page_readonly(base); + /* + * Switch to new pagetable. This is done before + * pagetable_init has done anything so that the new pages + * added to the table can be prepared properly for Xen. + */ + xen_write_cr3(__pa(base)); +} + +static __init void xen_pagetable_setup_done(pgd_t *base) +{ + /* This will work as long as patching hasn't happened yet + (which it hasn't) */ + paravirt_ops.alloc_pt = xen_alloc_pt; + paravirt_ops.set_pte = xen_set_pte; + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* + * Create a mapping for the shared info page. + * Should be set_fixmap(), but shared_info is a machine + * address with no corresponding pseudo-phys address. + */ + set_pte_mfn(fix_to_virt(FIX_PARAVIRT_BOOTMAP), + PFN_DOWN(xen_start_info->shared_info), + PAGE_KERNEL); + + HYPERVISOR_shared_info = + (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); + + } else + HYPERVISOR_shared_info = + (struct shared_info *)__va(xen_start_info->shared_info); + + /* Actually pin the pagetable down, but we can't set PG_pinned + yet because the page structures don't exist yet. */ + { + struct mmuext_op op; +#ifdef CONFIG_X86_PAE + op.cmd = MMUEXT_PIN_L3_TABLE; +#else + op.cmd = MMUEXT_PIN_L3_TABLE; +#endif + op.arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(base))); + if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) + BUG(); + } +} + +/* This is called once we have the cpu_possible_map */ +void __init xen_setup_vcpu_info_placement(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + xen_vcpu_setup(cpu); + + /* xen_vcpu_setup managed to place the vcpu_info within the + percpu area for all cpus, so make use of it */ + if (have_vcpu_info_placement) { + printk(KERN_INFO "Xen: using vcpu_info placement\n"); + + paravirt_ops.save_fl = xen_save_fl_direct; + paravirt_ops.restore_fl = xen_restore_fl_direct; + paravirt_ops.irq_disable = xen_irq_disable_direct; + paravirt_ops.irq_enable = xen_irq_enable_direct; + paravirt_ops.read_cr2 = xen_read_cr2_direct; + paravirt_ops.iret = xen_iret_direct; + } +} + +static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, + unsigned long addr, unsigned len) +{ + char *start, *end, *reloc; + unsigned ret; + + start = end = reloc = NULL; + +#define SITE(x) \ + case PARAVIRT_PATCH(x): \ + if (have_vcpu_info_placement) { \ + start = (char *)xen_##x##_direct; \ + end = xen_##x##_direct_end; \ + reloc = xen_##x##_direct_reloc; \ + } \ + goto patch_site + + switch (type) { + SITE(irq_enable); + SITE(irq_disable); + SITE(save_fl); + SITE(restore_fl); +#undef SITE + + patch_site: + if (start == NULL || (end-start) > len) + goto default_patch; + + ret = paravirt_patch_insns(insnbuf, len, start, end); + + /* Note: because reloc is assigned from something that + appears to be an array, gcc assumes it's non-null, + but doesn't know its relationship with start and + end. */ + if (reloc > start && reloc < end) { + int reloc_off = reloc - start; + long *relocp = (long *)(insnbuf + reloc_off); + long delta = start - (char *)addr; + + *relocp += delta; + } + break; + + default_patch: + default: + ret = paravirt_patch_default(type, clobbers, insnbuf, + addr, len); + break; + } + + return ret; +} + +static const struct paravirt_ops xen_paravirt_ops __initdata = { + .paravirt_enabled = 1, + .shared_kernel_pmd = 0, + + .name = "Xen", + .banner = xen_banner, + + .patch = xen_patch, + + .memory_setup = xen_memory_setup, + .arch_setup = xen_arch_setup, + .init_IRQ = xen_init_IRQ, + .post_allocator_init = xen_mark_init_mm_pinned, + + .time_init = xen_time_init, + .set_wallclock = xen_set_wallclock, + .get_wallclock = xen_get_wallclock, + .get_cpu_khz = xen_cpu_khz, + .sched_clock = xen_sched_clock, + + .cpuid = xen_cpuid, + + .set_debugreg = xen_set_debugreg, + .get_debugreg = xen_get_debugreg, + + .clts = native_clts, + + .read_cr0 = native_read_cr0, + .write_cr0 = native_write_cr0, + + .read_cr2 = xen_read_cr2, + .write_cr2 = xen_write_cr2, + + .read_cr3 = xen_read_cr3, + .write_cr3 = xen_write_cr3, + + .read_cr4 = native_read_cr4, + .read_cr4_safe = native_read_cr4_safe, + .write_cr4 = xen_write_cr4, + + .save_fl = xen_save_fl, + .restore_fl = xen_restore_fl, + .irq_disable = xen_irq_disable, + .irq_enable = xen_irq_enable, + .safe_halt = xen_safe_halt, + .halt = xen_halt, + .wbinvd = native_wbinvd, + + .read_msr = native_read_msr_safe, + .write_msr = native_write_msr_safe, + .read_tsc = native_read_tsc, + .read_pmc = native_read_pmc, + + .iret = (void *)&hypercall_page[__HYPERVISOR_iret], + .irq_enable_sysexit = NULL, /* never called */ + + .load_tr_desc = paravirt_nop, + .set_ldt = xen_set_ldt, + .load_gdt = xen_load_gdt, + .load_idt = xen_load_idt, + .load_tls = xen_load_tls, + + .store_gdt = native_store_gdt, + .store_idt = native_store_idt, + .store_tr = xen_store_tr, + + .write_ldt_entry = xen_write_ldt_entry, + .write_gdt_entry = xen_write_gdt_entry, + .write_idt_entry = xen_write_idt_entry, + .load_esp0 = xen_load_esp0, + + .set_iopl_mask = xen_set_iopl_mask, + .io_delay = xen_io_delay, + +#ifdef CONFIG_X86_LOCAL_APIC + .apic_write = xen_apic_write, + .apic_write_atomic = xen_apic_write, + .apic_read = xen_apic_read, + .setup_boot_clock = paravirt_nop, + .setup_secondary_clock = paravirt_nop, + .startup_ipi_hook = paravirt_nop, +#endif + + .flush_tlb_user = xen_flush_tlb, + .flush_tlb_kernel = xen_flush_tlb, + .flush_tlb_single = xen_flush_tlb_single, + .flush_tlb_others = xen_flush_tlb_others, + + .pte_update = paravirt_nop, + .pte_update_defer = paravirt_nop, + + .pagetable_setup_start = xen_pagetable_setup_start, + .pagetable_setup_done = xen_pagetable_setup_done, + + .alloc_pt = xen_alloc_pt_init, + .release_pt = xen_release_pt, + .alloc_pd = paravirt_nop, + .alloc_pd_clone = paravirt_nop, + .release_pd = paravirt_nop, + +#ifdef CONFIG_HIGHPTE + .kmap_atomic_pte = xen_kmap_atomic_pte, +#endif + + .set_pte = NULL, /* see xen_pagetable_setup_* */ + .set_pte_at = xen_set_pte_at, + .set_pmd = xen_set_pmd, + + .pte_val = xen_pte_val, + .pgd_val = xen_pgd_val, + + .make_pte = xen_make_pte, + .make_pgd = xen_make_pgd, + +#ifdef CONFIG_X86_PAE + .set_pte_atomic = xen_set_pte_atomic, + .set_pte_present = xen_set_pte_at, + .set_pud = xen_set_pud, + .pte_clear = xen_pte_clear, + .pmd_clear = xen_pmd_clear, + + .make_pmd = xen_make_pmd, + .pmd_val = xen_pmd_val, +#endif /* PAE */ + + .activate_mm = xen_activate_mm, + .dup_mmap = xen_dup_mmap, + .exit_mmap = xen_exit_mmap, + + .set_lazy_mode = xen_set_lazy_mode, +}; + +#ifdef CONFIG_SMP +static const struct smp_ops xen_smp_ops __initdata = { + .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, + .smp_prepare_cpus = xen_smp_prepare_cpus, + .cpu_up = xen_cpu_up, + .smp_cpus_done = xen_smp_cpus_done, + + .smp_send_stop = xen_smp_send_stop, + .smp_send_reschedule = xen_smp_send_reschedule, + .smp_call_function_mask = xen_smp_call_function_mask, +}; +#endif /* CONFIG_SMP */ + +static void xen_reboot(int reason) +{ +#ifdef CONFIG_SMP + smp_send_stop(); +#endif + + if (HYPERVISOR_sched_op(SCHEDOP_shutdown, reason)) + BUG(); +} + +static void xen_restart(char *msg) +{ + xen_reboot(SHUTDOWN_reboot); +} + +static void xen_emergency_restart(void) +{ + xen_reboot(SHUTDOWN_reboot); +} + +static void xen_machine_halt(void) +{ + xen_reboot(SHUTDOWN_poweroff); +} + +static void xen_crash_shutdown(struct pt_regs *regs) +{ + xen_reboot(SHUTDOWN_crash); +} + +static const struct machine_ops __initdata xen_machine_ops = { + .restart = xen_restart, + .halt = xen_machine_halt, + .power_off = xen_machine_halt, + .shutdown = xen_machine_halt, + .crash_shutdown = xen_crash_shutdown, + .emergency_restart = xen_emergency_restart, +}; + + +/* First C function to be called on Xen boot */ +asmlinkage void __init xen_start_kernel(void) +{ + pgd_t *pgd; + + if (!xen_start_info) + return; + + BUG_ON(memcmp(xen_start_info->magic, "xen-3.0", 7) != 0); + + /* Install Xen paravirt ops */ + paravirt_ops = xen_paravirt_ops; + machine_ops = xen_machine_ops; + +#ifdef CONFIG_SMP + smp_ops = xen_smp_ops; +#endif + + xen_setup_features(); + + /* Get mfn list */ + if (!xen_feature(XENFEAT_auto_translated_physmap)) + phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list; + + pgd = (pgd_t *)xen_start_info->pt_base; + + init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; + + init_mm.pgd = pgd; /* use the Xen pagetables to start */ + + /* keep using Xen gdt for now; no urgent need to change it */ + + x86_write_percpu(xen_cr3, __pa(pgd)); + +#ifdef CONFIG_SMP + /* Don't do the full vcpu_info placement stuff until we have a + possible map. */ + per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; +#else + /* May as well do it now, since there's no good time to call + it later on UP. */ + xen_setup_vcpu_info_placement(); +#endif + + paravirt_ops.kernel_rpl = 1; + if (xen_feature(XENFEAT_supervisor_mode_kernel)) + paravirt_ops.kernel_rpl = 0; + + /* set the limit of our address space */ + reserve_top_address(-HYPERVISOR_VIRT_START + 2 * PAGE_SIZE); + + /* set up basic CPUID stuff */ + cpu_detect(&new_cpu_data); + new_cpu_data.hard_math = 1; + new_cpu_data.x86_capability[0] = cpuid_edx(1); + + /* Poke various useful things into boot_params */ + LOADER_TYPE = (9 << 4) | 0; + INITRD_START = xen_start_info->mod_start ? __pa(xen_start_info->mod_start) : 0; + INITRD_SIZE = xen_start_info->mod_len; + + /* Start the world */ + start_kernel(); +} diff --git a/arch/x86/xen/events.c b/arch/x86/xen/events.c new file mode 100644 index 00000000000..da1b173547a --- /dev/null +++ b/arch/x86/xen/events.c @@ -0,0 +1,591 @@ +/* + * Xen event channels + * + * Xen models interrupts with abstract event channels. Because each + * domain gets 1024 event channels, but NR_IRQ is not that large, we + * must dynamically map irqs<->event channels. The event channels + * interface with the rest of the kernel by defining a xen interrupt + * chip. When an event is recieved, it is mapped to an irq and sent + * through the normal interrupt processing path. + * + * There are four kinds of events which can be mapped to an event + * channel: + * + * 1. Inter-domain notifications. This includes all the virtual + * device events, since they're driven by front-ends in another domain + * (typically dom0). + * 2. VIRQs, typically used for timers. These are per-cpu events. + * 3. IPIs. + * 4. Hardware interrupts. Not supported at present. + * + * Jeremy Fitzhardinge , XenSource Inc, 2007 + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "xen-ops.h" + +/* + * This lock protects updates to the following mapping and reference-count + * arrays. The lock does not need to be acquired to read the mapping tables. + */ +static DEFINE_SPINLOCK(irq_mapping_update_lock); + +/* IRQ <-> VIRQ mapping. */ +static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1}; + +/* IRQ <-> IPI mapping */ +static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1}; + +/* Packed IRQ information: binding type, sub-type index, and event channel. */ +struct packed_irq +{ + unsigned short evtchn; + unsigned char index; + unsigned char type; +}; + +static struct packed_irq irq_info[NR_IRQS]; + +/* Binding types. */ +enum { + IRQT_UNBOUND, + IRQT_PIRQ, + IRQT_VIRQ, + IRQT_IPI, + IRQT_EVTCHN +}; + +/* Convenient shorthand for packed representation of an unbound IRQ. */ +#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0) + +static int evtchn_to_irq[NR_EVENT_CHANNELS] = { + [0 ... NR_EVENT_CHANNELS-1] = -1 +}; +static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG]; +static u8 cpu_evtchn[NR_EVENT_CHANNELS]; + +/* Reference counts for bindings to IRQs. */ +static int irq_bindcount[NR_IRQS]; + +/* Xen will never allocate port zero for any purpose. */ +#define VALID_EVTCHN(chn) ((chn) != 0) + +/* + * Force a proper event-channel callback from Xen after clearing the + * callback mask. We do this in a very simple manner, by making a call + * down into Xen. The pending flag will be checked by Xen on return. + */ +void force_evtchn_callback(void) +{ + (void)HYPERVISOR_xen_version(0, NULL); +} +EXPORT_SYMBOL_GPL(force_evtchn_callback); + +static struct irq_chip xen_dynamic_chip; + +/* Constructor for packed IRQ information. */ +static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn) +{ + return (struct packed_irq) { evtchn, index, type }; +} + +/* + * Accessors for packed IRQ information. + */ +static inline unsigned int evtchn_from_irq(int irq) +{ + return irq_info[irq].evtchn; +} + +static inline unsigned int index_from_irq(int irq) +{ + return irq_info[irq].index; +} + +static inline unsigned int type_from_irq(int irq) +{ + return irq_info[irq].type; +} + +static inline unsigned long active_evtchns(unsigned int cpu, + struct shared_info *sh, + unsigned int idx) +{ + return (sh->evtchn_pending[idx] & + cpu_evtchn_mask[cpu][idx] & + ~sh->evtchn_mask[idx]); +} + +static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) +{ + int irq = evtchn_to_irq[chn]; + + BUG_ON(irq == -1); +#ifdef CONFIG_SMP + irq_desc[irq].affinity = cpumask_of_cpu(cpu); +#endif + + __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]); + __set_bit(chn, cpu_evtchn_mask[cpu]); + + cpu_evtchn[chn] = cpu; +} + +static void init_evtchn_cpu_bindings(void) +{ +#ifdef CONFIG_SMP + int i; + /* By default all event channels notify CPU#0. */ + for (i = 0; i < NR_IRQS; i++) + irq_desc[i].affinity = cpumask_of_cpu(0); +#endif + + memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); + memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); +} + +static inline unsigned int cpu_from_evtchn(unsigned int evtchn) +{ + return cpu_evtchn[evtchn]; +} + +static inline void clear_evtchn(int port) +{ + struct shared_info *s = HYPERVISOR_shared_info; + sync_clear_bit(port, &s->evtchn_pending[0]); +} + +static inline void set_evtchn(int port) +{ + struct shared_info *s = HYPERVISOR_shared_info; + sync_set_bit(port, &s->evtchn_pending[0]); +} + + +/** + * notify_remote_via_irq - send event to remote end of event channel via irq + * @irq: irq of event channel to send event to + * + * Unlike notify_remote_via_evtchn(), this is safe to use across + * save/restore. Notifications on a broken connection are silently + * dropped. + */ +void notify_remote_via_irq(int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + notify_remote_via_evtchn(evtchn); +} +EXPORT_SYMBOL_GPL(notify_remote_via_irq); + +static void mask_evtchn(int port) +{ + struct shared_info *s = HYPERVISOR_shared_info; + sync_set_bit(port, &s->evtchn_mask[0]); +} + +static void unmask_evtchn(int port) +{ + struct shared_info *s = HYPERVISOR_shared_info; + unsigned int cpu = get_cpu(); + + BUG_ON(!irqs_disabled()); + + /* Slow path (hypercall) if this is a non-local port. */ + if (unlikely(cpu != cpu_from_evtchn(port))) { + struct evtchn_unmask unmask = { .port = port }; + (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); + } else { + struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); + + sync_clear_bit(port, &s->evtchn_mask[0]); + + /* + * The following is basically the equivalent of + * 'hw_resend_irq'. Just like a real IO-APIC we 'lose + * the interrupt edge' if the channel is masked. + */ + if (sync_test_bit(port, &s->evtchn_pending[0]) && + !sync_test_and_set_bit(port / BITS_PER_LONG, + &vcpu_info->evtchn_pending_sel)) + vcpu_info->evtchn_upcall_pending = 1; + } + + put_cpu(); +} + +static int find_unbound_irq(void) +{ + int irq; + + /* Only allocate from dynirq range */ + for (irq = 0; irq < NR_IRQS; irq++) + if (irq_bindcount[irq] == 0) + break; + + if (irq == NR_IRQS) + panic("No available IRQ to bind to: increase NR_IRQS!\n"); + + return irq; +} + +int bind_evtchn_to_irq(unsigned int evtchn) +{ + int irq; + + spin_lock(&irq_mapping_update_lock); + + irq = evtchn_to_irq[evtchn]; + + if (irq == -1) { + irq = find_unbound_irq(); + + dynamic_irq_init(irq); + set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_level_irq, "event"); + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); + } + + irq_bindcount[irq]++; + + spin_unlock(&irq_mapping_update_lock); + + return irq; +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); + +static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) +{ + struct evtchn_bind_ipi bind_ipi; + int evtchn, irq; + + spin_lock(&irq_mapping_update_lock); + + irq = per_cpu(ipi_to_irq, cpu)[ipi]; + if (irq == -1) { + irq = find_unbound_irq(); + if (irq < 0) + goto out; + + dynamic_irq_init(irq); + set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_level_irq, "ipi"); + + bind_ipi.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, + &bind_ipi) != 0) + BUG(); + evtchn = bind_ipi.port; + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); + + per_cpu(ipi_to_irq, cpu)[ipi] = irq; + + bind_evtchn_to_cpu(evtchn, cpu); + } + + irq_bindcount[irq]++; + + out: + spin_unlock(&irq_mapping_update_lock); + return irq; +} + + +static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) +{ + struct evtchn_bind_virq bind_virq; + int evtchn, irq; + + spin_lock(&irq_mapping_update_lock); + + irq = per_cpu(virq_to_irq, cpu)[virq]; + + if (irq == -1) { + bind_virq.virq = virq; + bind_virq.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq) != 0) + BUG(); + evtchn = bind_virq.port; + + irq = find_unbound_irq(); + + dynamic_irq_init(irq); + set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_level_irq, "virq"); + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); + + per_cpu(virq_to_irq, cpu)[virq] = irq; + + bind_evtchn_to_cpu(evtchn, cpu); + } + + irq_bindcount[irq]++; + + spin_unlock(&irq_mapping_update_lock); + + return irq; +} + +static void unbind_from_irq(unsigned int irq) +{ + struct evtchn_close close; + int evtchn = evtchn_from_irq(irq); + + spin_lock(&irq_mapping_update_lock); + + if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) { + close.port = evtchn; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) + BUG(); + + switch (type_from_irq(irq)) { + case IRQT_VIRQ: + per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) + [index_from_irq(irq)] = -1; + break; + default: + break; + } + + /* Closed ports are implicitly re-bound to VCPU0. */ + bind_evtchn_to_cpu(evtchn, 0); + + evtchn_to_irq[evtchn] = -1; + irq_info[irq] = IRQ_UNBOUND; + + dynamic_irq_init(irq); + } + + spin_unlock(&irq_mapping_update_lock); +} + +int bind_evtchn_to_irqhandler(unsigned int evtchn, + irqreturn_t (*handler)(int, void *), + unsigned long irqflags, + const char *devname, void *dev_id) +{ + unsigned int irq; + int retval; + + irq = bind_evtchn_to_irq(evtchn); + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); + +int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, + irqreturn_t (*handler)(int, void *), + unsigned long irqflags, const char *devname, void *dev_id) +{ + unsigned int irq; + int retval; + + irq = bind_virq_to_irq(virq, cpu); + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} +EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler); + +int bind_ipi_to_irqhandler(enum ipi_vector ipi, + unsigned int cpu, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + int irq, retval; + + irq = bind_ipi_to_irq(ipi, cpu); + if (irq < 0) + return irq; + + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} + +void unbind_from_irqhandler(unsigned int irq, void *dev_id) +{ + free_irq(irq, dev_id); + unbind_from_irq(irq); +} +EXPORT_SYMBOL_GPL(unbind_from_irqhandler); + +void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) +{ + int irq = per_cpu(ipi_to_irq, cpu)[vector]; + BUG_ON(irq < 0); + notify_remote_via_irq(irq); +} + + +/* + * Search the CPUs pending events bitmasks. For each one found, map + * the event number to an irq, and feed it into do_IRQ() for + * handling. + * + * Xen uses a two-level bitmap to speed searching. The first level is + * a bitset of words which contain pending event bits. The second + * level is a bitset of pending events themselves. + */ +fastcall void xen_evtchn_do_upcall(struct pt_regs *regs) +{ + int cpu = get_cpu(); + struct shared_info *s = HYPERVISOR_shared_info; + struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); + unsigned long pending_words; + + vcpu_info->evtchn_upcall_pending = 0; + + /* NB. No need for a barrier here -- XCHG is a barrier on x86. */ + pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); + while (pending_words != 0) { + unsigned long pending_bits; + int word_idx = __ffs(pending_words); + pending_words &= ~(1UL << word_idx); + + while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) { + int bit_idx = __ffs(pending_bits); + int port = (word_idx * BITS_PER_LONG) + bit_idx; + int irq = evtchn_to_irq[port]; + + if (irq != -1) { + regs->orig_eax = ~irq; + do_IRQ(regs); + } + } + } + + put_cpu(); +} + +/* Rebind an evtchn so that it gets delivered to a specific cpu */ +static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) +{ + struct evtchn_bind_vcpu bind_vcpu; + int evtchn = evtchn_from_irq(irq); + + if (!VALID_EVTCHN(evtchn)) + return; + + /* Send future instances of this interrupt to other vcpu. */ + bind_vcpu.port = evtchn; + bind_vcpu.vcpu = tcpu; + + /* + * If this fails, it usually just indicates that we're dealing with a + * virq or IPI channel, which don't actually need to be rebound. Ignore + * it, but don't do the xenlinux-level rebind in that case. + */ + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) + bind_evtchn_to_cpu(evtchn, tcpu); +} + + +static void set_affinity_irq(unsigned irq, cpumask_t dest) +{ + unsigned tcpu = first_cpu(dest); + rebind_irq_to_cpu(irq, tcpu); +} + +static void enable_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + unmask_evtchn(evtchn); +} + +static void disable_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + mask_evtchn(evtchn); +} + +static void ack_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + move_native_irq(irq); + + if (VALID_EVTCHN(evtchn)) + clear_evtchn(evtchn); +} + +static int retrigger_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + int ret = 0; + + if (VALID_EVTCHN(evtchn)) { + set_evtchn(evtchn); + ret = 1; + } + + return ret; +} + +static struct irq_chip xen_dynamic_chip __read_mostly = { + .name = "xen-dyn", + .mask = disable_dynirq, + .unmask = enable_dynirq, + .ack = ack_dynirq, + .set_affinity = set_affinity_irq, + .retrigger = retrigger_dynirq, +}; + +void __init xen_init_IRQ(void) +{ + int i; + + init_evtchn_cpu_bindings(); + + /* No event channels are 'live' right now. */ + for (i = 0; i < NR_EVENT_CHANNELS; i++) + mask_evtchn(i); + + /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ + for (i = 0; i < NR_IRQS; i++) + irq_bindcount[i] = 0; + + irq_ctx_init(smp_processor_id()); +} diff --git a/arch/x86/xen/features.c b/arch/x86/xen/features.c new file mode 100644 index 00000000000..0707714e40d --- /dev/null +++ b/arch/x86/xen/features.c @@ -0,0 +1,29 @@ +/****************************************************************************** + * features.c + * + * Xen feature flags. + * + * Copyright (c) 2006, Ian Campbell, XenSource Inc. + */ +#include +#include +#include +#include +#include + +u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly; +EXPORT_SYMBOL_GPL(xen_features); + +void xen_setup_features(void) +{ + struct xen_feature_info fi; + int i, j; + + for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) { + fi.submap_idx = i; + if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) + break; + for (j = 0; j < 32; j++) + xen_features[i * 32 + j] = !!(fi.submap & 1< +#include +#include +#include + +#include + +#define SHUTDOWN_INVALID -1 +#define SHUTDOWN_POWEROFF 0 +#define SHUTDOWN_SUSPEND 2 +/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only + * report a crash, not be instructed to crash! + * HALT is the same as POWEROFF, as far as we're concerned. The tools use + * the distinction when we return the reason code to them. + */ +#define SHUTDOWN_HALT 4 + +/* Ignore multiple shutdown requests. */ +static int shutting_down = SHUTDOWN_INVALID; + +static void shutdown_handler(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + char *str; + struct xenbus_transaction xbt; + int err; + + if (shutting_down != SHUTDOWN_INVALID) + return; + + again: + err = xenbus_transaction_start(&xbt); + if (err) + return; + + str = (char *)xenbus_read(xbt, "control", "shutdown", NULL); + /* Ignore read errors and empty reads. */ + if (XENBUS_IS_ERR_READ(str)) { + xenbus_transaction_end(xbt, 1); + return; + } + + xenbus_write(xbt, "control", "shutdown", ""); + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) { + kfree(str); + goto again; + } + + if (strcmp(str, "poweroff") == 0 || + strcmp(str, "halt") == 0) + orderly_poweroff(false); + else if (strcmp(str, "reboot") == 0) + ctrl_alt_del(); + else { + printk(KERN_INFO "Ignoring shutdown request: %s\n", str); + shutting_down = SHUTDOWN_INVALID; + } + + kfree(str); +} + +static void sysrq_handler(struct xenbus_watch *watch, const char **vec, + unsigned int len) +{ + char sysrq_key = '\0'; + struct xenbus_transaction xbt; + int err; + + again: + err = xenbus_transaction_start(&xbt); + if (err) + return; + if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) { + printk(KERN_ERR "Unable to read sysrq code in " + "control/sysrq\n"); + xenbus_transaction_end(xbt, 1); + return; + } + + if (sysrq_key != '\0') + xenbus_printf(xbt, "control", "sysrq", "%c", '\0'); + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + + if (sysrq_key != '\0') + handle_sysrq(sysrq_key, NULL); +} + +static struct xenbus_watch shutdown_watch = { + .node = "control/shutdown", + .callback = shutdown_handler +}; + +static struct xenbus_watch sysrq_watch = { + .node = "control/sysrq", + .callback = sysrq_handler +}; + +static int setup_shutdown_watcher(void) +{ + int err; + + err = register_xenbus_watch(&shutdown_watch); + if (err) { + printk(KERN_ERR "Failed to set shutdown watcher\n"); + return err; + } + + err = register_xenbus_watch(&sysrq_watch); + if (err) { + printk(KERN_ERR "Failed to set sysrq watcher\n"); + return err; + } + + return 0; +} + +static int shutdown_event(struct notifier_block *notifier, + unsigned long event, + void *data) +{ + setup_shutdown_watcher(); + return NOTIFY_DONE; +} + +static int __init setup_shutdown_event(void) +{ + static struct notifier_block xenstore_notifier = { + .notifier_call = shutdown_event + }; + register_xenstore_notifier(&xenstore_notifier); + + return 0; +} + +subsys_initcall(setup_shutdown_event); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c new file mode 100644 index 00000000000..874db0cd1d2 --- /dev/null +++ b/arch/x86/xen/mmu.c @@ -0,0 +1,567 @@ +/* + * Xen mmu operations + * + * This file contains the various mmu fetch and update operations. + * The most important job they must perform is the mapping between the + * domain's pfn and the overall machine mfns. + * + * Xen allows guests to directly update the pagetable, in a controlled + * fashion. In other words, the guest modifies the same pagetable + * that the CPU actually uses, which eliminates the overhead of having + * a separate shadow pagetable. + * + * In order to allow this, it falls on the guest domain to map its + * notion of a "physical" pfn - which is just a domain-local linear + * address - into a real "machine address" which the CPU's MMU can + * use. + * + * A pgd_t/pmd_t/pte_t will typically contain an mfn, and so can be + * inserted directly into the pagetable. When creating a new + * pte/pmd/pgd, it converts the passed pfn into an mfn. Conversely, + * when reading the content back with __(pgd|pmd|pte)_val, it converts + * the mfn back into a pfn. + * + * The other constraint is that all pages which make up a pagetable + * must be mapped read-only in the guest. This prevents uncontrolled + * guest updates to the pagetable. Xen strictly enforces this, and + * will disallow any pagetable update which will end up mapping a + * pagetable page RW, and will disallow using any writable page as a + * pagetable. + * + * Naively, when loading %cr3 with the base of a new pagetable, Xen + * would need to validate the whole pagetable before going on. + * Naturally, this is quite slow. The solution is to "pin" a + * pagetable, which enforces all the constraints on the pagetable even + * when it is not actively in use. This menas that Xen can be assured + * that it is still valid when you do load it into %cr3, and doesn't + * need to revalidate it. + * + * Jeremy Fitzhardinge , XenSource Inc, 2007 + */ +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include "multicalls.h" +#include "mmu.h" + +xmaddr_t arbitrary_virt_to_machine(unsigned long address) +{ + pte_t *pte = lookup_address(address); + unsigned offset = address & PAGE_MASK; + + BUG_ON(pte == NULL); + + return XMADDR((pte_mfn(*pte) << PAGE_SHIFT) + offset); +} + +void make_lowmem_page_readonly(void *vaddr) +{ + pte_t *pte, ptev; + unsigned long address = (unsigned long)vaddr; + + pte = lookup_address(address); + BUG_ON(pte == NULL); + + ptev = pte_wrprotect(*pte); + + if (HYPERVISOR_update_va_mapping(address, ptev, 0)) + BUG(); +} + +void make_lowmem_page_readwrite(void *vaddr) +{ + pte_t *pte, ptev; + unsigned long address = (unsigned long)vaddr; + + pte = lookup_address(address); + BUG_ON(pte == NULL); + + ptev = pte_mkwrite(*pte); + + if (HYPERVISOR_update_va_mapping(address, ptev, 0)) + BUG(); +} + + +void xen_set_pmd(pmd_t *ptr, pmd_t val) +{ + struct multicall_space mcs; + struct mmu_update *u; + + preempt_disable(); + + mcs = xen_mc_entry(sizeof(*u)); + u = mcs.args; + u->ptr = virt_to_machine(ptr).maddr; + u->val = pmd_val_ma(val); + MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_MMU); + + preempt_enable(); +} + +/* + * Associate a virtual page frame with a given physical page frame + * and protection flags for that frame. + */ +void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = swapper_pg_dir + pgd_index(vaddr); + if (pgd_none(*pgd)) { + BUG(); + return; + } + pud = pud_offset(pgd, vaddr); + if (pud_none(*pud)) { + BUG(); + return; + } + pmd = pmd_offset(pud, vaddr); + if (pmd_none(*pmd)) { + BUG(); + return; + } + pte = pte_offset_kernel(pmd, vaddr); + /* stored as-is, to permit clearing entries */ + xen_set_pte(pte, mfn_pte(mfn, flags)); + + /* + * It's enough to flush this one mapping. + * (PGE mappings get flushed as well) + */ + __flush_tlb_one(vaddr); +} + +void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + if (mm == current->mm || mm == &init_mm) { + if (xen_get_lazy_mode() == PARAVIRT_LAZY_MMU) { + struct multicall_space mcs; + mcs = xen_mc_entry(0); + + MULTI_update_va_mapping(mcs.mc, addr, pteval, 0); + xen_mc_issue(PARAVIRT_LAZY_MMU); + return; + } else + if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0) + return; + } + xen_set_pte(ptep, pteval); +} + +#ifdef CONFIG_X86_PAE +void xen_set_pud(pud_t *ptr, pud_t val) +{ + struct multicall_space mcs; + struct mmu_update *u; + + preempt_disable(); + + mcs = xen_mc_entry(sizeof(*u)); + u = mcs.args; + u->ptr = virt_to_machine(ptr).maddr; + u->val = pud_val_ma(val); + MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_MMU); + + preempt_enable(); +} + +void xen_set_pte(pte_t *ptep, pte_t pte) +{ + ptep->pte_high = pte.pte_high; + smp_wmb(); + ptep->pte_low = pte.pte_low; +} + +void xen_set_pte_atomic(pte_t *ptep, pte_t pte) +{ + set_64bit((u64 *)ptep, pte_val_ma(pte)); +} + +void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + ptep->pte_low = 0; + smp_wmb(); /* make sure low gets written first */ + ptep->pte_high = 0; +} + +void xen_pmd_clear(pmd_t *pmdp) +{ + xen_set_pmd(pmdp, __pmd(0)); +} + +unsigned long long xen_pte_val(pte_t pte) +{ + unsigned long long ret = 0; + + if (pte.pte_low) { + ret = ((unsigned long long)pte.pte_high << 32) | pte.pte_low; + ret = machine_to_phys(XMADDR(ret)).paddr | 1; + } + + return ret; +} + +unsigned long long xen_pmd_val(pmd_t pmd) +{ + unsigned long long ret = pmd.pmd; + if (ret) + ret = machine_to_phys(XMADDR(ret)).paddr | 1; + return ret; +} + +unsigned long long xen_pgd_val(pgd_t pgd) +{ + unsigned long long ret = pgd.pgd; + if (ret) + ret = machine_to_phys(XMADDR(ret)).paddr | 1; + return ret; +} + +pte_t xen_make_pte(unsigned long long pte) +{ + if (pte & 1) + pte = phys_to_machine(XPADDR(pte)).maddr; + + return (pte_t){ pte, pte >> 32 }; +} + +pmd_t xen_make_pmd(unsigned long long pmd) +{ + if (pmd & 1) + pmd = phys_to_machine(XPADDR(pmd)).maddr; + + return (pmd_t){ pmd }; +} + +pgd_t xen_make_pgd(unsigned long long pgd) +{ + if (pgd & _PAGE_PRESENT) + pgd = phys_to_machine(XPADDR(pgd)).maddr; + + return (pgd_t){ pgd }; +} +#else /* !PAE */ +void xen_set_pte(pte_t *ptep, pte_t pte) +{ + *ptep = pte; +} + +unsigned long xen_pte_val(pte_t pte) +{ + unsigned long ret = pte.pte_low; + + if (ret & _PAGE_PRESENT) + ret = machine_to_phys(XMADDR(ret)).paddr; + + return ret; +} + +unsigned long xen_pgd_val(pgd_t pgd) +{ + unsigned long ret = pgd.pgd; + if (ret) + ret = machine_to_phys(XMADDR(ret)).paddr | 1; + return ret; +} + +pte_t xen_make_pte(unsigned long pte) +{ + if (pte & _PAGE_PRESENT) + pte = phys_to_machine(XPADDR(pte)).maddr; + + return (pte_t){ pte }; +} + +pgd_t xen_make_pgd(unsigned long pgd) +{ + if (pgd & _PAGE_PRESENT) + pgd = phys_to_machine(XPADDR(pgd)).maddr; + + return (pgd_t){ pgd }; +} +#endif /* CONFIG_X86_PAE */ + + + +/* + (Yet another) pagetable walker. This one is intended for pinning a + pagetable. This means that it walks a pagetable and calls the + callback function on each page it finds making up the page table, + at every level. It walks the entire pagetable, but it only bothers + pinning pte pages which are below pte_limit. In the normal case + this will be TASK_SIZE, but at boot we need to pin up to + FIXADDR_TOP. But the important bit is that we don't pin beyond + there, because then we start getting into Xen's ptes. +*/ +static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned), + unsigned long limit) +{ + pgd_t *pgd = pgd_base; + int flush = 0; + unsigned long addr = 0; + unsigned long pgd_next; + + BUG_ON(limit > FIXADDR_TOP); + + if (xen_feature(XENFEAT_auto_translated_physmap)) + return 0; + + for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) { + pud_t *pud; + unsigned long pud_limit, pud_next; + + pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP); + + if (!pgd_val(*pgd)) + continue; + + pud = pud_offset(pgd, 0); + + if (PTRS_PER_PUD > 1) /* not folded */ + flush |= (*func)(virt_to_page(pud), 0); + + for (; addr != pud_limit; pud++, addr = pud_next) { + pmd_t *pmd; + unsigned long pmd_limit; + + pud_next = pud_addr_end(addr, pud_limit); + + if (pud_next < limit) + pmd_limit = pud_next; + else + pmd_limit = limit; + + if (pud_none(*pud)) + continue; + + pmd = pmd_offset(pud, 0); + + if (PTRS_PER_PMD > 1) /* not folded */ + flush |= (*func)(virt_to_page(pmd), 0); + + for (; addr != pmd_limit; pmd++) { + addr += (PAGE_SIZE * PTRS_PER_PTE); + if ((pmd_limit-1) < (addr-1)) { + addr = pmd_limit; + break; + } + + if (pmd_none(*pmd)) + continue; + + flush |= (*func)(pmd_page(*pmd), 0); + } + } + } + + flush |= (*func)(virt_to_page(pgd_base), UVMF_TLB_FLUSH); + + return flush; +} + +static int pin_page(struct page *page, unsigned flags) +{ + unsigned pgfl = test_and_set_bit(PG_pinned, &page->flags); + int flush; + + if (pgfl) + flush = 0; /* already pinned */ + else if (PageHighMem(page)) + /* kmaps need flushing if we found an unpinned + highpage */ + flush = 1; + else { + void *pt = lowmem_page_address(page); + unsigned long pfn = page_to_pfn(page); + struct multicall_space mcs = __xen_mc_entry(0); + + flush = 0; + + MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, + pfn_pte(pfn, PAGE_KERNEL_RO), + flags); + } + + return flush; +} + +/* This is called just after a mm has been created, but it has not + been used yet. We need to make sure that its pagetable is all + read-only, and can be pinned. */ +void xen_pgd_pin(pgd_t *pgd) +{ + struct multicall_space mcs; + struct mmuext_op *op; + + xen_mc_batch(); + + if (pgd_walk(pgd, pin_page, TASK_SIZE)) { + /* re-enable interrupts for kmap_flush_unused */ + xen_mc_issue(0); + kmap_flush_unused(); + xen_mc_batch(); + } + + mcs = __xen_mc_entry(sizeof(*op)); + op = mcs.args; + +#ifdef CONFIG_X86_PAE + op->cmd = MMUEXT_PIN_L3_TABLE; +#else + op->cmd = MMUEXT_PIN_L2_TABLE; +#endif + op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd))); + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_mc_issue(0); +} + +/* The init_mm pagetable is really pinned as soon as its created, but + that's before we have page structures to store the bits. So do all + the book-keeping now. */ +static __init int mark_pinned(struct page *page, unsigned flags) +{ + SetPagePinned(page); + return 0; +} + +void __init xen_mark_init_mm_pinned(void) +{ + pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP); +} + +static int unpin_page(struct page *page, unsigned flags) +{ + unsigned pgfl = test_and_clear_bit(PG_pinned, &page->flags); + + if (pgfl && !PageHighMem(page)) { + void *pt = lowmem_page_address(page); + unsigned long pfn = page_to_pfn(page); + struct multicall_space mcs = __xen_mc_entry(0); + + MULTI_update_va_mapping(mcs.mc, (unsigned long)pt, + pfn_pte(pfn, PAGE_KERNEL), + flags); + } + + return 0; /* never need to flush on unpin */ +} + +/* Release a pagetables pages back as normal RW */ +static void xen_pgd_unpin(pgd_t *pgd) +{ + struct mmuext_op *op; + struct multicall_space mcs; + + xen_mc_batch(); + + mcs = __xen_mc_entry(sizeof(*op)); + + op = mcs.args; + op->cmd = MMUEXT_UNPIN_TABLE; + op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd))); + + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + pgd_walk(pgd, unpin_page, TASK_SIZE); + + xen_mc_issue(0); +} + +void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) +{ + spin_lock(&next->page_table_lock); + xen_pgd_pin(next->pgd); + spin_unlock(&next->page_table_lock); +} + +void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) +{ + spin_lock(&mm->page_table_lock); + xen_pgd_pin(mm->pgd); + spin_unlock(&mm->page_table_lock); +} + + +#ifdef CONFIG_SMP +/* Another cpu may still have their %cr3 pointing at the pagetable, so + we need to repoint it somewhere else before we can unpin it. */ +static void drop_other_mm_ref(void *info) +{ + struct mm_struct *mm = info; + + if (__get_cpu_var(cpu_tlbstate).active_mm == mm) + leave_mm(smp_processor_id()); +} + +static void drop_mm_ref(struct mm_struct *mm) +{ + if (current->active_mm == mm) { + if (current->mm == mm) + load_cr3(swapper_pg_dir); + else + leave_mm(smp_processor_id()); + } + + if (!cpus_empty(mm->cpu_vm_mask)) + xen_smp_call_function_mask(mm->cpu_vm_mask, drop_other_mm_ref, + mm, 1); +} +#else +static void drop_mm_ref(struct mm_struct *mm) +{ + if (current->active_mm == mm) + load_cr3(swapper_pg_dir); +} +#endif + +/* + * While a process runs, Xen pins its pagetables, which means that the + * hypervisor forces it to be read-only, and it controls all updates + * to it. This means that all pagetable updates have to go via the + * hypervisor, which is moderately expensive. + * + * Since we're pulling the pagetable down, we switch to use init_mm, + * unpin old process pagetable and mark it all read-write, which + * allows further operations on it to be simple memory accesses. + * + * The only subtle point is that another CPU may be still using the + * pagetable because of lazy tlb flushing. This means we need need to + * switch all CPUs off this pagetable before we can unpin it. + */ +void xen_exit_mmap(struct mm_struct *mm) +{ + get_cpu(); /* make sure we don't move around */ + drop_mm_ref(mm); + put_cpu(); + + spin_lock(&mm->page_table_lock); + + /* pgd may not be pinned in the error exit path of execve */ + if (PagePinned(virt_to_page(mm->pgd))) + xen_pgd_unpin(mm->pgd); + spin_unlock(&mm->page_table_lock); +} diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h new file mode 100644 index 00000000000..c9ff27f3ac3 --- /dev/null +++ b/arch/x86/xen/mmu.h @@ -0,0 +1,60 @@ +#ifndef _XEN_MMU_H + +#include +#include + +/* + * Page-directory addresses above 4GB do not fit into architectural %cr3. + * When accessing %cr3, or equivalent field in vcpu_guest_context, guests + * must use the following accessor macros to pack/unpack valid MFNs. + * + * Note that Xen is using the fact that the pagetable base is always + * page-aligned, and putting the 12 MSB of the address into the 12 LSB + * of cr3. + */ +#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) +#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) + + +void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); + +void xen_set_pte(pte_t *ptep, pte_t pteval); +void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval); +void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval); + +void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next); +void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm); +void xen_exit_mmap(struct mm_struct *mm); + +void xen_pgd_pin(pgd_t *pgd); +//void xen_pgd_unpin(pgd_t *pgd); + +#ifdef CONFIG_X86_PAE +unsigned long long xen_pte_val(pte_t); +unsigned long long xen_pmd_val(pmd_t); +unsigned long long xen_pgd_val(pgd_t); + +pte_t xen_make_pte(unsigned long long); +pmd_t xen_make_pmd(unsigned long long); +pgd_t xen_make_pgd(unsigned long long); + +void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval); +void xen_set_pte_atomic(pte_t *ptep, pte_t pte); +void xen_set_pud(pud_t *ptr, pud_t val); +void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +void xen_pmd_clear(pmd_t *pmdp); + + +#else +unsigned long xen_pte_val(pte_t); +unsigned long xen_pmd_val(pmd_t); +unsigned long xen_pgd_val(pgd_t); + +pte_t xen_make_pte(unsigned long); +pmd_t xen_make_pmd(unsigned long); +pgd_t xen_make_pgd(unsigned long); +#endif + +#endif /* _XEN_MMU_H */ diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c new file mode 100644 index 00000000000..c837e8e463d --- /dev/null +++ b/arch/x86/xen/multicalls.c @@ -0,0 +1,90 @@ +/* + * Xen hypercall batching. + * + * Xen allows multiple hypercalls to be issued at once, using the + * multicall interface. This allows the cost of trapping into the + * hypervisor to be amortized over several calls. + * + * This file implements a simple interface for multicalls. There's a + * per-cpu buffer of outstanding multicalls. When you want to queue a + * multicall for issuing, you can allocate a multicall slot for the + * call and its arguments, along with storage for space which is + * pointed to by the arguments (for passing pointers to structures, + * etc). When the multicall is actually issued, all the space for the + * commands and allocated memory is freed for reuse. + * + * Multicalls are flushed whenever any of the buffers get full, or + * when explicitly requested. There's no way to get per-multicall + * return results back. It will BUG if any of the multicalls fail. + * + * Jeremy Fitzhardinge , XenSource Inc, 2007 + */ +#include +#include + +#include + +#include "multicalls.h" + +#define MC_BATCH 32 +#define MC_ARGS (MC_BATCH * 16 / sizeof(u64)) + +struct mc_buffer { + struct multicall_entry entries[MC_BATCH]; + u64 args[MC_ARGS]; + unsigned mcidx, argidx; +}; + +static DEFINE_PER_CPU(struct mc_buffer, mc_buffer); +DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags); + +void xen_mc_flush(void) +{ + struct mc_buffer *b = &__get_cpu_var(mc_buffer); + int ret = 0; + unsigned long flags; + + BUG_ON(preemptible()); + + /* Disable interrupts in case someone comes in and queues + something in the middle */ + local_irq_save(flags); + + if (b->mcidx) { + int i; + + if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0) + BUG(); + for (i = 0; i < b->mcidx; i++) + if (b->entries[i].result < 0) + ret++; + b->mcidx = 0; + b->argidx = 0; + } else + BUG_ON(b->argidx != 0); + + local_irq_restore(flags); + + BUG_ON(ret); +} + +struct multicall_space __xen_mc_entry(size_t args) +{ + struct mc_buffer *b = &__get_cpu_var(mc_buffer); + struct multicall_space ret; + unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64); + + BUG_ON(preemptible()); + BUG_ON(argspace > MC_ARGS); + + if (b->mcidx == MC_BATCH || + (b->argidx + argspace) > MC_ARGS) + xen_mc_flush(); + + ret.mc = &b->entries[b->mcidx]; + b->mcidx++; + ret.args = &b->args[b->argidx]; + b->argidx += argspace; + + return ret; +} diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h new file mode 100644 index 00000000000..e6f7530b156 --- /dev/null +++ b/arch/x86/xen/multicalls.h @@ -0,0 +1,45 @@ +#ifndef _XEN_MULTICALLS_H +#define _XEN_MULTICALLS_H + +#include "xen-ops.h" + +/* Multicalls */ +struct multicall_space +{ + struct multicall_entry *mc; + void *args; +}; + +/* Allocate room for a multicall and its args */ +struct multicall_space __xen_mc_entry(size_t args); + +DECLARE_PER_CPU(unsigned long, xen_mc_irq_flags); + +/* Call to start a batch of multiple __xen_mc_entry()s. Must be + paired with xen_mc_issue() */ +static inline void xen_mc_batch(void) +{ + /* need to disable interrupts until this entry is complete */ + local_irq_save(__get_cpu_var(xen_mc_irq_flags)); +} + +static inline struct multicall_space xen_mc_entry(size_t args) +{ + xen_mc_batch(); + return __xen_mc_entry(args); +} + +/* Flush all pending multicalls */ +void xen_mc_flush(void); + +/* Issue a multicall if we're not in a lazy mode */ +static inline void xen_mc_issue(unsigned mode) +{ + if ((xen_get_lazy_mode() & mode) == 0) + xen_mc_flush(); + + /* restore flags saved in xen_mc_batch */ + local_irq_restore(x86_read_percpu(xen_mc_irq_flags)); +} + +#endif /* _XEN_MULTICALLS_H */ diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c new file mode 100644 index 00000000000..f84e7722664 --- /dev/null +++ b/arch/x86/xen/setup.c @@ -0,0 +1,111 @@ +/* + * Machine specific setup for xen + * + * Jeremy Fitzhardinge , XenSource Inc, 2007 + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include "xen-ops.h" +#include "vdso.h" + +/* These are code, but not functions. Defined in entry.S */ +extern const char xen_hypervisor_callback[]; +extern const char xen_failsafe_callback[]; + +unsigned long *phys_to_machine_mapping; +EXPORT_SYMBOL(phys_to_machine_mapping); + +/** + * machine_specific_memory_setup - Hook for machine specific memory setup. + **/ + +char * __init xen_memory_setup(void) +{ + unsigned long max_pfn = xen_start_info->nr_pages; + + e820.nr_map = 0; + add_memory_region(0, PFN_PHYS(max_pfn), E820_RAM); + + return "Xen"; +} + +static void xen_idle(void) +{ + local_irq_disable(); + + if (need_resched()) + local_irq_enable(); + else { + current_thread_info()->status &= ~TS_POLLING; + smp_mb__after_clear_bit(); + safe_halt(); + current_thread_info()->status |= TS_POLLING; + } +} + +/* + * Set the bit indicating "nosegneg" library variants should be used. + */ +static void fiddle_vdso(void) +{ + extern u32 VDSO_NOTE_MASK; /* See ../kernel/vsyscall-note.S. */ + extern char vsyscall_int80_start; + u32 *mask = (u32 *) ((unsigned long) &VDSO_NOTE_MASK - VDSO_PRELINK + + &vsyscall_int80_start); + *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; +} + +void __init xen_arch_setup(void) +{ + struct physdev_set_iopl set_iopl; + int rc; + + HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); + HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); + + if (!xen_feature(XENFEAT_auto_translated_physmap)) + HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3); + + HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback, + __KERNEL_CS, (unsigned long)xen_failsafe_callback); + + set_iopl.iopl = 1; + rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); + if (rc != 0) + printk(KERN_INFO "physdev_op failed %d\n", rc); + +#ifdef CONFIG_ACPI + if (!(xen_start_info->flags & SIF_INITDOMAIN)) { + printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); + disable_acpi(); + } +#endif + + memcpy(boot_command_line, xen_start_info->cmd_line, + MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ? + COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE); + + pm_idle = xen_idle; + +#ifdef CONFIG_SMP + /* fill cpus_possible with all available cpus */ + xen_fill_possible_map(); +#endif + + paravirt_disable_iospace(); + + fiddle_vdso(); +} diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c new file mode 100644 index 00000000000..557b8e24706 --- /dev/null +++ b/arch/x86/xen/smp.c @@ -0,0 +1,404 @@ +/* + * Xen SMP support + * + * This file implements the Xen versions of smp_ops. SMP under Xen is + * very straightforward. Bringing a CPU up is simply a matter of + * loading its initial context and setting it running. + * + * IPIs are handled through the Xen event mechanism. + * + * Because virtual CPUs can be scheduled onto any real CPU, there's no + * useful topology information for the kernel to make use of. As a + * result, all CPUs are treated as if they're single-core and + * single-threaded. + * + * This does not handle HOTPLUG_CPU yet. + */ +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include + +#include "xen-ops.h" +#include "mmu.h" + +static cpumask_t cpu_initialized_map; +static DEFINE_PER_CPU(int, resched_irq); +static DEFINE_PER_CPU(int, callfunc_irq); + +/* + * Structure and data for smp_call_function(). This is designed to minimise + * static memory requirements. It also looks cleaner. + */ +static DEFINE_SPINLOCK(call_lock); + +struct call_data_struct { + void (*func) (void *info); + void *info; + atomic_t started; + atomic_t finished; + int wait; +}; + +static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); + +static struct call_data_struct *call_data; + +/* + * Reschedule call back. Nothing to do, + * all the work is done automatically when + * we return from the interrupt. + */ +static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) +{ + return IRQ_HANDLED; +} + +static __cpuinit void cpu_bringup_and_idle(void) +{ + int cpu = smp_processor_id(); + + cpu_init(); + + preempt_disable(); + per_cpu(cpu_state, cpu) = CPU_ONLINE; + + xen_setup_cpu_clockevents(); + + /* We can take interrupts now: we're officially "up". */ + local_irq_enable(); + + wmb(); /* make sure everything is out */ + cpu_idle(); +} + +static int xen_smp_intr_init(unsigned int cpu) +{ + int rc; + const char *resched_name, *callfunc_name; + + per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1; + + resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu); + rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, + cpu, + xen_reschedule_interrupt, + IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, + resched_name, + NULL); + if (rc < 0) + goto fail; + per_cpu(resched_irq, cpu) = rc; + + callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu); + rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR, + cpu, + xen_call_function_interrupt, + IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, + callfunc_name, + NULL); + if (rc < 0) + goto fail; + per_cpu(callfunc_irq, cpu) = rc; + + return 0; + + fail: + if (per_cpu(resched_irq, cpu) >= 0) + unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); + if (per_cpu(callfunc_irq, cpu) >= 0) + unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); + return rc; +} + +void __init xen_fill_possible_map(void) +{ + int i, rc; + + for (i = 0; i < NR_CPUS; i++) { + rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); + if (rc >= 0) + cpu_set(i, cpu_possible_map); + } +} + +void __init xen_smp_prepare_boot_cpu(void) +{ + int cpu; + + BUG_ON(smp_processor_id() != 0); + native_smp_prepare_boot_cpu(); + + /* We've switched to the "real" per-cpu gdt, so make sure the + old memory can be recycled */ + make_lowmem_page_readwrite(&per_cpu__gdt_page); + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + cpus_clear(cpu_sibling_map[cpu]); + cpus_clear(cpu_core_map[cpu]); + } + + xen_setup_vcpu_info_placement(); +} + +void __init xen_smp_prepare_cpus(unsigned int max_cpus) +{ + unsigned cpu; + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + cpus_clear(cpu_sibling_map[cpu]); + cpus_clear(cpu_core_map[cpu]); + } + + smp_store_cpu_info(0); + set_cpu_sibling_map(0); + + if (xen_smp_intr_init(0)) + BUG(); + + cpu_initialized_map = cpumask_of_cpu(0); + + /* Restrict the possible_map according to max_cpus. */ + while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { + for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--) + continue; + cpu_clear(cpu, cpu_possible_map); + } + + for_each_possible_cpu (cpu) { + struct task_struct *idle; + + if (cpu == 0) + continue; + + idle = fork_idle(cpu); + if (IS_ERR(idle)) + panic("failed fork for CPU %d", cpu); + + cpu_set(cpu, cpu_present_map); + } + + //init_xenbus_allowed_cpumask(); +} + +static __cpuinit int +cpu_initialize_context(unsigned int cpu, struct task_struct *idle) +{ + struct vcpu_guest_context *ctxt; + struct gdt_page *gdt = &per_cpu(gdt_page, cpu); + + if (cpu_test_and_set(cpu, cpu_initialized_map)) + return 0; + + ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); + if (ctxt == NULL) + return -ENOMEM; + + ctxt->flags = VGCF_IN_KERNEL; + ctxt->user_regs.ds = __USER_DS; + ctxt->user_regs.es = __USER_DS; + ctxt->user_regs.fs = __KERNEL_PERCPU; + ctxt->user_regs.gs = 0; + ctxt->user_regs.ss = __KERNEL_DS; + ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; + ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ + + memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); + + xen_copy_trap_info(ctxt->trap_ctxt); + + ctxt->ldt_ents = 0; + + BUG_ON((unsigned long)gdt->gdt & ~PAGE_MASK); + make_lowmem_page_readonly(gdt->gdt); + + ctxt->gdt_frames[0] = virt_to_mfn(gdt->gdt); + ctxt->gdt_ents = ARRAY_SIZE(gdt->gdt); + + ctxt->user_regs.cs = __KERNEL_CS; + ctxt->user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs); + + ctxt->kernel_ss = __KERNEL_DS; + ctxt->kernel_sp = idle->thread.esp0; + + ctxt->event_callback_cs = __KERNEL_CS; + ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback; + ctxt->failsafe_callback_cs = __KERNEL_CS; + ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback; + + per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); + ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); + + if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) + BUG(); + + kfree(ctxt); + return 0; +} + +int __cpuinit xen_cpu_up(unsigned int cpu) +{ + struct task_struct *idle = idle_task(cpu); + int rc; + +#if 0 + rc = cpu_up_check(cpu); + if (rc) + return rc; +#endif + + init_gdt(cpu); + per_cpu(current_task, cpu) = idle; + irq_ctx_init(cpu); + xen_setup_timer(cpu); + + /* make sure interrupts start blocked */ + per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; + + rc = cpu_initialize_context(cpu, idle); + if (rc) + return rc; + + if (num_online_cpus() == 1) + alternatives_smp_switch(1); + + rc = xen_smp_intr_init(cpu); + if (rc) + return rc; + + smp_store_cpu_info(cpu); + set_cpu_sibling_map(cpu); + /* This must be done before setting cpu_online_map */ + wmb(); + + cpu_set(cpu, cpu_online_map); + + rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); + BUG_ON(rc); + + return 0; +} + +void xen_smp_cpus_done(unsigned int max_cpus) +{ +} + +static void stop_self(void *v) +{ + int cpu = smp_processor_id(); + + /* make sure we're not pinning something down */ + load_cr3(swapper_pg_dir); + /* should set up a minimal gdt */ + + HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL); + BUG(); +} + +void xen_smp_send_stop(void) +{ + smp_call_function(stop_self, NULL, 0, 0); +} + +void xen_smp_send_reschedule(int cpu) +{ + xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); +} + + +static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) +{ + unsigned cpu; + + cpus_and(mask, mask, cpu_online_map); + + for_each_cpu_mask(cpu, mask) + xen_send_IPI_one(cpu, vector); +} + +static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) +{ + void (*func) (void *info) = call_data->func; + void *info = call_data->info; + int wait = call_data->wait; + + /* + * Notify initiating CPU that I've grabbed the data and am + * about to execute the function + */ + mb(); + atomic_inc(&call_data->started); + /* + * At this point the info structure may be out of scope unless wait==1 + */ + irq_enter(); + (*func)(info); + irq_exit(); + + if (wait) { + mb(); /* commit everything before setting finished */ + atomic_inc(&call_data->finished); + } + + return IRQ_HANDLED; +} + +int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), + void *info, int wait) +{ + struct call_data_struct data; + int cpus; + + /* Holding any lock stops cpus from going down. */ + spin_lock(&call_lock); + + cpu_clear(smp_processor_id(), mask); + + cpus = cpus_weight(mask); + if (!cpus) { + spin_unlock(&call_lock); + return 0; + } + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + call_data = &data; + mb(); /* write everything before IPI */ + + /* Send a message to other CPUs and wait for them to respond */ + xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); + + /* Make sure other vcpus get a chance to run. + XXX too severe? Maybe we should check the other CPU's states? */ + HYPERVISOR_sched_op(SCHEDOP_yield, 0); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus || + (wait && atomic_read(&data.finished) != cpus)) + cpu_relax(); + + spin_unlock(&call_lock); + + return 0; +} diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c new file mode 100644 index 00000000000..dfd6db69ead --- /dev/null +++ b/arch/x86/xen/time.c @@ -0,0 +1,593 @@ +/* + * Xen time implementation. + * + * This is implemented in terms of a clocksource driver which uses + * the hypervisor clock as a nanosecond timebase, and a clockevent + * driver which uses the hypervisor's timer mechanism. + * + * Jeremy Fitzhardinge , XenSource Inc, 2007 + */ +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include "xen-ops.h" + +#define XEN_SHIFT 22 + +/* Xen may fire a timer up to this many ns early */ +#define TIMER_SLOP 100000 +#define NS_PER_TICK (1000000000LL / HZ) + +static cycle_t xen_clocksource_read(void); + +/* These are perodically updated in shared_info, and then copied here. */ +struct shadow_time_info { + u64 tsc_timestamp; /* TSC at last update of time vals. */ + u64 system_timestamp; /* Time, in nanosecs, since boot. */ + u32 tsc_to_nsec_mul; + int tsc_shift; + u32 version; +}; + +static DEFINE_PER_CPU(struct shadow_time_info, shadow_time); + +/* runstate info updated by Xen */ +static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); + +/* snapshots of runstate info */ +static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate_snapshot); + +/* unused ns of stolen and blocked time */ +static DEFINE_PER_CPU(u64, residual_stolen); +static DEFINE_PER_CPU(u64, residual_blocked); + +/* return an consistent snapshot of 64-bit time/counter value */ +static u64 get64(const u64 *p) +{ + u64 ret; + + if (BITS_PER_LONG < 64) { + u32 *p32 = (u32 *)p; + u32 h, l; + + /* + * Read high then low, and then make sure high is + * still the same; this will only loop if low wraps + * and carries into high. + * XXX some clean way to make this endian-proof? + */ + do { + h = p32[1]; + barrier(); + l = p32[0]; + barrier(); + } while (p32[1] != h); + + ret = (((u64)h) << 32) | l; + } else + ret = *p; + + return ret; +} + +/* + * Runstate accounting + */ +static void get_runstate_snapshot(struct vcpu_runstate_info *res) +{ + u64 state_time; + struct vcpu_runstate_info *state; + + BUG_ON(preemptible()); + + state = &__get_cpu_var(runstate); + + /* + * The runstate info is always updated by the hypervisor on + * the current CPU, so there's no need to use anything + * stronger than a compiler barrier when fetching it. + */ + do { + state_time = get64(&state->state_entry_time); + barrier(); + *res = *state; + barrier(); + } while (get64(&state->state_entry_time) != state_time); +} + +static void setup_runstate_info(int cpu) +{ + struct vcpu_register_runstate_memory_area area; + + area.addr.v = &per_cpu(runstate, cpu); + + if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, + cpu, &area)) + BUG(); +} + +static void do_stolen_accounting(void) +{ + struct vcpu_runstate_info state; + struct vcpu_runstate_info *snap; + s64 blocked, runnable, offline, stolen; + cputime_t ticks; + + get_runstate_snapshot(&state); + + WARN_ON(state.state != RUNSTATE_running); + + snap = &__get_cpu_var(runstate_snapshot); + + /* work out how much time the VCPU has not been runn*ing* */ + blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked]; + runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable]; + offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline]; + + *snap = state; + + /* Add the appropriate number of ticks of stolen time, + including any left-overs from last time. Passing NULL to + account_steal_time accounts the time as stolen. */ + stolen = runnable + offline + __get_cpu_var(residual_stolen); + + if (stolen < 0) + stolen = 0; + + ticks = 0; + while (stolen >= NS_PER_TICK) { + ticks++; + stolen -= NS_PER_TICK; + } + __get_cpu_var(residual_stolen) = stolen; + account_steal_time(NULL, ticks); + + /* Add the appropriate number of ticks of blocked time, + including any left-overs from last time. Passing idle to + account_steal_time accounts the time as idle/wait. */ + blocked += __get_cpu_var(residual_blocked); + + if (blocked < 0) + blocked = 0; + + ticks = 0; + while (blocked >= NS_PER_TICK) { + ticks++; + blocked -= NS_PER_TICK; + } + __get_cpu_var(residual_blocked) = blocked; + account_steal_time(idle_task(smp_processor_id()), ticks); +} + +/* + * Xen sched_clock implementation. Returns the number of unstolen + * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED + * states. + */ +unsigned long long xen_sched_clock(void) +{ + struct vcpu_runstate_info state; + cycle_t now; + u64 ret; + s64 offset; + + /* + * Ideally sched_clock should be called on a per-cpu basis + * anyway, so preempt should already be disabled, but that's + * not current practice at the moment. + */ + preempt_disable(); + + now = xen_clocksource_read(); + + get_runstate_snapshot(&state); + + WARN_ON(state.state != RUNSTATE_running); + + offset = now - state.state_entry_time; + if (offset < 0) + offset = 0; + + ret = state.time[RUNSTATE_blocked] + + state.time[RUNSTATE_running] + + offset; + + preempt_enable(); + + return ret; +} + + +/* Get the CPU speed from Xen */ +unsigned long xen_cpu_khz(void) +{ + u64 cpu_khz = 1000000ULL << 32; + const struct vcpu_time_info *info = + &HYPERVISOR_shared_info->vcpu_info[0].time; + + do_div(cpu_khz, info->tsc_to_system_mul); + if (info->tsc_shift < 0) + cpu_khz <<= -info->tsc_shift; + else + cpu_khz >>= info->tsc_shift; + + return cpu_khz; +} + +/* + * Reads a consistent set of time-base values from Xen, into a shadow data + * area. + */ +static unsigned get_time_values_from_xen(void) +{ + struct vcpu_time_info *src; + struct shadow_time_info *dst; + + /* src is shared memory with the hypervisor, so we need to + make sure we get a consistent snapshot, even in the face of + being preempted. */ + src = &__get_cpu_var(xen_vcpu)->time; + dst = &__get_cpu_var(shadow_time); + + do { + dst->version = src->version; + rmb(); /* fetch version before data */ + dst->tsc_timestamp = src->tsc_timestamp; + dst->system_timestamp = src->system_time; + dst->tsc_to_nsec_mul = src->tsc_to_system_mul; + dst->tsc_shift = src->tsc_shift; + rmb(); /* test version after fetching data */ + } while ((src->version & 1) | (dst->version ^ src->version)); + + return dst->version; +} + +/* + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, + * yielding a 64-bit result. + */ +static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) +{ + u64 product; +#ifdef __i386__ + u32 tmp1, tmp2; +#endif + + if (shift < 0) + delta >>= -shift; + else + delta <<= shift; + +#ifdef __i386__ + __asm__ ( + "mul %5 ; " + "mov %4,%%eax ; " + "mov %%edx,%4 ; " + "mul %5 ; " + "xor %5,%5 ; " + "add %4,%%eax ; " + "adc %5,%%edx ; " + : "=A" (product), "=r" (tmp1), "=r" (tmp2) + : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); +#elif __x86_64__ + __asm__ ( + "mul %%rdx ; shrd $32,%%rdx,%%rax" + : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); +#else +#error implement me! +#endif + + return product; +} + +static u64 get_nsec_offset(struct shadow_time_info *shadow) +{ + u64 now, delta; + now = native_read_tsc(); + delta = now - shadow->tsc_timestamp; + return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); +} + +static cycle_t xen_clocksource_read(void) +{ + struct shadow_time_info *shadow = &get_cpu_var(shadow_time); + cycle_t ret; + unsigned version; + + do { + version = get_time_values_from_xen(); + barrier(); + ret = shadow->system_timestamp + get_nsec_offset(shadow); + barrier(); + } while (version != __get_cpu_var(xen_vcpu)->time.version); + + put_cpu_var(shadow_time); + + return ret; +} + +static void xen_read_wallclock(struct timespec *ts) +{ + const struct shared_info *s = HYPERVISOR_shared_info; + u32 version; + u64 delta; + struct timespec now; + + /* get wallclock at system boot */ + do { + version = s->wc_version; + rmb(); /* fetch version before time */ + now.tv_sec = s->wc_sec; + now.tv_nsec = s->wc_nsec; + rmb(); /* fetch time before checking version */ + } while ((s->wc_version & 1) | (version ^ s->wc_version)); + + delta = xen_clocksource_read(); /* time since system boot */ + delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; + + now.tv_nsec = do_div(delta, NSEC_PER_SEC); + now.tv_sec = delta; + + set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); +} + +unsigned long xen_get_wallclock(void) +{ + struct timespec ts; + + xen_read_wallclock(&ts); + + return ts.tv_sec; +} + +int xen_set_wallclock(unsigned long now) +{ + /* do nothing for domU */ + return -1; +} + +static struct clocksource xen_clocksource __read_mostly = { + .name = "xen", + .rating = 400, + .read = xen_clocksource_read, + .mask = ~0, + .mult = 1<mode != CLOCK_EVT_MODE_ONESHOT); + + if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0) + BUG(); + + /* We may have missed the deadline, but there's no real way of + knowing for sure. If the event was in the past, then we'll + get an immediate interrupt. */ + + return 0; +} + +static const struct clock_event_device xen_timerop_clockevent = { + .name = "xen", + .features = CLOCK_EVT_FEAT_ONESHOT, + + .max_delta_ns = 0xffffffff, + .min_delta_ns = TIMER_SLOP, + + .mult = 1, + .shift = 0, + .rating = 500, + + .set_mode = xen_timerop_set_mode, + .set_next_event = xen_timerop_set_next_event, +}; + + + +static void xen_vcpuop_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + int cpu = smp_processor_id(); + + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + WARN_ON(1); /* unsupported */ + break; + + case CLOCK_EVT_MODE_ONESHOT: + if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) + BUG(); + break; + + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) || + HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) + BUG(); + break; + case CLOCK_EVT_MODE_RESUME: + break; + } +} + +static int xen_vcpuop_set_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + int cpu = smp_processor_id(); + struct vcpu_set_singleshot_timer single; + int ret; + + WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT); + + single.timeout_abs_ns = get_abs_timeout(delta); + single.flags = VCPU_SSHOTTMR_future; + + ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single); + + BUG_ON(ret != 0 && ret != -ETIME); + + return ret; +} + +static const struct clock_event_device xen_vcpuop_clockevent = { + .name = "xen", + .features = CLOCK_EVT_FEAT_ONESHOT, + + .max_delta_ns = 0xffffffff, + .min_delta_ns = TIMER_SLOP, + + .mult = 1, + .shift = 0, + .rating = 500, + + .set_mode = xen_vcpuop_set_mode, + .set_next_event = xen_vcpuop_set_next_event, +}; + +static const struct clock_event_device *xen_clockevent = + &xen_timerop_clockevent; +static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events); + +static irqreturn_t xen_timer_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *evt = &__get_cpu_var(xen_clock_events); + irqreturn_t ret; + + ret = IRQ_NONE; + if (evt->event_handler) { + evt->event_handler(evt); + ret = IRQ_HANDLED; + } + + do_stolen_accounting(); + + return ret; +} + +void xen_setup_timer(int cpu) +{ + const char *name; + struct clock_event_device *evt; + int irq; + + printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu); + + name = kasprintf(GFP_KERNEL, "timer%d", cpu); + if (!name) + name = ""; + + irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, + IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, + name, NULL); + + evt = &per_cpu(xen_clock_events, cpu); + memcpy(evt, xen_clockevent, sizeof(*evt)); + + evt->cpumask = cpumask_of_cpu(cpu); + evt->irq = irq; + + setup_runstate_info(cpu); +} + +void xen_setup_cpu_clockevents(void) +{ + BUG_ON(preemptible()); + + clockevents_register_device(&__get_cpu_var(xen_clock_events)); +} + +__init void xen_time_init(void) +{ + int cpu = smp_processor_id(); + + get_time_values_from_xen(); + + clocksource_register(&xen_clocksource); + + if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { + /* Successfully turned off 100Hz tick, so we have the + vcpuop-based timer interface */ + printk(KERN_DEBUG "Xen: using vcpuop timer interface\n"); + xen_clockevent = &xen_vcpuop_clockevent; + } + + /* Set initial system time with full resolution */ + xen_read_wallclock(&xtime); + set_normalized_timespec(&wall_to_monotonic, + -xtime.tv_sec, -xtime.tv_nsec); + + tsc_disable = 0; + + xen_setup_timer(cpu); + xen_setup_cpu_clockevents(); +} diff --git a/arch/x86/xen/vdso.h b/arch/x86/xen/vdso.h new file mode 100644 index 00000000000..861fedfe523 --- /dev/null +++ b/arch/x86/xen/vdso.h @@ -0,0 +1,4 @@ +/* Bit used for the pseudo-hwcap for non-negative segments. We use + bit 1 to avoid bugs in some versions of glibc when bit 0 is + used; the choice is otherwise arbitrary. */ +#define VDSO_NOTE_NONEGSEG_BIT 1 diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S new file mode 100644 index 00000000000..1a43b60c0c6 --- /dev/null +++ b/arch/x86/xen/xen-asm.S @@ -0,0 +1,291 @@ +/* + Asm versions of Xen pv-ops, suitable for either direct use or inlining. + The inline versions are the same as the direct-use versions, with the + pre- and post-amble chopped off. + + This code is encoded for size rather than absolute efficiency, + with a view to being able to inline as much as possible. + + We only bother with direct forms (ie, vcpu in pda) of the operations + here; the indirect forms are better handled in C, since they're + generally too large to inline anyway. + */ + +#include + +#include +#include +#include +#include +#include + +#include + +#define RELOC(x, v) .globl x##_reloc; x##_reloc=v +#define ENDPATCH(x) .globl x##_end; x##_end=. + +/* Pseudo-flag used for virtual NMI, which we don't implement yet */ +#define XEN_EFLAGS_NMI 0x80000000 + +/* + Enable events. This clears the event mask and tests the pending + event status with one and operation. If there are pending + events, then enter the hypervisor to get them handled. + */ +ENTRY(xen_irq_enable_direct) + /* Clear mask and test pending */ + andw $0x00ff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending + /* Preempt here doesn't matter because that will deal with + any pending interrupts. The pending check may end up being + run on the wrong CPU, but that doesn't hurt. */ + jz 1f +2: call check_events +1: +ENDPATCH(xen_irq_enable_direct) + ret + ENDPROC(xen_irq_enable_direct) + RELOC(xen_irq_enable_direct, 2b+1) + + +/* + Disabling events is simply a matter of making the event mask + non-zero. + */ +ENTRY(xen_irq_disable_direct) + movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask +ENDPATCH(xen_irq_disable_direct) + ret + ENDPROC(xen_irq_disable_direct) + RELOC(xen_irq_disable_direct, 0) + +/* + (xen_)save_fl is used to get the current interrupt enable status. + Callers expect the status to be in X86_EFLAGS_IF, and other bits + may be set in the return value. We take advantage of this by + making sure that X86_EFLAGS_IF has the right value (and other bits + in that byte are 0), but other bits in the return value are + undefined. We need to toggle the state of the bit, because + Xen and x86 use opposite senses (mask vs enable). + */ +ENTRY(xen_save_fl_direct) + testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask + setz %ah + addb %ah,%ah +ENDPATCH(xen_save_fl_direct) + ret + ENDPROC(xen_save_fl_direct) + RELOC(xen_save_fl_direct, 0) + + +/* + In principle the caller should be passing us a value return + from xen_save_fl_direct, but for robustness sake we test only + the X86_EFLAGS_IF flag rather than the whole byte. After + setting the interrupt mask state, it checks for unmasked + pending events and enters the hypervisor to get them delivered + if so. + */ +ENTRY(xen_restore_fl_direct) + testb $X86_EFLAGS_IF>>8, %ah + setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask + /* Preempt here doesn't matter because that will deal with + any pending interrupts. The pending check may end up being + run on the wrong CPU, but that doesn't hurt. */ + + /* check for unmasked and pending */ + cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending + jz 1f +2: call check_events +1: +ENDPATCH(xen_restore_fl_direct) + ret + ENDPROC(xen_restore_fl_direct) + RELOC(xen_restore_fl_direct, 2b+1) + +/* + This is run where a normal iret would be run, with the same stack setup: + 8: eflags + 4: cs + esp-> 0: eip + + This attempts to make sure that any pending events are dealt + with on return to usermode, but there is a small window in + which an event can happen just before entering usermode. If + the nested interrupt ends up setting one of the TIF_WORK_MASK + pending work flags, they will not be tested again before + returning to usermode. This means that a process can end up + with pending work, which will be unprocessed until the process + enters and leaves the kernel again, which could be an + unbounded amount of time. This means that a pending signal or + reschedule event could be indefinitely delayed. + + The fix is to notice a nested interrupt in the critical + window, and if one occurs, then fold the nested interrupt into + the current interrupt stack frame, and re-process it + iteratively rather than recursively. This means that it will + exit via the normal path, and all pending work will be dealt + with appropriately. + + Because the nested interrupt handler needs to deal with the + current stack state in whatever form its in, we keep things + simple by only using a single register which is pushed/popped + on the stack. + + Non-direct iret could be done in the same way, but it would + require an annoying amount of code duplication. We'll assume + that direct mode will be the common case once the hypervisor + support becomes commonplace. + */ +ENTRY(xen_iret_direct) + /* test eflags for special cases */ + testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp) + jnz hyper_iret + + push %eax + ESP_OFFSET=4 # bytes pushed onto stack + + /* Store vcpu_info pointer for easy access. Do it this + way to avoid having to reload %fs */ +#ifdef CONFIG_SMP + GET_THREAD_INFO(%eax) + movl TI_cpu(%eax),%eax + movl __per_cpu_offset(,%eax,4),%eax + lea per_cpu__xen_vcpu_info(%eax),%eax +#else + movl $per_cpu__xen_vcpu_info, %eax +#endif + + /* check IF state we're restoring */ + testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) + + /* Maybe enable events. Once this happens we could get a + recursive event, so the critical region starts immediately + afterwards. However, if that happens we don't end up + resuming the code, so we don't have to be worried about + being preempted to another CPU. */ + setz XEN_vcpu_info_mask(%eax) +xen_iret_start_crit: + + /* check for unmasked and pending */ + cmpw $0x0001, XEN_vcpu_info_pending(%eax) + + /* If there's something pending, mask events again so we + can jump back into xen_hypervisor_callback */ + sete XEN_vcpu_info_mask(%eax) + + popl %eax + + /* From this point on the registers are restored and the stack + updated, so we don't need to worry about it if we're preempted */ +iret_restore_end: + + /* Jump to hypervisor_callback after fixing up the stack. + Events are masked, so jumping out of the critical + region is OK. */ + je xen_hypervisor_callback + + iret +xen_iret_end_crit: + +hyper_iret: + /* put this out of line since its very rarely used */ + jmp hypercall_page + __HYPERVISOR_iret * 32 + + .globl xen_iret_start_crit, xen_iret_end_crit + +/* + This is called by xen_hypervisor_callback in entry.S when it sees + that the EIP at the time of interrupt was between xen_iret_start_crit + and xen_iret_end_crit. We're passed the EIP in %eax so we can do + a more refined determination of what to do. + + The stack format at this point is: + ---------------- + ss : (ss/esp may be present if we came from usermode) + esp : + eflags } outer exception info + cs } + eip } + ---------------- <- edi (copy dest) + eax : outer eax if it hasn't been restored + ---------------- + eflags } nested exception info + cs } (no ss/esp because we're nested + eip } from the same ring) + orig_eax }<- esi (copy src) + - - - - - - - - + fs } + es } + ds } SAVE_ALL state + eax } + : : + ebx } + ---------------- + return addr <- esp + ---------------- + + In order to deliver the nested exception properly, we need to shift + everything from the return addr up to the error code so it + sits just under the outer exception info. This means that when we + handle the exception, we do it in the context of the outer exception + rather than starting a new one. + + The only caveat is that if the outer eax hasn't been + restored yet (ie, it's still on stack), we need to insert + its value into the SAVE_ALL state before going on, since + it's usermode state which we eventually need to restore. + */ +ENTRY(xen_iret_crit_fixup) + /* offsets +4 for return address */ + + /* + Paranoia: Make sure we're really coming from userspace. + One could imagine a case where userspace jumps into the + critical range address, but just before the CPU delivers a GP, + it decides to deliver an interrupt instead. Unlikely? + Definitely. Easy to avoid? Yes. The Intel documents + explicitly say that the reported EIP for a bad jump is the + jump instruction itself, not the destination, but some virtual + environments get this wrong. + */ + movl PT_CS+4(%esp), %ecx + andl $SEGMENT_RPL_MASK, %ecx + cmpl $USER_RPL, %ecx + je 2f + + lea PT_ORIG_EAX+4(%esp), %esi + lea PT_EFLAGS+4(%esp), %edi + + /* If eip is before iret_restore_end then stack + hasn't been restored yet. */ + cmp $iret_restore_end, %eax + jae 1f + + movl 0+4(%edi),%eax /* copy EAX */ + movl %eax, PT_EAX+4(%esp) + + lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */ + + /* set up the copy */ +1: std + mov $(PT_EIP+4) / 4, %ecx /* copy ret+saved regs up to orig_eax */ + rep movsl + cld + + lea 4(%edi),%esp /* point esp to new frame */ +2: ret + + +/* + Force an event check by making a hypercall, + but preserve regs before making the call. + */ +check_events: + push %eax + push %ecx + push %edx + call force_evtchn_callback + pop %edx + pop %ecx + pop %eax + ret diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S new file mode 100644 index 00000000000..f8d6937db2e --- /dev/null +++ b/arch/x86/xen/xen-head.S @@ -0,0 +1,38 @@ +/* Xen-specific pieces of head.S, intended to be included in the right + place in head.S */ + +#ifdef CONFIG_XEN + +#include +#include +#include + +.pushsection .init.text +ENTRY(startup_xen) + movl %esi,xen_start_info + cld + movl $(init_thread_union+THREAD_SIZE),%esp + jmp xen_start_kernel +.popsection + +.pushsection .bss.page_aligned + .align PAGE_SIZE_asm +ENTRY(hypercall_page) + .skip 0x1000 +.popsection + + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6") + ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0") + ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long __PAGE_OFFSET) + ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen) + ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) + ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") +#ifdef CONFIG_X86_PAE + ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") +#else + ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "no") +#endif + ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") + +#endif /*CONFIG_XEN */ diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h new file mode 100644 index 00000000000..b9aaea45f07 --- /dev/null +++ b/arch/x86/xen/xen-ops.h @@ -0,0 +1,71 @@ +#ifndef XEN_OPS_H +#define XEN_OPS_H + +#include + +/* These are code, but not functions. Defined in entry.S */ +extern const char xen_hypervisor_callback[]; +extern const char xen_failsafe_callback[]; + +void xen_copy_trap_info(struct trap_info *traps); + +DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); +DECLARE_PER_CPU(unsigned long, xen_cr3); + +extern struct start_info *xen_start_info; +extern struct shared_info *HYPERVISOR_shared_info; + +char * __init xen_memory_setup(void); +void __init xen_arch_setup(void); +void __init xen_init_IRQ(void); + +void xen_setup_timer(int cpu); +void xen_setup_cpu_clockevents(void); +unsigned long xen_cpu_khz(void); +void __init xen_time_init(void); +unsigned long xen_get_wallclock(void); +int xen_set_wallclock(unsigned long time); +unsigned long long xen_sched_clock(void); + +void xen_mark_init_mm_pinned(void); + +DECLARE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode); + +static inline unsigned xen_get_lazy_mode(void) +{ + return x86_read_percpu(xen_lazy_mode); +} + +void __init xen_fill_possible_map(void); + +void __init xen_setup_vcpu_info_placement(void); +void xen_smp_prepare_boot_cpu(void); +void xen_smp_prepare_cpus(unsigned int max_cpus); +int xen_cpu_up(unsigned int cpu); +void xen_smp_cpus_done(unsigned int max_cpus); + +void xen_smp_send_stop(void); +void xen_smp_send_reschedule(int cpu); +int xen_smp_call_function (void (*func) (void *info), void *info, int nonatomic, + int wait); +int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info, + int nonatomic, int wait); + +int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), + void *info, int wait); + + +/* Declare an asm function, along with symbols needed to make it + inlineable */ +#define DECL_ASM(ret, name, ...) \ + ret name(__VA_ARGS__); \ + extern char name##_end[]; \ + extern char name##_reloc[] \ + +DECL_ASM(void, xen_irq_enable_direct, void); +DECL_ASM(void, xen_irq_disable_direct, void); +DECL_ASM(unsigned long, xen_save_fl_direct, void); +DECL_ASM(void, xen_restore_fl_direct, unsigned long); + +void xen_iret_direct(void); +#endif /* XEN_OPS_H */ -- cgit v1.2.3-70-g09d2 From 9402e12b8fef1efe9cf949fc020dcda22d9d8667 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:53 +0200 Subject: i386: move mach-voyager Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Makefile | 2 +- arch/i386/mach-voyager/Makefile | 8 - arch/i386/mach-voyager/setup.c | 125 -- arch/i386/mach-voyager/voyager_basic.c | 331 ------ arch/i386/mach-voyager/voyager_cat.c | 1180 ------------------- arch/i386/mach-voyager/voyager_smp.c | 1952 ------------------------------- arch/i386/mach-voyager/voyager_thread.c | 134 --- arch/x86/mach-voyager/Makefile | 8 + arch/x86/mach-voyager/setup.c | 125 ++ arch/x86/mach-voyager/voyager_basic.c | 331 ++++++ arch/x86/mach-voyager/voyager_cat.c | 1180 +++++++++++++++++++ arch/x86/mach-voyager/voyager_smp.c | 1952 +++++++++++++++++++++++++++++++ arch/x86/mach-voyager/voyager_thread.c | 134 +++ 13 files changed, 3731 insertions(+), 3731 deletions(-) delete mode 100644 arch/i386/mach-voyager/Makefile delete mode 100644 arch/i386/mach-voyager/setup.c delete mode 100644 arch/i386/mach-voyager/voyager_basic.c delete mode 100644 arch/i386/mach-voyager/voyager_cat.c delete mode 100644 arch/i386/mach-voyager/voyager_smp.c delete mode 100644 arch/i386/mach-voyager/voyager_thread.c create mode 100644 arch/x86/mach-voyager/Makefile create mode 100644 arch/x86/mach-voyager/setup.c create mode 100644 arch/x86/mach-voyager/voyager_basic.c create mode 100644 arch/x86/mach-voyager/voyager_cat.c create mode 100644 arch/x86/mach-voyager/voyager_smp.c create mode 100644 arch/x86/mach-voyager/voyager_thread.c (limited to 'arch/x86') diff --git a/arch/i386/Makefile b/arch/i386/Makefile index fc85d6674fc..e03c47e56c7 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -65,7 +65,7 @@ mcore-y := arch/x86/mach-default # Voyager subarch support mflags-$(CONFIG_X86_VOYAGER) := -Iinclude/asm-i386/mach-voyager -mcore-$(CONFIG_X86_VOYAGER) := arch/i386/mach-voyager +mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager # VISWS subarch support mflags-$(CONFIG_X86_VISWS) := -Iinclude/asm-i386/mach-visws diff --git a/arch/i386/mach-voyager/Makefile b/arch/i386/mach-voyager/Makefile deleted file mode 100644 index 33b74cf0dd2..00000000000 --- a/arch/i386/mach-voyager/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# -# Makefile for the linux kernel. -# - -EXTRA_CFLAGS := -Iarch/i386/kernel -obj-y := setup.o voyager_basic.o voyager_thread.o - -obj-$(CONFIG_SMP) += voyager_smp.o voyager_cat.o diff --git a/arch/i386/mach-voyager/setup.c b/arch/i386/mach-voyager/setup.c deleted file mode 100644 index 2b55694e640..00000000000 --- a/arch/i386/mach-voyager/setup.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Machine specific setup for generic - */ - -#include -#include -#include -#include -#include -#include -#include - -void __init pre_intr_init_hook(void) -{ - init_ISA_irqs(); -} - -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; - -void __init intr_init_hook(void) -{ -#ifdef CONFIG_SMP - smp_intr_init(); -#endif - - setup_irq(2, &irq2); -} - -void __init pre_setup_arch_hook(void) -{ - /* Voyagers run their CPUs from independent clocks, so disable - * the TSC code because we can't sync them */ - tsc_disable = 1; -} - -void __init trap_init_hook(void) -{ -} - -static struct irqaction irq0 = { - .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL, - .mask = CPU_MASK_NONE, - .name = "timer" -}; - -void __init time_init_hook(void) -{ - irq0.mask = cpumask_of_cpu(safe_smp_processor_id()); - setup_irq(0, &irq0); -} - -/* Hook for machine specific memory setup. */ - -char * __init machine_specific_memory_setup(void) -{ - char *who; - - who = "NOT VOYAGER"; - - if(voyager_level == 5) { - __u32 addr, length; - int i; - - who = "Voyager-SUS"; - - e820.nr_map = 0; - for(i=0; voyager_memory_detect(i, &addr, &length); i++) { - add_memory_region(addr, length, E820_RAM); - } - return who; - } else if(voyager_level == 4) { - __u32 tom; - __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT)<<8; - /* select the DINO config space */ - outb(VOYAGER_DINO, VOYAGER_CAT_CONFIG_PORT); - /* Read DINO top of memory register */ - tom = ((inb(catbase + 0x4) & 0xf0) << 16) - + ((inb(catbase + 0x5) & 0x7f) << 24); - - if(inb(catbase) != VOYAGER_DINO) { - printk(KERN_ERR "Voyager: Failed to get DINO for L4, setting tom to EXT_MEM_K\n"); - tom = (EXT_MEM_K)<<10; - } - who = "Voyager-TOM"; - add_memory_region(0, 0x9f000, E820_RAM); - /* map from 1M to top of memory */ - add_memory_region(1*1024*1024, tom - 1*1024*1024, E820_RAM); - /* FIXME: Should check the ASICs to see if I need to - * take out the 8M window. Just do it at the moment - * */ - add_memory_region(8*1024*1024, 8*1024*1024, E820_RESERVED); - return who; - } - - who = "BIOS-e820"; - - /* - * Try to copy the BIOS-supplied E820-map. - * - * Otherwise fake a memory map; one section from 0k->640k, - * the next section from 1mb->appropriate_mem_k - */ - sanitize_e820_map(E820_MAP, &E820_MAP_NR); - if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) { - unsigned long mem_size; - - /* compare results from other methods and take the greater */ - if (ALT_MEM_K < EXT_MEM_K) { - mem_size = EXT_MEM_K; - who = "BIOS-88"; - } else { - mem_size = ALT_MEM_K; - who = "BIOS-e801"; - } - - e820.nr_map = 0; - add_memory_region(0, LOWMEMSIZE(), E820_RAM); - add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM); - } - return who; -} diff --git a/arch/i386/mach-voyager/voyager_basic.c b/arch/i386/mach-voyager/voyager_basic.c deleted file mode 100644 index 9b77b39b71a..00000000000 --- a/arch/i386/mach-voyager/voyager_basic.c +++ /dev/null @@ -1,331 +0,0 @@ -/* Copyright (C) 1999,2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * linux/arch/i386/kernel/voyager.c - * - * This file contains all the voyager specific routines for getting - * initialisation of the architecture to function. For additional - * features see: - * - * voyager_cat.c - Voyager CAT bus interface - * voyager_smp.c - Voyager SMP hal (emulates linux smp.c) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Power off function, if any - */ -void (*pm_power_off)(void); -EXPORT_SYMBOL(pm_power_off); - -int voyager_level = 0; - -struct voyager_SUS *voyager_SUS = NULL; - -#ifdef CONFIG_SMP -static void -voyager_dump(int dummy1, struct tty_struct *dummy3) -{ - /* get here via a sysrq */ - voyager_smp_dump(); -} - -static struct sysrq_key_op sysrq_voyager_dump_op = { - .handler = voyager_dump, - .help_msg = "Voyager", - .action_msg = "Dump Voyager Status", -}; -#endif - -void -voyager_detect(struct voyager_bios_info *bios) -{ - if(bios->len != 0xff) { - int class = (bios->class_1 << 8) - | (bios->class_2 & 0xff); - - printk("Voyager System detected.\n" - " Class %x, Revision %d.%d\n", - class, bios->major, bios->minor); - if(class == VOYAGER_LEVEL4) - voyager_level = 4; - else if(class < VOYAGER_LEVEL5_AND_ABOVE) - voyager_level = 3; - else - voyager_level = 5; - printk(" Architecture Level %d\n", voyager_level); - if(voyager_level < 4) - printk("\n**WARNING**: Voyager HAL only supports Levels 4 and 5 Architectures at the moment\n\n"); - /* install the power off handler */ - pm_power_off = voyager_power_off; -#ifdef CONFIG_SMP - register_sysrq_key('v', &sysrq_voyager_dump_op); -#endif - } else { - printk("\n\n**WARNING**: No Voyager Subsystem Found\n"); - } -} - -void -voyager_system_interrupt(int cpl, void *dev_id) -{ - printk("Voyager: detected system interrupt\n"); -} - -/* Routine to read information from the extended CMOS area */ -__u8 -voyager_extended_cmos_read(__u16 addr) -{ - outb(addr & 0xff, 0x74); - outb((addr >> 8) & 0xff, 0x75); - return inb(0x76); -} - -/* internal definitions for the SUS Click Map of memory */ - -#define CLICK_ENTRIES 16 -#define CLICK_SIZE 4096 /* click to byte conversion for Length */ - -typedef struct ClickMap { - struct Entry { - __u32 Address; - __u32 Length; - } Entry[CLICK_ENTRIES]; -} ClickMap_t; - - -/* This routine is pretty much an awful hack to read the bios clickmap by - * mapping it into page 0. There are usually three regions in the map: - * Base Memory - * Extended Memory - * zero length marker for end of map - * - * Returns are 0 for failure and 1 for success on extracting region. - */ -int __init -voyager_memory_detect(int region, __u32 *start, __u32 *length) -{ - int i; - int retval = 0; - __u8 cmos[4]; - ClickMap_t *map; - unsigned long map_addr; - unsigned long old; - - if(region >= CLICK_ENTRIES) { - printk("Voyager: Illegal ClickMap region %d\n", region); - return 0; - } - - for(i = 0; i < sizeof(cmos); i++) - cmos[i] = voyager_extended_cmos_read(VOYAGER_MEMORY_CLICKMAP + i); - - map_addr = *(unsigned long *)cmos; - - /* steal page 0 for this */ - old = pg0[0]; - pg0[0] = ((map_addr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT); - local_flush_tlb(); - /* now clear everything out but page 0 */ - map = (ClickMap_t *)(map_addr & (~PAGE_MASK)); - - /* zero length is the end of the clickmap */ - if(map->Entry[region].Length != 0) { - *length = map->Entry[region].Length * CLICK_SIZE; - *start = map->Entry[region].Address; - retval = 1; - } - - /* replace the mapping */ - pg0[0] = old; - local_flush_tlb(); - return retval; -} - -/* voyager specific handling code for timer interrupts. Used to hand - * off the timer tick to the SMP code, since the VIC doesn't have an - * internal timer (The QIC does, but that's another story). */ -void -voyager_timer_interrupt(void) -{ - if((jiffies & 0x3ff) == 0) { - - /* There seems to be something flaky in either - * hardware or software that is resetting the timer 0 - * count to something much higher than it should be - * This seems to occur in the boot sequence, just - * before root is mounted. Therefore, every 10 - * seconds or so, we sanity check the timer zero count - * and kick it back to where it should be. - * - * FIXME: This is the most awful hack yet seen. I - * should work out exactly what is interfering with - * the timer count settings early in the boot sequence - * and swiftly introduce it to something sharp and - * pointy. */ - __u16 val; - - spin_lock(&i8253_lock); - - outb_p(0x00, 0x43); - val = inb_p(0x40); - val |= inb(0x40) << 8; - spin_unlock(&i8253_lock); - - if(val > LATCH) { - printk("\nVOYAGER: countdown timer value too high (%d), resetting\n\n", val); - spin_lock(&i8253_lock); - outb(0x34,0x43); - outb_p(LATCH & 0xff , 0x40); /* LSB */ - outb(LATCH >> 8 , 0x40); /* MSB */ - spin_unlock(&i8253_lock); - } - } -#ifdef CONFIG_SMP - smp_vic_timer_interrupt(); -#endif -} - -void -voyager_power_off(void) -{ - printk("VOYAGER Power Off\n"); - - if(voyager_level == 5) { - voyager_cat_power_off(); - } else if(voyager_level == 4) { - /* This doesn't apparently work on most L4 machines, - * but the specs say to do this to get automatic power - * off. Unfortunately, if it doesn't power off the - * machine, it ends up doing a cold restart, which - * isn't really intended, so comment out the code */ -#if 0 - int port; - - - /* enable the voyager Configuration Space */ - outb((inb(VOYAGER_MC_SETUP) & 0xf0) | 0x8, - VOYAGER_MC_SETUP); - /* the port for the power off flag is an offset from the - floating base */ - port = (inb(VOYAGER_SSPB_RELOCATION_PORT) << 8) + 0x21; - /* set the power off flag */ - outb(inb(port) | 0x1, port); -#endif - } - /* and wait for it to happen */ - local_irq_disable(); - for(;;) - halt(); -} - -/* copied from process.c */ -static inline void -kb_wait(void) -{ - int i; - - for (i=0; i<0x10000; i++) - if ((inb_p(0x64) & 0x02) == 0) - break; -} - -void -machine_shutdown(void) -{ - /* Architecture specific shutdown needed before a kexec */ -} - -void -machine_restart(char *cmd) -{ - printk("Voyager Warm Restart\n"); - kb_wait(); - - if(voyager_level == 5) { - /* write magic values to the RTC to inform system that - * shutdown is beginning */ - outb(0x8f, 0x70); - outb(0x5 , 0x71); - - udelay(50); - outb(0xfe,0x64); /* pull reset low */ - } else if(voyager_level == 4) { - __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT)<<8; - __u8 basebd = inb(VOYAGER_MC_SETUP); - - outb(basebd | 0x08, VOYAGER_MC_SETUP); - outb(0x02, catbase + 0x21); - } - local_irq_disable(); - for(;;) - halt(); -} - -void -machine_emergency_restart(void) -{ - /*for now, just hook this to a warm restart */ - machine_restart(NULL); -} - -void -mca_nmi_hook(void) -{ - __u8 dumpval __maybe_unused = inb(0xf823); - __u8 swnmi __maybe_unused = inb(0xf813); - - /* FIXME: assume dump switch pressed */ - /* check to see if the dump switch was pressed */ - VDEBUG(("VOYAGER: dumpval = 0x%x, swnmi = 0x%x\n", dumpval, swnmi)); - /* clear swnmi */ - outb(0xff, 0xf813); - /* tell SUS to ignore dump */ - if(voyager_level == 5 && voyager_SUS != NULL) { - if(voyager_SUS->SUS_mbox == VOYAGER_DUMP_BUTTON_NMI) { - voyager_SUS->kernel_mbox = VOYAGER_NO_COMMAND; - voyager_SUS->kernel_flags |= VOYAGER_OS_IN_PROGRESS; - udelay(1000); - voyager_SUS->kernel_mbox = VOYAGER_IGNORE_DUMP; - voyager_SUS->kernel_flags &= ~VOYAGER_OS_IN_PROGRESS; - } - } - printk(KERN_ERR "VOYAGER: Dump switch pressed, printing CPU%d tracebacks\n", smp_processor_id()); - show_stack(NULL, NULL); - show_state(); -} - - - -void -machine_halt(void) -{ - /* treat a halt like a power off */ - machine_power_off(); -} - -void machine_power_off(void) -{ - if (pm_power_off) - pm_power_off(); -} diff --git a/arch/i386/mach-voyager/voyager_cat.c b/arch/i386/mach-voyager/voyager_cat.c deleted file mode 100644 index 26a2d4c54b6..00000000000 --- a/arch/i386/mach-voyager/voyager_cat.c +++ /dev/null @@ -1,1180 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8 -*- */ - -/* Copyright (C) 1999,2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * linux/arch/i386/kernel/voyager_cat.c - * - * This file contains all the logic for manipulating the CAT bus - * in a level 5 machine. - * - * The CAT bus is a serial configuration and test bus. Its primary - * uses are to probe the initial configuration of the system and to - * diagnose error conditions when a system interrupt occurs. The low - * level interface is fairly primitive, so most of this file consists - * of bit shift manipulations to send and receive packets on the - * serial bus */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef VOYAGER_CAT_DEBUG -#define CDEBUG(x) printk x -#else -#define CDEBUG(x) -#endif - -/* the CAT command port */ -#define CAT_CMD (sspb + 0xe) -/* the CAT data port */ -#define CAT_DATA (sspb + 0xd) - -/* the internal cat functions */ -static void cat_pack(__u8 *msg, __u16 start_bit, __u8 *data, - __u16 num_bits); -static void cat_unpack(__u8 *msg, __u16 start_bit, __u8 *data, - __u16 num_bits); -static void cat_build_header(__u8 *header, const __u16 len, - const __u16 smallest_reg_bits, - const __u16 longest_reg_bits); -static int cat_sendinst(voyager_module_t *modp, voyager_asic_t *asicp, - __u8 reg, __u8 op); -static int cat_getdata(voyager_module_t *modp, voyager_asic_t *asicp, - __u8 reg, __u8 *value); -static int cat_shiftout(__u8 *data, __u16 data_bytes, __u16 header_bytes, - __u8 pad_bits); -static int cat_write(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg, - __u8 value); -static int cat_read(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg, - __u8 *value); -static int cat_subread(voyager_module_t *modp, voyager_asic_t *asicp, - __u16 offset, __u16 len, void *buf); -static int cat_senddata(voyager_module_t *modp, voyager_asic_t *asicp, - __u8 reg, __u8 value); -static int cat_disconnect(voyager_module_t *modp, voyager_asic_t *asicp); -static int cat_connect(voyager_module_t *modp, voyager_asic_t *asicp); - -static inline const char * -cat_module_name(int module_id) -{ - switch(module_id) { - case 0x10: - return "Processor Slot 0"; - case 0x11: - return "Processor Slot 1"; - case 0x12: - return "Processor Slot 2"; - case 0x13: - return "Processor Slot 4"; - case 0x14: - return "Memory Slot 0"; - case 0x15: - return "Memory Slot 1"; - case 0x18: - return "Primary Microchannel"; - case 0x19: - return "Secondary Microchannel"; - case 0x1a: - return "Power Supply Interface"; - case 0x1c: - return "Processor Slot 5"; - case 0x1d: - return "Processor Slot 6"; - case 0x1e: - return "Processor Slot 7"; - case 0x1f: - return "Processor Slot 8"; - default: - return "Unknown Module"; - } -} - -static int sspb = 0; /* stores the super port location */ -int voyager_8slot = 0; /* set to true if a 51xx monster */ - -voyager_module_t *voyager_cat_list; - -/* the I/O port assignments for the VIC and QIC */ -static struct resource vic_res = { - .name = "Voyager Interrupt Controller", - .start = 0xFC00, - .end = 0xFC6F -}; -static struct resource qic_res = { - .name = "Quad Interrupt Controller", - .start = 0xFC70, - .end = 0xFCFF -}; - -/* This function is used to pack a data bit stream inside a message. - * It writes num_bits of the data buffer in msg starting at start_bit. - * Note: This function assumes that any unused bit in the data stream - * is set to zero so that the ors will work correctly */ -static void -cat_pack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits) -{ - /* compute initial shift needed */ - const __u16 offset = start_bit % BITS_PER_BYTE; - __u16 len = num_bits / BITS_PER_BYTE; - __u16 byte = start_bit / BITS_PER_BYTE; - __u16 residue = (num_bits % BITS_PER_BYTE) + offset; - int i; - - /* adjust if we have more than a byte of residue */ - if(residue >= BITS_PER_BYTE) { - residue -= BITS_PER_BYTE; - len++; - } - - /* clear out the bits. We assume here that if len==0 then - * residue >= offset. This is always true for the catbus - * operations */ - msg[byte] &= 0xff << (BITS_PER_BYTE - offset); - msg[byte++] |= data[0] >> offset; - if(len == 0) - return; - for(i = 1; i < len; i++) - msg[byte++] = (data[i-1] << (BITS_PER_BYTE - offset)) - | (data[i] >> offset); - if(residue != 0) { - __u8 mask = 0xff >> residue; - __u8 last_byte = data[i-1] << (BITS_PER_BYTE - offset) - | (data[i] >> offset); - - last_byte &= ~mask; - msg[byte] &= mask; - msg[byte] |= last_byte; - } - return; -} -/* unpack the data again (same arguments as cat_pack()). data buffer - * must be zero populated. - * - * Function: given a message string move to start_bit and copy num_bits into - * data (starting at bit 0 in data). - */ -static void -cat_unpack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits) -{ - /* compute initial shift needed */ - const __u16 offset = start_bit % BITS_PER_BYTE; - __u16 len = num_bits / BITS_PER_BYTE; - const __u8 last_bits = num_bits % BITS_PER_BYTE; - __u16 byte = start_bit / BITS_PER_BYTE; - int i; - - if(last_bits != 0) - len++; - - /* special case: want < 8 bits from msg and we can get it from - * a single byte of the msg */ - if(len == 0 && BITS_PER_BYTE - offset >= num_bits) { - data[0] = msg[byte] << offset; - data[0] &= 0xff >> (BITS_PER_BYTE - num_bits); - return; - } - for(i = 0; i < len; i++) { - /* this annoying if has to be done just in case a read of - * msg one beyond the array causes a panic */ - if(offset != 0) { - data[i] = msg[byte++] << offset; - data[i] |= msg[byte] >> (BITS_PER_BYTE - offset); - } - else { - data[i] = msg[byte++]; - } - } - /* do we need to truncate the final byte */ - if(last_bits != 0) { - data[i-1] &= 0xff << (BITS_PER_BYTE - last_bits); - } - return; -} - -static void -cat_build_header(__u8 *header, const __u16 len, const __u16 smallest_reg_bits, - const __u16 longest_reg_bits) -{ - int i; - __u16 start_bit = (smallest_reg_bits - 1) % BITS_PER_BYTE; - __u8 *last_byte = &header[len - 1]; - - if(start_bit == 0) - start_bit = 1; /* must have at least one bit in the hdr */ - - for(i=0; i < len; i++) - header[i] = 0; - - for(i = start_bit; i > 0; i--) - *last_byte = ((*last_byte) << 1) + 1; - -} - -static int -cat_sendinst(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg, __u8 op) -{ - __u8 parity, inst, inst_buf[4] = { 0 }; - __u8 iseq[VOYAGER_MAX_SCAN_PATH], hseq[VOYAGER_MAX_REG_SIZE]; - __u16 ibytes, hbytes, padbits; - int i; - - /* - * Parity is the parity of the register number + 1 (READ_REGISTER - * and WRITE_REGISTER always add '1' to the number of bits == 1) - */ - parity = (__u8)(1 + (reg & 0x01) + - ((__u8)(reg & 0x02) >> 1) + - ((__u8)(reg & 0x04) >> 2) + - ((__u8)(reg & 0x08) >> 3)) % 2; - - inst = ((parity << 7) | (reg << 2) | op); - - outb(VOYAGER_CAT_IRCYC, CAT_CMD); - if(!modp->scan_path_connected) { - if(asicp->asic_id != VOYAGER_CAT_ID) { - printk("**WARNING***: cat_sendinst has disconnected scan path not to CAT asic\n"); - return 1; - } - outb(VOYAGER_CAT_HEADER, CAT_DATA); - outb(inst, CAT_DATA); - if(inb(CAT_DATA) != VOYAGER_CAT_HEADER) { - CDEBUG(("VOYAGER CAT: cat_sendinst failed to get CAT_HEADER\n")); - return 1; - } - return 0; - } - ibytes = modp->inst_bits / BITS_PER_BYTE; - if((padbits = modp->inst_bits % BITS_PER_BYTE) != 0) { - padbits = BITS_PER_BYTE - padbits; - ibytes++; - } - hbytes = modp->largest_reg / BITS_PER_BYTE; - if(modp->largest_reg % BITS_PER_BYTE) - hbytes++; - CDEBUG(("cat_sendinst: ibytes=%d, hbytes=%d\n", ibytes, hbytes)); - /* initialise the instruction sequence to 0xff */ - for(i=0; i < ibytes + hbytes; i++) - iseq[i] = 0xff; - cat_build_header(hseq, hbytes, modp->smallest_reg, modp->largest_reg); - cat_pack(iseq, modp->inst_bits, hseq, hbytes * BITS_PER_BYTE); - inst_buf[0] = inst; - inst_buf[1] = 0xFF >> (modp->largest_reg % BITS_PER_BYTE); - cat_pack(iseq, asicp->bit_location, inst_buf, asicp->ireg_length); -#ifdef VOYAGER_CAT_DEBUG - printk("ins = 0x%x, iseq: ", inst); - for(i=0; i< ibytes + hbytes; i++) - printk("0x%x ", iseq[i]); - printk("\n"); -#endif - if(cat_shiftout(iseq, ibytes, hbytes, padbits)) { - CDEBUG(("VOYAGER CAT: cat_sendinst: cat_shiftout failed\n")); - return 1; - } - CDEBUG(("CAT SHIFTOUT DONE\n")); - return 0; -} - -static int -cat_getdata(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg, - __u8 *value) -{ - if(!modp->scan_path_connected) { - if(asicp->asic_id != VOYAGER_CAT_ID) { - CDEBUG(("VOYAGER CAT: ERROR: cat_getdata to CAT asic with scan path connected\n")); - return 1; - } - if(reg > VOYAGER_SUBADDRHI) - outb(VOYAGER_CAT_RUN, CAT_CMD); - outb(VOYAGER_CAT_DRCYC, CAT_CMD); - outb(VOYAGER_CAT_HEADER, CAT_DATA); - *value = inb(CAT_DATA); - outb(0xAA, CAT_DATA); - if(inb(CAT_DATA) != VOYAGER_CAT_HEADER) { - CDEBUG(("cat_getdata: failed to get VOYAGER_CAT_HEADER\n")); - return 1; - } - return 0; - } - else { - __u16 sbits = modp->num_asics -1 + asicp->ireg_length; - __u16 sbytes = sbits / BITS_PER_BYTE; - __u16 tbytes; - __u8 string[VOYAGER_MAX_SCAN_PATH], trailer[VOYAGER_MAX_REG_SIZE]; - __u8 padbits; - int i; - - outb(VOYAGER_CAT_DRCYC, CAT_CMD); - - if((padbits = sbits % BITS_PER_BYTE) != 0) { - padbits = BITS_PER_BYTE - padbits; - sbytes++; - } - tbytes = asicp->ireg_length / BITS_PER_BYTE; - if(asicp->ireg_length % BITS_PER_BYTE) - tbytes++; - CDEBUG(("cat_getdata: tbytes = %d, sbytes = %d, padbits = %d\n", - tbytes, sbytes, padbits)); - cat_build_header(trailer, tbytes, 1, asicp->ireg_length); - - - for(i = tbytes - 1; i >= 0; i--) { - outb(trailer[i], CAT_DATA); - string[sbytes + i] = inb(CAT_DATA); - } - - for(i = sbytes - 1; i >= 0; i--) { - outb(0xaa, CAT_DATA); - string[i] = inb(CAT_DATA); - } - *value = 0; - cat_unpack(string, padbits + (tbytes * BITS_PER_BYTE) + asicp->asic_location, value, asicp->ireg_length); -#ifdef VOYAGER_CAT_DEBUG - printk("value=0x%x, string: ", *value); - for(i=0; i< tbytes+sbytes; i++) - printk("0x%x ", string[i]); - printk("\n"); -#endif - - /* sanity check the rest of the return */ - for(i=0; i < tbytes; i++) { - __u8 input = 0; - - cat_unpack(string, padbits + (i * BITS_PER_BYTE), &input, BITS_PER_BYTE); - if(trailer[i] != input) { - CDEBUG(("cat_getdata: failed to sanity check rest of ret(%d) 0x%x != 0x%x\n", i, input, trailer[i])); - return 1; - } - } - CDEBUG(("cat_getdata DONE\n")); - return 0; - } -} - -static int -cat_shiftout(__u8 *data, __u16 data_bytes, __u16 header_bytes, __u8 pad_bits) -{ - int i; - - for(i = data_bytes + header_bytes - 1; i >= header_bytes; i--) - outb(data[i], CAT_DATA); - - for(i = header_bytes - 1; i >= 0; i--) { - __u8 header = 0; - __u8 input; - - outb(data[i], CAT_DATA); - input = inb(CAT_DATA); - CDEBUG(("cat_shiftout: returned 0x%x\n", input)); - cat_unpack(data, ((data_bytes + i) * BITS_PER_BYTE) - pad_bits, - &header, BITS_PER_BYTE); - if(input != header) { - CDEBUG(("VOYAGER CAT: cat_shiftout failed to return header 0x%x != 0x%x\n", input, header)); - return 1; - } - } - return 0; -} - -static int -cat_senddata(voyager_module_t *modp, voyager_asic_t *asicp, - __u8 reg, __u8 value) -{ - outb(VOYAGER_CAT_DRCYC, CAT_CMD); - if(!modp->scan_path_connected) { - if(asicp->asic_id != VOYAGER_CAT_ID) { - CDEBUG(("VOYAGER CAT: ERROR: scan path disconnected when asic != CAT\n")); - return 1; - } - outb(VOYAGER_CAT_HEADER, CAT_DATA); - outb(value, CAT_DATA); - if(inb(CAT_DATA) != VOYAGER_CAT_HEADER) { - CDEBUG(("cat_senddata: failed to get correct header response to sent data\n")); - return 1; - } - if(reg > VOYAGER_SUBADDRHI) { - outb(VOYAGER_CAT_RUN, CAT_CMD); - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - } - - return 0; - } - else { - __u16 hbytes = asicp->ireg_length / BITS_PER_BYTE; - __u16 dbytes = (modp->num_asics - 1 + asicp->ireg_length)/BITS_PER_BYTE; - __u8 padbits, dseq[VOYAGER_MAX_SCAN_PATH], - hseq[VOYAGER_MAX_REG_SIZE]; - int i; - - if((padbits = (modp->num_asics - 1 - + asicp->ireg_length) % BITS_PER_BYTE) != 0) { - padbits = BITS_PER_BYTE - padbits; - dbytes++; - } - if(asicp->ireg_length % BITS_PER_BYTE) - hbytes++; - - cat_build_header(hseq, hbytes, 1, asicp->ireg_length); - - for(i = 0; i < dbytes + hbytes; i++) - dseq[i] = 0xff; - CDEBUG(("cat_senddata: dbytes=%d, hbytes=%d, padbits=%d\n", - dbytes, hbytes, padbits)); - cat_pack(dseq, modp->num_asics - 1 + asicp->ireg_length, - hseq, hbytes * BITS_PER_BYTE); - cat_pack(dseq, asicp->asic_location, &value, - asicp->ireg_length); -#ifdef VOYAGER_CAT_DEBUG - printk("dseq "); - for(i=0; i 1) { - /* set auto increment */ - __u8 newval; - - if(cat_read(modp, asicp, VOYAGER_AUTO_INC_REG, &val)) { - CDEBUG(("cat_subaddrsetup: read of VOYAGER_AUTO_INC_REG failed\n")); - return 1; - } - CDEBUG(("cat_subaddrsetup: VOYAGER_AUTO_INC_REG = 0x%x\n", val)); - newval = val | VOYAGER_AUTO_INC; - if(newval != val) { - if(cat_write(modp, asicp, VOYAGER_AUTO_INC_REG, val)) { - CDEBUG(("cat_subaddrsetup: write to VOYAGER_AUTO_INC_REG failed\n")); - return 1; - } - } - } - if(cat_write(modp, asicp, VOYAGER_SUBADDRLO, (__u8)(offset &0xff))) { - CDEBUG(("cat_subaddrsetup: write to SUBADDRLO failed\n")); - return 1; - } - if(asicp->subaddr > VOYAGER_SUBADDR_LO) { - if(cat_write(modp, asicp, VOYAGER_SUBADDRHI, (__u8)(offset >> 8))) { - CDEBUG(("cat_subaddrsetup: write to SUBADDRHI failed\n")); - return 1; - } - cat_read(modp, asicp, VOYAGER_SUBADDRHI, &val); - CDEBUG(("cat_subaddrsetup: offset = %d, hi = %d\n", offset, val)); - } - cat_read(modp, asicp, VOYAGER_SUBADDRLO, &val); - CDEBUG(("cat_subaddrsetup: offset = %d, lo = %d\n", offset, val)); - return 0; -} - -static int -cat_subwrite(voyager_module_t *modp, voyager_asic_t *asicp, __u16 offset, - __u16 len, void *buf) -{ - int i, retval; - - /* FIXME: need special actions for VOYAGER_CAT_ID here */ - if(asicp->asic_id == VOYAGER_CAT_ID) { - CDEBUG(("cat_subwrite: ATTEMPT TO WRITE TO CAT ASIC\n")); - /* FIXME -- This is supposed to be handled better - * There is a problem writing to the cat asic in the - * PSI. The 30us delay seems to work, though */ - udelay(30); - } - - if((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) { - printk("cat_subwrite: cat_subaddrsetup FAILED\n"); - return retval; - } - - if(cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_WRITE_CONFIG)) { - printk("cat_subwrite: cat_sendinst FAILED\n"); - return 1; - } - for(i = 0; i < len; i++) { - if(cat_senddata(modp, asicp, 0xFF, ((__u8 *)buf)[i])) { - printk("cat_subwrite: cat_sendata element at %d FAILED\n", i); - return 1; - } - } - return 0; -} -static int -cat_subread(voyager_module_t *modp, voyager_asic_t *asicp, __u16 offset, - __u16 len, void *buf) -{ - int i, retval; - - if((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) { - CDEBUG(("cat_subread: cat_subaddrsetup FAILED\n")); - return retval; - } - - if(cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_READ_CONFIG)) { - CDEBUG(("cat_subread: cat_sendinst failed\n")); - return 1; - } - for(i = 0; i < len; i++) { - if(cat_getdata(modp, asicp, 0xFF, - &((__u8 *)buf)[i])) { - CDEBUG(("cat_subread: cat_getdata element %d failed\n", i)); - return 1; - } - } - return 0; -} - - -/* buffer for storing EPROM data read in during initialisation */ -static __initdata __u8 eprom_buf[0xFFFF]; -static voyager_module_t *voyager_initial_module; - -/* Initialise the cat bus components. We assume this is called by the - * boot cpu *after* all memory initialisation has been done (so we can - * use kmalloc) but before smp initialisation, so we can probe the SMP - * configuration and pick up necessary information. */ -void -voyager_cat_init(void) -{ - voyager_module_t **modpp = &voyager_initial_module; - voyager_asic_t **asicpp; - voyager_asic_t *qabc_asic = NULL; - int i, j; - unsigned long qic_addr = 0; - __u8 qabc_data[0x20]; - __u8 num_submodules, val; - voyager_eprom_hdr_t *eprom_hdr = (voyager_eprom_hdr_t *)&eprom_buf[0]; - - __u8 cmos[4]; - unsigned long addr; - - /* initiallise the SUS mailbox */ - for(i=0; iSUS_version); - voyager_SUS->kernel_version = VOYAGER_MAILBOX_VERSION; - voyager_SUS->kernel_flags = VOYAGER_OS_HAS_SYSINT; - } - - /* clear the processor counts */ - voyager_extended_vic_processors = 0; - voyager_quad_processors = 0; - - - - printk("VOYAGER: beginning CAT bus probe\n"); - /* set up the SuperSet Port Block which tells us where the - * CAT communication port is */ - sspb = inb(VOYAGER_SSPB_RELOCATION_PORT) * 0x100; - VDEBUG(("VOYAGER DEBUG: sspb = 0x%x\n", sspb)); - - /* now find out if were 8 slot or normal */ - if((inb(VIC_PROC_WHO_AM_I) & EIGHT_SLOT_IDENTIFIER) - == EIGHT_SLOT_IDENTIFIER) { - voyager_8slot = 1; - printk(KERN_NOTICE "Voyager: Eight slot 51xx configuration detected\n"); - } - - for(i = VOYAGER_MIN_MODULE; - i <= VOYAGER_MAX_MODULE; i++) { - __u8 input; - int asic; - __u16 eprom_size; - __u16 sp_offset; - - outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); - outb(i, VOYAGER_CAT_CONFIG_PORT); - - /* check the presence of the module */ - outb(VOYAGER_CAT_RUN, CAT_CMD); - outb(VOYAGER_CAT_IRCYC, CAT_CMD); - outb(VOYAGER_CAT_HEADER, CAT_DATA); - /* stream series of alternating 1's and 0's to stimulate - * response */ - outb(0xAA, CAT_DATA); - input = inb(CAT_DATA); - outb(VOYAGER_CAT_END, CAT_CMD); - if(input != VOYAGER_CAT_HEADER) { - continue; - } - CDEBUG(("VOYAGER DEBUG: found module id 0x%x, %s\n", i, - cat_module_name(i))); - *modpp = kmalloc(sizeof(voyager_module_t), GFP_KERNEL); /*&voyager_module_storage[cat_count++];*/ - if(*modpp == NULL) { - printk("**WARNING** kmalloc failure in cat_init\n"); - continue; - } - memset(*modpp, 0, sizeof(voyager_module_t)); - /* need temporary asic for cat_subread. It will be - * filled in correctly later */ - (*modpp)->asic = kmalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count];*/ - if((*modpp)->asic == NULL) { - printk("**WARNING** kmalloc failure in cat_init\n"); - continue; - } - memset((*modpp)->asic, 0, sizeof(voyager_asic_t)); - (*modpp)->asic->asic_id = VOYAGER_CAT_ID; - (*modpp)->asic->subaddr = VOYAGER_SUBADDR_HI; - (*modpp)->module_addr = i; - (*modpp)->scan_path_connected = 0; - if(i == VOYAGER_PSI) { - /* Exception leg for modules with no EEPROM */ - printk("Module \"%s\"\n", cat_module_name(i)); - continue; - } - - CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET)); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(*modpp, (*modpp)->asic); - if(cat_subread(*modpp, (*modpp)->asic, - VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size), - &eprom_size)) { - printk("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", i); - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - if(eprom_size > sizeof(eprom_buf)) { - printk("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. Need %d\n", i, eprom_size); - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, eprom_size)); - if(cat_subread(*modpp, (*modpp)->asic, 0, - eprom_size, eprom_buf)) { - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - outb(VOYAGER_CAT_END, CAT_CMD); - printk("Module \"%s\", version 0x%x, tracer 0x%x, asics %d\n", - cat_module_name(i), eprom_hdr->version_id, - *((__u32 *)eprom_hdr->tracer), eprom_hdr->num_asics); - (*modpp)->ee_size = eprom_hdr->ee_size; - (*modpp)->num_asics = eprom_hdr->num_asics; - asicpp = &((*modpp)->asic); - sp_offset = eprom_hdr->scan_path_offset; - /* All we really care about are the Quad cards. We - * identify them because they are in a processor slot - * and have only four asics */ - if((i < 0x10 || (i>=0x14 && i < 0x1c) || i>0x1f)) { - modpp = &((*modpp)->next); - continue; - } - /* Now we know it's in a processor slot, does it have - * a quad baseboard submodule */ - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_read(*modpp, (*modpp)->asic, VOYAGER_SUBMODPRESENT, - &num_submodules); - /* lowest two bits, active low */ - num_submodules = ~(0xfc | num_submodules); - CDEBUG(("VOYAGER CAT: %d submodules present\n", num_submodules)); - if(num_submodules == 0) { - /* fill in the dyadic extended processors */ - __u8 cpu = i & 0x07; - - printk("Module \"%s\": Dyadic Processor Card\n", - cat_module_name(i)); - voyager_extended_vic_processors |= (1<asic, VOYAGER_SUBMODSELECT, &val); - CDEBUG(("cat_init: SUBMODSELECT value = 0x%x\n", val)); - val = (val & 0x7c) | VOYAGER_QUAD_BASEBOARD; - cat_write(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, val); - - outb(VOYAGER_CAT_END, CAT_CMD); - - - CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET)); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(*modpp, (*modpp)->asic); - if(cat_subread(*modpp, (*modpp)->asic, - VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size), - &eprom_size)) { - printk("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", i); - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - if(eprom_size > sizeof(eprom_buf)) { - printk("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. Need %d\n", i, eprom_size); - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, eprom_size)); - if(cat_subread(*modpp, (*modpp)->asic, 0, - eprom_size, eprom_buf)) { - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - outb(VOYAGER_CAT_END, CAT_CMD); - /* Now do everything for the QBB submodule 1 */ - (*modpp)->ee_size = eprom_hdr->ee_size; - (*modpp)->num_asics = eprom_hdr->num_asics; - asicpp = &((*modpp)->asic); - sp_offset = eprom_hdr->scan_path_offset; - /* get rid of the dummy CAT asic and read the real one */ - kfree((*modpp)->asic); - for(asic=0; asic < (*modpp)->num_asics; asic++) { - int j; - voyager_asic_t *asicp = *asicpp - = kzalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count++];*/ - voyager_sp_table_t *sp_table; - voyager_at_t *asic_table; - voyager_jtt_t *jtag_table; - - if(asicp == NULL) { - printk("**WARNING** kmalloc failure in cat_init\n"); - continue; - } - asicpp = &(asicp->next); - asicp->asic_location = asic; - sp_table = (voyager_sp_table_t *)(eprom_buf + sp_offset); - asicp->asic_id = sp_table->asic_id; - asic_table = (voyager_at_t *)(eprom_buf + sp_table->asic_data_offset); - for(j=0; j<4; j++) - asicp->jtag_id[j] = asic_table->jtag_id[j]; - jtag_table = (voyager_jtt_t *)(eprom_buf + asic_table->jtag_offset); - asicp->ireg_length = jtag_table->ireg_len; - asicp->bit_location = (*modpp)->inst_bits; - (*modpp)->inst_bits += asicp->ireg_length; - if(asicp->ireg_length > (*modpp)->largest_reg) - (*modpp)->largest_reg = asicp->ireg_length; - if (asicp->ireg_length < (*modpp)->smallest_reg || - (*modpp)->smallest_reg == 0) - (*modpp)->smallest_reg = asicp->ireg_length; - CDEBUG(("asic 0x%x, ireg_length=%d, bit_location=%d\n", - asicp->asic_id, asicp->ireg_length, - asicp->bit_location)); - if(asicp->asic_id == VOYAGER_QUAD_QABC) { - CDEBUG(("VOYAGER CAT: QABC ASIC found\n")); - qabc_asic = asicp; - } - sp_offset += sizeof(voyager_sp_table_t); - } - CDEBUG(("Module inst_bits = %d, largest_reg = %d, smallest_reg=%d\n", - (*modpp)->inst_bits, (*modpp)->largest_reg, - (*modpp)->smallest_reg)); - /* OK, now we have the QUAD ASICs set up, use them. - * we need to: - * - * 1. Find the Memory area for the Quad CPIs. - * 2. Find the Extended VIC processor - * 3. Configure a second extended VIC processor (This - * cannot be done for the 51xx. - * */ - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_connect(*modpp, (*modpp)->asic); - CDEBUG(("CAT CONNECTED!!\n")); - cat_subread(*modpp, qabc_asic, 0, sizeof(qabc_data), qabc_data); - qic_addr = qabc_data[5] << 8; - qic_addr = (qic_addr | qabc_data[6]) << 8; - qic_addr = (qic_addr | qabc_data[7]) << 8; - printk("Module \"%s\": Quad Processor Card; CPI 0x%lx, SET=0x%x\n", - cat_module_name(i), qic_addr, qabc_data[8]); -#if 0 /* plumbing fails---FIXME */ - if((qabc_data[8] & 0xf0) == 0) { - /* FIXME: 32 way 8 CPU slot monster cannot be - * plumbed this way---need to check for it */ - - printk("Plumbing second Extended Quad Processor\n"); - /* second VIC line hardwired to Quad CPU 1 */ - qabc_data[8] |= 0x20; - cat_subwrite(*modpp, qabc_asic, 8, 1, &qabc_data[8]); -#ifdef VOYAGER_CAT_DEBUG - /* verify plumbing */ - cat_subread(*modpp, qabc_asic, 8, 1, &qabc_data[8]); - if((qabc_data[8] & 0xf0) == 0) { - CDEBUG(("PLUMBING FAILED: 0x%x\n", qabc_data[8])); - } -#endif - } -#endif - - { - struct resource *res = kzalloc(sizeof(struct resource),GFP_KERNEL); - res->name = kmalloc(128, GFP_KERNEL); - sprintf((char *)res->name, "Voyager %s Quad CPI", cat_module_name(i)); - res->start = qic_addr; - res->end = qic_addr + 0x3ff; - request_resource(&iomem_resource, res); - } - - qic_addr = (unsigned long)ioremap(qic_addr, 0x400); - - for(j = 0; j < 4; j++) { - __u8 cpu; - - if(voyager_8slot) { - /* 8 slot has a different mapping, - * each slot has only one vic line, so - * 1 cpu in each slot must be < 8 */ - cpu = (i & 0x07) + j*8; - } else { - cpu = (i & 0x03) + j*4; - } - if( (qabc_data[8] & (1<next); - } - *modpp = NULL; - printk("CAT Bus Initialisation finished: extended procs 0x%x, quad procs 0x%x, allowed vic boot = 0x%x\n", voyager_extended_vic_processors, voyager_quad_processors, voyager_allowed_boot_processors); - request_resource(&ioport_resource, &vic_res); - if(voyager_quad_processors) - request_resource(&ioport_resource, &qic_res); - /* set up the front power switch */ -} - -int -voyager_cat_readb(__u8 module, __u8 asic, int reg) -{ - return 0; -} - -static int -cat_disconnect(voyager_module_t *modp, voyager_asic_t *asicp) -{ - __u8 val; - int err = 0; - - if(!modp->scan_path_connected) - return 0; - if(asicp->asic_id != VOYAGER_CAT_ID) { - CDEBUG(("cat_disconnect: ASIC is not CAT\n")); - return 1; - } - err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val); - if(err) { - CDEBUG(("cat_disconnect: failed to read SCANPATH\n")); - return err; - } - val &= VOYAGER_DISCONNECT_ASIC; - err = cat_write(modp, asicp, VOYAGER_SCANPATH, val); - if(err) { - CDEBUG(("cat_disconnect: failed to write SCANPATH\n")); - return err; - } - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - modp->scan_path_connected = 0; - - return 0; -} - -static int -cat_connect(voyager_module_t *modp, voyager_asic_t *asicp) -{ - __u8 val; - int err = 0; - - if(modp->scan_path_connected) - return 0; - if(asicp->asic_id != VOYAGER_CAT_ID) { - CDEBUG(("cat_connect: ASIC is not CAT\n")); - return 1; - } - - err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val); - if(err) { - CDEBUG(("cat_connect: failed to read SCANPATH\n")); - return err; - } - val |= VOYAGER_CONNECT_ASIC; - err = cat_write(modp, asicp, VOYAGER_SCANPATH, val); - if(err) { - CDEBUG(("cat_connect: failed to write SCANPATH\n")); - return err; - } - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - modp->scan_path_connected = 1; - - return 0; -} - -void -voyager_cat_power_off(void) -{ - /* Power the machine off by writing to the PSI over the CAT - * bus */ - __u8 data; - voyager_module_t psi = { 0 }; - voyager_asic_t psi_asic = { 0 }; - - psi.asic = &psi_asic; - psi.asic->asic_id = VOYAGER_CAT_ID; - psi.asic->subaddr = VOYAGER_SUBADDR_HI; - psi.module_addr = VOYAGER_PSI; - psi.scan_path_connected = 0; - - outb(VOYAGER_CAT_END, CAT_CMD); - /* Connect the PSI to the CAT Bus */ - outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(&psi, &psi_asic); - /* Read the status */ - cat_subread(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); - CDEBUG(("PSI STATUS 0x%x\n", data)); - /* These two writes are power off prep and perform */ - data = PSI_CLEAR; - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); - data = PSI_POWER_DOWN; - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); -} - -struct voyager_status voyager_status = { 0 }; - -void -voyager_cat_psi(__u8 cmd, __u16 reg, __u8 *data) -{ - voyager_module_t psi = { 0 }; - voyager_asic_t psi_asic = { 0 }; - - psi.asic = &psi_asic; - psi.asic->asic_id = VOYAGER_CAT_ID; - psi.asic->subaddr = VOYAGER_SUBADDR_HI; - psi.module_addr = VOYAGER_PSI; - psi.scan_path_connected = 0; - - outb(VOYAGER_CAT_END, CAT_CMD); - /* Connect the PSI to the CAT Bus */ - outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(&psi, &psi_asic); - switch(cmd) { - case VOYAGER_PSI_READ: - cat_read(&psi, &psi_asic, reg, data); - break; - case VOYAGER_PSI_WRITE: - cat_write(&psi, &psi_asic, reg, *data); - break; - case VOYAGER_PSI_SUBREAD: - cat_subread(&psi, &psi_asic, reg, 1, data); - break; - case VOYAGER_PSI_SUBWRITE: - cat_subwrite(&psi, &psi_asic, reg, 1, data); - break; - default: - printk(KERN_ERR "Voyager PSI, unrecognised command %d\n", cmd); - break; - } - outb(VOYAGER_CAT_END, CAT_CMD); -} - -void -voyager_cat_do_common_interrupt(void) -{ - /* This is caused either by a memory parity error or something - * in the PSI */ - __u8 data; - voyager_module_t psi = { 0 }; - voyager_asic_t psi_asic = { 0 }; - struct voyager_psi psi_reg; - int i; - re_read: - psi.asic = &psi_asic; - psi.asic->asic_id = VOYAGER_CAT_ID; - psi.asic->subaddr = VOYAGER_SUBADDR_HI; - psi.module_addr = VOYAGER_PSI; - psi.scan_path_connected = 0; - - outb(VOYAGER_CAT_END, CAT_CMD); - /* Connect the PSI to the CAT Bus */ - outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(&psi, &psi_asic); - /* Read the status. NOTE: Need to read *all* the PSI regs here - * otherwise the cmn int will be reasserted */ - for(i = 0; i < sizeof(psi_reg.regs); i++) { - cat_read(&psi, &psi_asic, i, &((__u8 *)&psi_reg.regs)[i]); - } - outb(VOYAGER_CAT_END, CAT_CMD); - if((psi_reg.regs.checkbit & 0x02) == 0) { - psi_reg.regs.checkbit |= 0x02; - cat_write(&psi, &psi_asic, 5, psi_reg.regs.checkbit); - printk("VOYAGER RE-READ PSI\n"); - goto re_read; - } - outb(VOYAGER_CAT_RUN, CAT_CMD); - for(i = 0; i < sizeof(psi_reg.subregs); i++) { - /* This looks strange, but the PSI doesn't do auto increment - * correctly */ - cat_subread(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG + i, - 1, &((__u8 *)&psi_reg.subregs)[i]); - } - outb(VOYAGER_CAT_END, CAT_CMD); -#ifdef VOYAGER_CAT_DEBUG - printk("VOYAGER PSI: "); - for(i=0; i -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* TLB state -- visible externally, indexed physically */ -DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0 }; - -/* CPU IRQ affinity -- set to all ones initially */ -static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = ~0UL }; - -/* per CPU data structure (for /proc/cpuinfo et al), visible externally - * indexed physically */ -struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_data); - -/* physical ID of the CPU used to boot the system */ -unsigned char boot_cpu_id; - -/* The memory line addresses for the Quad CPIs */ -struct voyager_qic_cpi *voyager_quad_cpi_addr[NR_CPUS] __cacheline_aligned; - -/* The masks for the Extended VIC processors, filled in by cat_init */ -__u32 voyager_extended_vic_processors = 0; - -/* Masks for the extended Quad processors which cannot be VIC booted */ -__u32 voyager_allowed_boot_processors = 0; - -/* The mask for the Quad Processors (both extended and non-extended) */ -__u32 voyager_quad_processors = 0; - -/* Total count of live CPUs, used in process.c to display - * the CPU information and in irq.c for the per CPU irq - * activity count. Finally exported by i386_ksyms.c */ -static int voyager_extended_cpus = 1; - -/* Have we found an SMP box - used by time.c to do the profiling - interrupt for timeslicing; do not set to 1 until the per CPU timer - interrupt is active */ -int smp_found_config = 0; - -/* Used for the invalidate map that's also checked in the spinlock */ -static volatile unsigned long smp_invalidate_needed; - -/* Bitmask of currently online CPUs - used by setup.c for - /proc/cpuinfo, visible externally but still physical */ -cpumask_t cpu_online_map = CPU_MASK_NONE; -EXPORT_SYMBOL(cpu_online_map); - -/* Bitmask of CPUs present in the system - exported by i386_syms.c, used - * by scheduler but indexed physically */ -cpumask_t phys_cpu_present_map = CPU_MASK_NONE; - - -/* The internal functions */ -static void send_CPI(__u32 cpuset, __u8 cpi); -static void ack_CPI(__u8 cpi); -static int ack_QIC_CPI(__u8 cpi); -static void ack_special_QIC_CPI(__u8 cpi); -static void ack_VIC_CPI(__u8 cpi); -static void send_CPI_allbutself(__u8 cpi); -static void mask_vic_irq(unsigned int irq); -static void unmask_vic_irq(unsigned int irq); -static unsigned int startup_vic_irq(unsigned int irq); -static void enable_local_vic_irq(unsigned int irq); -static void disable_local_vic_irq(unsigned int irq); -static void before_handle_vic_irq(unsigned int irq); -static void after_handle_vic_irq(unsigned int irq); -static void set_vic_irq_affinity(unsigned int irq, cpumask_t mask); -static void ack_vic_irq(unsigned int irq); -static void vic_enable_cpi(void); -static void do_boot_cpu(__u8 cpuid); -static void do_quad_bootstrap(void); - -int hard_smp_processor_id(void); -int safe_smp_processor_id(void); - -/* Inline functions */ -static inline void -send_one_QIC_CPI(__u8 cpu, __u8 cpi) -{ - voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi = - (smp_processor_id() << 16) + cpi; -} - -static inline void -send_QIC_CPI(__u32 cpuset, __u8 cpi) -{ - int cpu; - - for_each_online_cpu(cpu) { - if(cpuset & (1<>3 &0x7 on the 32 way */ - if(((cpuid >> 2) & 0x03) == i) - /* don't lower our own mask! */ - continue; - - /* masquerade as local Quad CPU */ - outb(QIC_CPUID_ENABLE | i, QIC_PROCESSOR_ID); - /* enable the startup CPI */ - outb(QIC_BOOT_CPI_MASK, QIC_MASK_REGISTER1); - /* restore cpu id */ - outb(0, QIC_PROCESSOR_ID); - } - local_irq_restore(flags); - } -} - - -/* Set up all the basic stuff: read the SMP config and make all the - * SMP information reflect only the boot cpu. All others will be - * brought on-line later. */ -void __init -find_smp_config(void) -{ - int i; - - boot_cpu_id = hard_smp_processor_id(); - - printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id); - - /* initialize the CPU structures (moved from smp_boot_cpus) */ - for(i=0; icpu = boot_cpu_id; - x86_write_percpu(cpu_number, boot_cpu_id); -} - -/* - * The bootstrap kernel entry code has set these up. Save them - * for a given CPU, id is physical */ -void __init -smp_store_cpu_info(int id) -{ - struct cpuinfo_x86 *c=&cpu_data[id]; - - *c = boot_cpu_data; - - identify_secondary_cpu(c); -} - -/* set up the trampoline and return the physical address of the code */ -static __u32 __init -setup_trampoline(void) -{ - /* these two are global symbols in trampoline.S */ - extern __u8 trampoline_end[]; - extern __u8 trampoline_data[]; - - memcpy((__u8 *)trampoline_base, trampoline_data, - trampoline_end - trampoline_data); - return virt_to_phys((__u8 *)trampoline_base); -} - -/* Routine initially called when a non-boot CPU is brought online */ -static void __init -start_secondary(void *unused) -{ - __u8 cpuid = hard_smp_processor_id(); - /* external functions not defined in the headers */ - extern void calibrate_delay(void); - - cpu_init(); - - /* OK, we're in the routine */ - ack_CPI(VIC_CPU_BOOT_CPI); - - /* setup the 8259 master slave pair belonging to this CPU --- - * we won't actually receive any until the boot CPU - * relinquishes it's static routing mask */ - vic_setup_pic(); - - qic_setup(); - - if(is_cpu_quad() && !is_cpu_vic_boot()) { - /* clear the boot CPI */ - __u8 dummy; - - dummy = voyager_quad_cpi_addr[cpuid]->qic_cpi[VIC_CPU_BOOT_CPI].cpi; - printk("read dummy %d\n", dummy); - } - - /* lower the mask to receive CPIs */ - vic_enable_cpi(); - - VDEBUG(("VOYAGER SMP: CPU%d, stack at about %p\n", cpuid, &cpuid)); - - /* enable interrupts */ - local_irq_enable(); - - /* get our bogomips */ - calibrate_delay(); - - /* save our processor parameters */ - smp_store_cpu_info(cpuid); - - /* if we're a quad, we may need to bootstrap other CPUs */ - do_quad_bootstrap(); - - /* FIXME: this is rather a poor hack to prevent the CPU - * activating softirqs while it's supposed to be waiting for - * permission to proceed. Without this, the new per CPU stuff - * in the softirqs will fail */ - local_irq_disable(); - cpu_set(cpuid, cpu_callin_map); - - /* signal that we're done */ - cpu_booted_map = 1; - - while (!cpu_isset(cpuid, smp_commenced_mask)) - rep_nop(); - local_irq_enable(); - - local_flush_tlb(); - - cpu_set(cpuid, cpu_online_map); - wmb(); - cpu_idle(); -} - - -/* Routine to kick start the given CPU and wait for it to report ready - * (or timeout in startup). When this routine returns, the requested - * CPU is either fully running and configured or known to be dead. - * - * We call this routine sequentially 1 CPU at a time, so no need for - * locking */ - -static void __init -do_boot_cpu(__u8 cpu) -{ - struct task_struct *idle; - int timeout; - unsigned long flags; - int quad_boot = (1<> 4) & 0xFFFF; - - cpucount++; - alternatives_smp_switch(1); - - idle = fork_idle(cpu); - if(IS_ERR(idle)) - panic("failed fork for CPU%d", cpu); - idle->thread.eip = (unsigned long) start_secondary; - /* init_tasks (in sched.c) is indexed logically */ - stack_start.esp = (void *) idle->thread.esp; - - init_gdt(cpu); - per_cpu(current_task, cpu) = idle; - early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); - irq_ctx_init(cpu); - - /* Note: Don't modify initial ss override */ - VDEBUG(("VOYAGER SMP: Booting CPU%d at 0x%lx[%x:%x], stack %p\n", cpu, - (unsigned long)hijack_source.val, hijack_source.idt.Segment, - hijack_source.idt.Offset, stack_start.esp)); - - /* init lowmem identity mapping */ - clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); - flush_tlb_all(); - - if(quad_boot) { - printk("CPU %d: non extended Quad boot\n", cpu); - hijack_vector = (__u32 *)phys_to_virt((VIC_CPU_BOOT_CPI + QIC_DEFAULT_CPI_BASE)*4); - *hijack_vector = hijack_source.val; - } else { - printk("CPU%d: extended VIC boot\n", cpu); - hijack_vector = (__u32 *)phys_to_virt((VIC_CPU_BOOT_CPI + VIC_DEFAULT_CPI_BASE)*4); - *hijack_vector = hijack_source.val; - /* VIC errata, may also receive interrupt at this address */ - hijack_vector = (__u32 *)phys_to_virt((VIC_CPU_BOOT_ERRATA_CPI + VIC_DEFAULT_CPI_BASE)*4); - *hijack_vector = hijack_source.val; - } - /* All non-boot CPUs start with interrupts fully masked. Need - * to lower the mask of the CPI we're about to send. We do - * this in the VIC by masquerading as the processor we're - * about to boot and lowering its interrupt mask */ - local_irq_save(flags); - if(quad_boot) { - send_one_QIC_CPI(cpu, VIC_CPU_BOOT_CPI); - } else { - outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID); - /* here we're altering registers belonging to `cpu' */ - - outb(VIC_BOOT_INTERRUPT_MASK, 0x21); - /* now go back to our original identity */ - outb(boot_cpu_id, VIC_PROCESSOR_ID); - - /* and boot the CPU */ - - send_CPI((1<thread.esp),"r" (current->thread.eip)); -} - -/* handle a Voyager SYS_INT -- If we don't, the base board will - * panic the system. - * - * System interrupts occur because some problem was detected on the - * various busses. To find out what you have to probe all the - * hardware via the CAT bus. FIXME: At the moment we do nothing. */ -fastcall void -smp_vic_sys_interrupt(struct pt_regs *regs) -{ - ack_CPI(VIC_SYS_INT); - printk("Voyager SYSTEM INTERRUPT\n"); -} - -/* Handle a voyager CMN_INT; These interrupts occur either because of - * a system status change or because a single bit memory error - * occurred. FIXME: At the moment, ignore all this. */ -fastcall void -smp_vic_cmn_interrupt(struct pt_regs *regs) -{ - static __u8 in_cmn_int = 0; - static DEFINE_SPINLOCK(cmn_int_lock); - - /* common ints are broadcast, so make sure we only do this once */ - _raw_spin_lock(&cmn_int_lock); - if(in_cmn_int) - goto unlock_end; - - in_cmn_int++; - _raw_spin_unlock(&cmn_int_lock); - - VDEBUG(("Voyager COMMON INTERRUPT\n")); - - if(voyager_level == 5) - voyager_cat_do_common_interrupt(); - - _raw_spin_lock(&cmn_int_lock); - in_cmn_int = 0; - unlock_end: - _raw_spin_unlock(&cmn_int_lock); - ack_CPI(VIC_CMN_INT); -} - -/* - * Reschedule call back. Nothing to do, all the work is done - * automatically when we return from the interrupt. */ -static void -smp_reschedule_interrupt(void) -{ - /* do nothing */ -} - -static struct mm_struct * flush_mm; -static unsigned long flush_va; -static DEFINE_SPINLOCK(tlbstate_lock); -#define FLUSH_ALL 0xffffffff - -/* - * We cannot call mmdrop() because we are in interrupt context, - * instead update mm->cpu_vm_mask. - * - * We need to reload %cr3 since the page tables may be going - * away from under us.. - */ -static inline void -leave_mm (unsigned long cpu) -{ - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) - BUG(); - cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); - load_cr3(swapper_pg_dir); -} - - -/* - * Invalidate call-back - */ -static void -smp_invalidate_interrupt(void) -{ - __u8 cpu = smp_processor_id(); - - if (!test_bit(cpu, &smp_invalidate_needed)) - return; - /* This will flood messages. Don't uncomment unless you see - * Problems with cross cpu invalidation - VDEBUG(("VOYAGER SMP: CPU%d received INVALIDATE_CPI\n", - smp_processor_id())); - */ - - if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { - if (flush_va == FLUSH_ALL) - local_flush_tlb(); - else - __flush_tlb_one(flush_va); - } else - leave_mm(cpu); - } - smp_mb__before_clear_bit(); - clear_bit(cpu, &smp_invalidate_needed); - smp_mb__after_clear_bit(); -} - -/* All the new flush operations for 2.4 */ - - -/* This routine is called with a physical cpu mask */ -static void -voyager_flush_tlb_others (unsigned long cpumask, struct mm_struct *mm, - unsigned long va) -{ - int stuck = 50000; - - if (!cpumask) - BUG(); - if ((cpumask & cpus_addr(cpu_online_map)[0]) != cpumask) - BUG(); - if (cpumask & (1 << smp_processor_id())) - BUG(); - if (!mm) - BUG(); - - spin_lock(&tlbstate_lock); - - flush_mm = mm; - flush_va = va; - atomic_set_mask(cpumask, &smp_invalidate_needed); - /* - * We have to send the CPI only to - * CPUs affected. - */ - send_CPI(cpumask, VIC_INVALIDATE_CPI); - - while (smp_invalidate_needed) { - mb(); - if(--stuck == 0) { - printk("***WARNING*** Stuck doing invalidate CPI (CPU%d)\n", smp_processor_id()); - break; - } - } - - /* Uncomment only to debug invalidation problems - VDEBUG(("VOYAGER SMP: Completed invalidate CPI (CPU%d)\n", cpu)); - */ - - flush_mm = NULL; - flush_va = 0; - spin_unlock(&tlbstate_lock); -} - -void -flush_tlb_current_task(void) -{ - struct mm_struct *mm = current->mm; - unsigned long cpu_mask; - - preempt_disable(); - - cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); - local_flush_tlb(); - if (cpu_mask) - voyager_flush_tlb_others(cpu_mask, mm, FLUSH_ALL); - - preempt_enable(); -} - - -void -flush_tlb_mm (struct mm_struct * mm) -{ - unsigned long cpu_mask; - - preempt_disable(); - - cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); - - if (current->active_mm == mm) { - if (current->mm) - local_flush_tlb(); - else - leave_mm(smp_processor_id()); - } - if (cpu_mask) - voyager_flush_tlb_others(cpu_mask, mm, FLUSH_ALL); - - preempt_enable(); -} - -void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) -{ - struct mm_struct *mm = vma->vm_mm; - unsigned long cpu_mask; - - preempt_disable(); - - cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); - if (current->active_mm == mm) { - if(current->mm) - __flush_tlb_one(va); - else - leave_mm(smp_processor_id()); - } - - if (cpu_mask) - voyager_flush_tlb_others(cpu_mask, mm, va); - - preempt_enable(); -} -EXPORT_SYMBOL(flush_tlb_page); - -/* enable the requested IRQs */ -static void -smp_enable_irq_interrupt(void) -{ - __u8 irq; - __u8 cpu = get_cpu(); - - VDEBUG(("VOYAGER SMP: CPU%d enabling irq mask 0x%x\n", cpu, - vic_irq_enable_mask[cpu])); - - spin_lock(&vic_irq_lock); - for(irq = 0; irq < 16; irq++) { - if(vic_irq_enable_mask[cpu] & (1<func; - void *info = call_data->info; - /* must take copy of wait because call_data may be replaced - * unless the function is waiting for us to finish */ - int wait = call_data->wait; - __u8 cpu = smp_processor_id(); - - /* - * Notify initiating CPU that I've grabbed the data and am - * about to execute the function - */ - mb(); - if(!test_and_clear_bit(cpu, &call_data->started)) { - /* If the bit wasn't set, this could be a replay */ - printk(KERN_WARNING "VOYAGER SMP: CPU %d received call funtion with no call pending\n", cpu); - return; - } - /* - * At this point the info structure may be out of scope unless wait==1 - */ - irq_enter(); - (*func)(info); - irq_exit(); - if (wait) { - mb(); - clear_bit(cpu, &call_data->finished); - } -} - -static int -voyager_smp_call_function_mask (cpumask_t cpumask, - void (*func) (void *info), void *info, - int wait) -{ - struct call_data_struct data; - u32 mask = cpus_addr(cpumask)[0]; - - mask &= ~(1<= 0x93000) - BUG(); -} - -/* send a reschedule CPI to one CPU by physical CPU number*/ -static void -voyager_smp_send_reschedule(int cpu) -{ - send_one_CPI(cpu, VIC_RESCHEDULE_CPI); -} - - -int -hard_smp_processor_id(void) -{ - __u8 i; - __u8 cpumask = inb(VIC_PROC_WHO_AM_I); - if((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER) - return cpumask & 0x1F; - - for(i = 0; i < 8; i++) { - if(cpumask & (1<1 eligible CPUs are equal lowest, the - * lowest processor number gets it. - * - * The priority of a CPU is controlled by a special per-CPU - * VIC priority register which is 3 bits wide 0 being lowest - * and 7 highest priority.. - * - * Therefore we subtract the average number of interrupts from - * the number we've fielded. If this number is negative, we - * lower the activity count and if it is positive, we raise - * it. - * - * I'm afraid this still leads to odd looking interrupt counts: - * the totals are all roughly equal, but the individual ones - * look rather skewed. - * - * FIXME: This algorithm is total crap when mixed with SMP - * affinity code since we now try to even up the interrupt - * counts when an affinity binding is keeping them on a - * particular CPU*/ - weight = (vic_intr_count[cpu]*voyager_extended_cpus - - vic_intr_total) >> 4; - weight += 4; - if(weight > 7) - weight = 7; - if(weight < 0) - weight = 0; - - outb((__u8)weight, VIC_PRIORITY_REGISTER); - -#ifdef VOYAGER_DEBUG - if((vic_tick[cpu] & 0xFFF) == 0) { - /* print this message roughly every 25 secs */ - printk("VOYAGER SMP: vic_tick[%d] = %lu, weight = %ld\n", - cpu, vic_tick[cpu], weight); - } -#endif -} - -/* setup the profiling timer */ -int -setup_profiling_timer(unsigned int multiplier) -{ - int i; - - if ( (!multiplier)) - return -EINVAL; - - /* - * Set the new multiplier for each CPU. CPUs don't start using the - * new values until the next timer interrupt in which they do process - * accounting. - */ - for (i = 0; i < NR_CPUS; ++i) - per_cpu(prof_multiplier, i) = multiplier; - - return 0; -} - -/* This is a bit of a mess, but forced on us by the genirq changes - * there's no genirq handler that really does what voyager wants - * so hack it up with the simple IRQ handler */ -static void fastcall -handle_vic_irq(unsigned int irq, struct irq_desc *desc) -{ - before_handle_vic_irq(irq); - handle_simple_irq(irq, desc); - after_handle_vic_irq(irq); -} - - -/* The CPIs are handled in the per cpu 8259s, so they must be - * enabled to be received: FIX: enabling the CPIs in the early - * boot sequence interferes with bug checking; enable them later - * on in smp_init */ -#define VIC_SET_GATE(cpi, vector) \ - set_intr_gate((cpi) + VIC_DEFAULT_CPI_BASE, (vector)) -#define QIC_SET_GATE(cpi, vector) \ - set_intr_gate((cpi) + QIC_DEFAULT_CPI_BASE, (vector)) - -void __init -smp_intr_init(void) -{ - int i; - - /* initialize the per cpu irq mask to all disabled */ - for(i = 0; i < NR_CPUS; i++) - vic_irq_mask[i] = 0xFFFF; - - VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt); - - VIC_SET_GATE(VIC_SYS_INT, vic_sys_interrupt); - VIC_SET_GATE(VIC_CMN_INT, vic_cmn_interrupt); - - QIC_SET_GATE(QIC_TIMER_CPI, qic_timer_interrupt); - QIC_SET_GATE(QIC_INVALIDATE_CPI, qic_invalidate_interrupt); - QIC_SET_GATE(QIC_RESCHEDULE_CPI, qic_reschedule_interrupt); - QIC_SET_GATE(QIC_ENABLE_IRQ_CPI, qic_enable_irq_interrupt); - QIC_SET_GATE(QIC_CALL_FUNCTION_CPI, qic_call_function_interrupt); - - - /* now put the VIC descriptor into the first 48 IRQs - * - * This is for later: first 16 correspond to PC IRQs; next 16 - * are Primary MC IRQs and final 16 are Secondary MC IRQs */ - for(i = 0; i < 48; i++) - set_irq_chip_and_handler(i, &vic_chip, handle_vic_irq); -} - -/* send a CPI at level cpi to a set of cpus in cpuset (set 1 bit per - * processor to receive CPI */ -static void -send_CPI(__u32 cpuset, __u8 cpi) -{ - int cpu; - __u32 quad_cpuset = (cpuset & voyager_quad_processors); - - if(cpi < VIC_START_FAKE_CPI) { - /* fake CPI are only used for booting, so send to the - * extended quads as well---Quads must be VIC booted */ - outb((__u8)(cpuset), VIC_CPI_Registers[cpi]); - return; - } - if(quad_cpuset) - send_QIC_CPI(quad_cpuset, cpi); - cpuset &= ~quad_cpuset; - cpuset &= 0xff; /* only first 8 CPUs vaild for VIC CPI */ - if(cpuset == 0) - return; - for_each_online_cpu(cpu) { - if(cpuset & (1<qic_cpi[cpi].cpi; -} - -static void -ack_special_QIC_CPI(__u8 cpi) -{ - switch(cpi) { - case VIC_CMN_INT: - outb(QIC_CMN_INT, QIC_INTERRUPT_CLEAR0); - break; - case VIC_SYS_INT: - outb(QIC_SYS_INT, QIC_INTERRUPT_CLEAR0); - break; - } - /* also clear at the VIC, just in case (nop for non-extended proc) */ - ack_VIC_CPI(cpi); -} - -/* Acknowledge receipt of CPI in the VIC (essentially an EOI) */ -static void -ack_VIC_CPI(__u8 cpi) -{ -#ifdef VOYAGER_DEBUG - unsigned long flags; - __u16 isr; - __u8 cpu = smp_processor_id(); - - local_irq_save(flags); - isr = vic_read_isr(); - if((isr & (1<<(cpi &7))) == 0) { - printk("VOYAGER SMP: CPU%d lost CPI%d\n", cpu, cpi); - } -#endif - /* send specific EOI; the two system interrupts have - * bit 4 set for a separate vector but behave as the - * corresponding 3 bit intr */ - outb_p(0x60|(cpi & 7),0x20); - -#ifdef VOYAGER_DEBUG - if((vic_read_isr() & (1<<(cpi &7))) != 0) { - printk("VOYAGER SMP: CPU%d still asserting CPI%d\n", cpu, cpi); - } - local_irq_restore(flags); -#endif -} - -/* cribbed with thanks from irq.c */ -#define __byte(x,y) (((unsigned char *)&(y))[x]) -#define cached_21(cpu) (__byte(0,vic_irq_mask[cpu])) -#define cached_A1(cpu) (__byte(1,vic_irq_mask[cpu])) - -static unsigned int -startup_vic_irq(unsigned int irq) -{ - unmask_vic_irq(irq); - - return 0; -} - -/* The enable and disable routines. This is where we run into - * conflicting architectural philosophy. Fundamentally, the voyager - * architecture does not expect to have to disable interrupts globally - * (the IRQ controllers belong to each CPU). The processor masquerade - * which is used to start the system shouldn't be used in a running OS - * since it will cause great confusion if two separate CPUs drive to - * the same IRQ controller (I know, I've tried it). - * - * The solution is a variant on the NCR lazy SPL design: - * - * 1) To disable an interrupt, do nothing (other than set the - * IRQ_DISABLED flag). This dares the interrupt actually to arrive. - * - * 2) If the interrupt dares to come in, raise the local mask against - * it (this will result in all the CPU masks being raised - * eventually). - * - * 3) To enable the interrupt, lower the mask on the local CPU and - * broadcast an Interrupt enable CPI which causes all other CPUs to - * adjust their masks accordingly. */ - -static void -unmask_vic_irq(unsigned int irq) -{ - /* linux doesn't to processor-irq affinity, so enable on - * all CPUs we know about */ - int cpu = smp_processor_id(), real_cpu; - __u16 mask = (1<status |= IRQ_REPLAY | IRQ_INPROGRESS; - } else if(desc->status & IRQ_DISABLED) { - /* Damn, the interrupt actually arrived, do the lazy - * disable thing. The interrupt routine in irq.c will - * not handle a IRQ_DISABLED interrupt, so nothing more - * need be done here */ - VDEBUG(("VOYAGER DEBUG: lazy disable of irq %d on CPU %d\n", - irq, cpu)); - disable_local_vic_irq(irq); - desc->status |= IRQ_REPLAY; - } else { - desc->status &= ~IRQ_REPLAY; - } - - _raw_spin_unlock(&vic_irq_lock); -} - -/* Finish the VIC interrupt: basically mask */ -static void -after_handle_vic_irq(unsigned int irq) -{ - irq_desc_t *desc = irq_desc + irq; - - _raw_spin_lock(&vic_irq_lock); - { - unsigned int status = desc->status & ~IRQ_INPROGRESS; -#ifdef VOYAGER_DEBUG - __u16 isr; -#endif - - desc->status = status; - if ((status & IRQ_DISABLED)) - disable_local_vic_irq(irq); -#ifdef VOYAGER_DEBUG - /* DEBUG: before we ack, check what's in progress */ - isr = vic_read_isr(); - if((isr & (1<status &= ~(IRQ_REPLAY | IRQ_INPROGRESS); - } -#ifdef VOYAGER_DEBUG - isr = vic_read_isr(); - if((isr & (1<= 32) - /* You can only have 32 interrupts in a voyager system - * (and 32 only if you have a secondary microchannel - * bus) */ - return; - - for_each_online_cpu(cpu) { - unsigned long cpu_mask = 1 << cpu; - - if(cpu_mask & real_mask) { - /* enable the interrupt for this cpu */ - cpu_irq_affinity[cpu] |= irq_mask; - } else { - /* disable the interrupt for this cpu */ - cpu_irq_affinity[cpu] &= ~irq_mask; - } - } - /* this is magic, we now have the correct affinity maps, so - * enable the interrupt. This will send an enable CPI to - * those cpu's who need to enable it in their local masks, - * causing them to correct for the new affinity . If the - * interrupt is currently globally disabled, it will simply be - * disabled again as it comes in (voyager lazy disable). If - * the affinity map is tightened to disable the interrupt on a - * cpu, it will be pushed off when it comes in */ - unmask_vic_irq(irq); -} - -static void -ack_vic_irq(unsigned int irq) -{ - if (irq & 8) { - outb(0x62,0x20); /* Specific EOI to cascade */ - outb(0x60|(irq & 7),0xA0); - } else { - outb(0x60 | (irq & 7),0x20); - } -} - -/* enable the CPIs. In the VIC, the CPIs are delivered by the 8259 - * but are not vectored by it. This means that the 8259 mask must be - * lowered to receive them */ -static __init void -vic_enable_cpi(void) -{ - __u8 cpu = smp_processor_id(); - - /* just take a copy of the current mask (nop for boot cpu) */ - vic_irq_mask[cpu] = vic_irq_mask[boot_cpu_id]; - - enable_local_vic_irq(VIC_CPI_LEVEL0); - enable_local_vic_irq(VIC_CPI_LEVEL1); - /* for sys int and cmn int */ - enable_local_vic_irq(7); - - if(is_cpu_quad()) { - outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0); - outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1); - VDEBUG(("VOYAGER SMP: QIC ENABLE CPI: CPU%d: MASK 0x%x\n", - cpu, QIC_CPI_ENABLE)); - } - - VDEBUG(("VOYAGER SMP: ENABLE CPI: CPU%d: MASK 0x%x\n", - cpu, vic_irq_mask[cpu])); -} - -void -voyager_smp_dump() -{ - int old_cpu = smp_processor_id(), cpu; - - /* dump the interrupt masks of each processor */ - for_each_online_cpu(cpu) { - __u16 imr, isr, irr; - unsigned long flags; - - local_irq_save(flags); - outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID); - imr = (inb(0xa1) << 8) | inb(0x21); - outb(0x0a, 0xa0); - irr = inb(0xa0) << 8; - outb(0x0a, 0x20); - irr |= inb(0x20); - outb(0x0b, 0xa0); - isr = inb(0xa0) << 8; - outb(0x0b, 0x20); - isr |= inb(0x20); - outb(old_cpu, VIC_PROCESSOR_ID); - local_irq_restore(flags); - printk("\tCPU%d: mask=0x%x, IMR=0x%x, IRR=0x%x, ISR=0x%x\n", - cpu, vic_irq_mask[cpu], imr, irr, isr); -#if 0 - /* These lines are put in to try to unstick an un ack'd irq */ - if(isr != 0) { - int irq; - for(irq=0; irq<16; irq++) { - if(isr & (1<cpu = hard_smp_processor_id(); - x86_write_percpu(cpu_number, hard_smp_processor_id()); -} - -struct smp_ops smp_ops = { - .smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu, - .smp_prepare_cpus = voyager_smp_prepare_cpus, - .cpu_up = voyager_cpu_up, - .smp_cpus_done = voyager_smp_cpus_done, - - .smp_send_stop = voyager_smp_send_stop, - .smp_send_reschedule = voyager_smp_send_reschedule, - .smp_call_function_mask = voyager_smp_call_function_mask, -}; diff --git a/arch/i386/mach-voyager/voyager_thread.c b/arch/i386/mach-voyager/voyager_thread.c deleted file mode 100644 index f9d59533815..00000000000 --- a/arch/i386/mach-voyager/voyager_thread.c +++ /dev/null @@ -1,134 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8 -*- */ - -/* Copyright (C) 2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * linux/arch/i386/kernel/voyager_thread.c - * - * This module provides the machine status monitor thread for the - * voyager architecture. This allows us to monitor the machine - * environment (temp, voltage, fan function) and the front panel and - * internal UPS. If a fault is detected, this thread takes corrective - * action (usually just informing init) - * */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -struct task_struct *voyager_thread; -static __u8 set_timeout; - -static int -execute(const char *string) -{ - int ret; - - char *envp[] = { - "HOME=/", - "TERM=linux", - "PATH=/sbin:/usr/sbin:/bin:/usr/bin", - NULL, - }; - char *argv[] = { - "/bin/bash", - "-c", - (char *)string, - NULL, - }; - - if ((ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) { - printk(KERN_ERR "Voyager failed to run \"%s\": %i\n", - string, ret); - } - return ret; -} - -static void -check_from_kernel(void) -{ - if(voyager_status.switch_off) { - - /* FIXME: This should be configureable via proc */ - execute("umask 600; echo 0 > /etc/initrunlvl; kill -HUP 1"); - } else if(voyager_status.power_fail) { - VDEBUG(("Voyager daemon detected AC power failure\n")); - - /* FIXME: This should be configureable via proc */ - execute("umask 600; echo F > /etc/powerstatus; kill -PWR 1"); - set_timeout = 1; - } -} - -static void -check_continuing_condition(void) -{ - if(voyager_status.power_fail) { - __u8 data; - voyager_cat_psi(VOYAGER_PSI_SUBREAD, - VOYAGER_PSI_AC_FAIL_REG, &data); - if((data & 0x1f) == 0) { - /* all power restored */ - printk(KERN_NOTICE "VOYAGER AC power restored, cancelling shutdown\n"); - /* FIXME: should be user configureable */ - execute("umask 600; echo O > /etc/powerstatus; kill -PWR 1"); - set_timeout = 0; - } - } -} - -static int -thread(void *unused) -{ - printk(KERN_NOTICE "Voyager starting monitor thread\n"); - - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(set_timeout ? HZ : MAX_SCHEDULE_TIMEOUT); - - VDEBUG(("Voyager Daemon awoken\n")); - if(voyager_status.request_from_kernel == 0) { - /* probably awoken from timeout */ - check_continuing_condition(); - } else { - check_from_kernel(); - voyager_status.request_from_kernel = 0; - } - } -} - -static int __init -voyager_thread_start(void) -{ - voyager_thread = kthread_run(thread, NULL, "kvoyagerd"); - if (IS_ERR(voyager_thread)) { - printk(KERN_ERR "Voyager: Failed to create system monitor thread.\n"); - return PTR_ERR(voyager_thread); - } - return 0; -} - - -static void __exit -voyager_thread_stop(void) -{ - kthread_stop(voyager_thread); -} - -module_init(voyager_thread_start); -module_exit(voyager_thread_stop); diff --git a/arch/x86/mach-voyager/Makefile b/arch/x86/mach-voyager/Makefile new file mode 100644 index 00000000000..33b74cf0dd2 --- /dev/null +++ b/arch/x86/mach-voyager/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for the linux kernel. +# + +EXTRA_CFLAGS := -Iarch/i386/kernel +obj-y := setup.o voyager_basic.o voyager_thread.o + +obj-$(CONFIG_SMP) += voyager_smp.o voyager_cat.o diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c new file mode 100644 index 00000000000..2b55694e640 --- /dev/null +++ b/arch/x86/mach-voyager/setup.c @@ -0,0 +1,125 @@ +/* + * Machine specific setup for generic + */ + +#include +#include +#include +#include +#include +#include +#include + +void __init pre_intr_init_hook(void) +{ + init_ISA_irqs(); +} + +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ +static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; + +void __init intr_init_hook(void) +{ +#ifdef CONFIG_SMP + smp_intr_init(); +#endif + + setup_irq(2, &irq2); +} + +void __init pre_setup_arch_hook(void) +{ + /* Voyagers run their CPUs from independent clocks, so disable + * the TSC code because we can't sync them */ + tsc_disable = 1; +} + +void __init trap_init_hook(void) +{ +} + +static struct irqaction irq0 = { + .handler = timer_interrupt, + .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL, + .mask = CPU_MASK_NONE, + .name = "timer" +}; + +void __init time_init_hook(void) +{ + irq0.mask = cpumask_of_cpu(safe_smp_processor_id()); + setup_irq(0, &irq0); +} + +/* Hook for machine specific memory setup. */ + +char * __init machine_specific_memory_setup(void) +{ + char *who; + + who = "NOT VOYAGER"; + + if(voyager_level == 5) { + __u32 addr, length; + int i; + + who = "Voyager-SUS"; + + e820.nr_map = 0; + for(i=0; voyager_memory_detect(i, &addr, &length); i++) { + add_memory_region(addr, length, E820_RAM); + } + return who; + } else if(voyager_level == 4) { + __u32 tom; + __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT)<<8; + /* select the DINO config space */ + outb(VOYAGER_DINO, VOYAGER_CAT_CONFIG_PORT); + /* Read DINO top of memory register */ + tom = ((inb(catbase + 0x4) & 0xf0) << 16) + + ((inb(catbase + 0x5) & 0x7f) << 24); + + if(inb(catbase) != VOYAGER_DINO) { + printk(KERN_ERR "Voyager: Failed to get DINO for L4, setting tom to EXT_MEM_K\n"); + tom = (EXT_MEM_K)<<10; + } + who = "Voyager-TOM"; + add_memory_region(0, 0x9f000, E820_RAM); + /* map from 1M to top of memory */ + add_memory_region(1*1024*1024, tom - 1*1024*1024, E820_RAM); + /* FIXME: Should check the ASICs to see if I need to + * take out the 8M window. Just do it at the moment + * */ + add_memory_region(8*1024*1024, 8*1024*1024, E820_RESERVED); + return who; + } + + who = "BIOS-e820"; + + /* + * Try to copy the BIOS-supplied E820-map. + * + * Otherwise fake a memory map; one section from 0k->640k, + * the next section from 1mb->appropriate_mem_k + */ + sanitize_e820_map(E820_MAP, &E820_MAP_NR); + if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) { + unsigned long mem_size; + + /* compare results from other methods and take the greater */ + if (ALT_MEM_K < EXT_MEM_K) { + mem_size = EXT_MEM_K; + who = "BIOS-88"; + } else { + mem_size = ALT_MEM_K; + who = "BIOS-e801"; + } + + e820.nr_map = 0; + add_memory_region(0, LOWMEMSIZE(), E820_RAM); + add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM); + } + return who; +} diff --git a/arch/x86/mach-voyager/voyager_basic.c b/arch/x86/mach-voyager/voyager_basic.c new file mode 100644 index 00000000000..9b77b39b71a --- /dev/null +++ b/arch/x86/mach-voyager/voyager_basic.c @@ -0,0 +1,331 @@ +/* Copyright (C) 1999,2001 + * + * Author: J.E.J.Bottomley@HansenPartnership.com + * + * linux/arch/i386/kernel/voyager.c + * + * This file contains all the voyager specific routines for getting + * initialisation of the architecture to function. For additional + * features see: + * + * voyager_cat.c - Voyager CAT bus interface + * voyager_smp.c - Voyager SMP hal (emulates linux smp.c) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Power off function, if any + */ +void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); + +int voyager_level = 0; + +struct voyager_SUS *voyager_SUS = NULL; + +#ifdef CONFIG_SMP +static void +voyager_dump(int dummy1, struct tty_struct *dummy3) +{ + /* get here via a sysrq */ + voyager_smp_dump(); +} + +static struct sysrq_key_op sysrq_voyager_dump_op = { + .handler = voyager_dump, + .help_msg = "Voyager", + .action_msg = "Dump Voyager Status", +}; +#endif + +void +voyager_detect(struct voyager_bios_info *bios) +{ + if(bios->len != 0xff) { + int class = (bios->class_1 << 8) + | (bios->class_2 & 0xff); + + printk("Voyager System detected.\n" + " Class %x, Revision %d.%d\n", + class, bios->major, bios->minor); + if(class == VOYAGER_LEVEL4) + voyager_level = 4; + else if(class < VOYAGER_LEVEL5_AND_ABOVE) + voyager_level = 3; + else + voyager_level = 5; + printk(" Architecture Level %d\n", voyager_level); + if(voyager_level < 4) + printk("\n**WARNING**: Voyager HAL only supports Levels 4 and 5 Architectures at the moment\n\n"); + /* install the power off handler */ + pm_power_off = voyager_power_off; +#ifdef CONFIG_SMP + register_sysrq_key('v', &sysrq_voyager_dump_op); +#endif + } else { + printk("\n\n**WARNING**: No Voyager Subsystem Found\n"); + } +} + +void +voyager_system_interrupt(int cpl, void *dev_id) +{ + printk("Voyager: detected system interrupt\n"); +} + +/* Routine to read information from the extended CMOS area */ +__u8 +voyager_extended_cmos_read(__u16 addr) +{ + outb(addr & 0xff, 0x74); + outb((addr >> 8) & 0xff, 0x75); + return inb(0x76); +} + +/* internal definitions for the SUS Click Map of memory */ + +#define CLICK_ENTRIES 16 +#define CLICK_SIZE 4096 /* click to byte conversion for Length */ + +typedef struct ClickMap { + struct Entry { + __u32 Address; + __u32 Length; + } Entry[CLICK_ENTRIES]; +} ClickMap_t; + + +/* This routine is pretty much an awful hack to read the bios clickmap by + * mapping it into page 0. There are usually three regions in the map: + * Base Memory + * Extended Memory + * zero length marker for end of map + * + * Returns are 0 for failure and 1 for success on extracting region. + */ +int __init +voyager_memory_detect(int region, __u32 *start, __u32 *length) +{ + int i; + int retval = 0; + __u8 cmos[4]; + ClickMap_t *map; + unsigned long map_addr; + unsigned long old; + + if(region >= CLICK_ENTRIES) { + printk("Voyager: Illegal ClickMap region %d\n", region); + return 0; + } + + for(i = 0; i < sizeof(cmos); i++) + cmos[i] = voyager_extended_cmos_read(VOYAGER_MEMORY_CLICKMAP + i); + + map_addr = *(unsigned long *)cmos; + + /* steal page 0 for this */ + old = pg0[0]; + pg0[0] = ((map_addr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT); + local_flush_tlb(); + /* now clear everything out but page 0 */ + map = (ClickMap_t *)(map_addr & (~PAGE_MASK)); + + /* zero length is the end of the clickmap */ + if(map->Entry[region].Length != 0) { + *length = map->Entry[region].Length * CLICK_SIZE; + *start = map->Entry[region].Address; + retval = 1; + } + + /* replace the mapping */ + pg0[0] = old; + local_flush_tlb(); + return retval; +} + +/* voyager specific handling code for timer interrupts. Used to hand + * off the timer tick to the SMP code, since the VIC doesn't have an + * internal timer (The QIC does, but that's another story). */ +void +voyager_timer_interrupt(void) +{ + if((jiffies & 0x3ff) == 0) { + + /* There seems to be something flaky in either + * hardware or software that is resetting the timer 0 + * count to something much higher than it should be + * This seems to occur in the boot sequence, just + * before root is mounted. Therefore, every 10 + * seconds or so, we sanity check the timer zero count + * and kick it back to where it should be. + * + * FIXME: This is the most awful hack yet seen. I + * should work out exactly what is interfering with + * the timer count settings early in the boot sequence + * and swiftly introduce it to something sharp and + * pointy. */ + __u16 val; + + spin_lock(&i8253_lock); + + outb_p(0x00, 0x43); + val = inb_p(0x40); + val |= inb(0x40) << 8; + spin_unlock(&i8253_lock); + + if(val > LATCH) { + printk("\nVOYAGER: countdown timer value too high (%d), resetting\n\n", val); + spin_lock(&i8253_lock); + outb(0x34,0x43); + outb_p(LATCH & 0xff , 0x40); /* LSB */ + outb(LATCH >> 8 , 0x40); /* MSB */ + spin_unlock(&i8253_lock); + } + } +#ifdef CONFIG_SMP + smp_vic_timer_interrupt(); +#endif +} + +void +voyager_power_off(void) +{ + printk("VOYAGER Power Off\n"); + + if(voyager_level == 5) { + voyager_cat_power_off(); + } else if(voyager_level == 4) { + /* This doesn't apparently work on most L4 machines, + * but the specs say to do this to get automatic power + * off. Unfortunately, if it doesn't power off the + * machine, it ends up doing a cold restart, which + * isn't really intended, so comment out the code */ +#if 0 + int port; + + + /* enable the voyager Configuration Space */ + outb((inb(VOYAGER_MC_SETUP) & 0xf0) | 0x8, + VOYAGER_MC_SETUP); + /* the port for the power off flag is an offset from the + floating base */ + port = (inb(VOYAGER_SSPB_RELOCATION_PORT) << 8) + 0x21; + /* set the power off flag */ + outb(inb(port) | 0x1, port); +#endif + } + /* and wait for it to happen */ + local_irq_disable(); + for(;;) + halt(); +} + +/* copied from process.c */ +static inline void +kb_wait(void) +{ + int i; + + for (i=0; i<0x10000; i++) + if ((inb_p(0x64) & 0x02) == 0) + break; +} + +void +machine_shutdown(void) +{ + /* Architecture specific shutdown needed before a kexec */ +} + +void +machine_restart(char *cmd) +{ + printk("Voyager Warm Restart\n"); + kb_wait(); + + if(voyager_level == 5) { + /* write magic values to the RTC to inform system that + * shutdown is beginning */ + outb(0x8f, 0x70); + outb(0x5 , 0x71); + + udelay(50); + outb(0xfe,0x64); /* pull reset low */ + } else if(voyager_level == 4) { + __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT)<<8; + __u8 basebd = inb(VOYAGER_MC_SETUP); + + outb(basebd | 0x08, VOYAGER_MC_SETUP); + outb(0x02, catbase + 0x21); + } + local_irq_disable(); + for(;;) + halt(); +} + +void +machine_emergency_restart(void) +{ + /*for now, just hook this to a warm restart */ + machine_restart(NULL); +} + +void +mca_nmi_hook(void) +{ + __u8 dumpval __maybe_unused = inb(0xf823); + __u8 swnmi __maybe_unused = inb(0xf813); + + /* FIXME: assume dump switch pressed */ + /* check to see if the dump switch was pressed */ + VDEBUG(("VOYAGER: dumpval = 0x%x, swnmi = 0x%x\n", dumpval, swnmi)); + /* clear swnmi */ + outb(0xff, 0xf813); + /* tell SUS to ignore dump */ + if(voyager_level == 5 && voyager_SUS != NULL) { + if(voyager_SUS->SUS_mbox == VOYAGER_DUMP_BUTTON_NMI) { + voyager_SUS->kernel_mbox = VOYAGER_NO_COMMAND; + voyager_SUS->kernel_flags |= VOYAGER_OS_IN_PROGRESS; + udelay(1000); + voyager_SUS->kernel_mbox = VOYAGER_IGNORE_DUMP; + voyager_SUS->kernel_flags &= ~VOYAGER_OS_IN_PROGRESS; + } + } + printk(KERN_ERR "VOYAGER: Dump switch pressed, printing CPU%d tracebacks\n", smp_processor_id()); + show_stack(NULL, NULL); + show_state(); +} + + + +void +machine_halt(void) +{ + /* treat a halt like a power off */ + machine_power_off(); +} + +void machine_power_off(void) +{ + if (pm_power_off) + pm_power_off(); +} diff --git a/arch/x86/mach-voyager/voyager_cat.c b/arch/x86/mach-voyager/voyager_cat.c new file mode 100644 index 00000000000..26a2d4c54b6 --- /dev/null +++ b/arch/x86/mach-voyager/voyager_cat.c @@ -0,0 +1,1180 @@ +/* -*- mode: c; c-basic-offset: 8 -*- */ + +/* Copyright (C) 1999,2001 + * + * Author: J.E.J.Bottomley@HansenPartnership.com + * + * linux/arch/i386/kernel/voyager_cat.c + * + * This file contains all the logic for manipulating the CAT bus + * in a level 5 machine. + * + * The CAT bus is a serial configuration and test bus. Its primary + * uses are to probe the initial configuration of the system and to + * diagnose error conditions when a system interrupt occurs. The low + * level interface is fairly primitive, so most of this file consists + * of bit shift manipulations to send and receive packets on the + * serial bus */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef VOYAGER_CAT_DEBUG +#define CDEBUG(x) printk x +#else +#define CDEBUG(x) +#endif + +/* the CAT command port */ +#define CAT_CMD (sspb + 0xe) +/* the CAT data port */ +#define CAT_DATA (sspb + 0xd) + +/* the internal cat functions */ +static void cat_pack(__u8 *msg, __u16 start_bit, __u8 *data, + __u16 num_bits); +static void cat_unpack(__u8 *msg, __u16 start_bit, __u8 *data, + __u16 num_bits); +static void cat_build_header(__u8 *header, const __u16 len, + const __u16 smallest_reg_bits, + const __u16 longest_reg_bits); +static int cat_sendinst(voyager_module_t *modp, voyager_asic_t *asicp, + __u8 reg, __u8 op); +static int cat_getdata(voyager_module_t *modp, voyager_asic_t *asicp, + __u8 reg, __u8 *value); +static int cat_shiftout(__u8 *data, __u16 data_bytes, __u16 header_bytes, + __u8 pad_bits); +static int cat_write(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg, + __u8 value); +static int cat_read(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg, + __u8 *value); +static int cat_subread(voyager_module_t *modp, voyager_asic_t *asicp, + __u16 offset, __u16 len, void *buf); +static int cat_senddata(voyager_module_t *modp, voyager_asic_t *asicp, + __u8 reg, __u8 value); +static int cat_disconnect(voyager_module_t *modp, voyager_asic_t *asicp); +static int cat_connect(voyager_module_t *modp, voyager_asic_t *asicp); + +static inline const char * +cat_module_name(int module_id) +{ + switch(module_id) { + case 0x10: + return "Processor Slot 0"; + case 0x11: + return "Processor Slot 1"; + case 0x12: + return "Processor Slot 2"; + case 0x13: + return "Processor Slot 4"; + case 0x14: + return "Memory Slot 0"; + case 0x15: + return "Memory Slot 1"; + case 0x18: + return "Primary Microchannel"; + case 0x19: + return "Secondary Microchannel"; + case 0x1a: + return "Power Supply Interface"; + case 0x1c: + return "Processor Slot 5"; + case 0x1d: + return "Processor Slot 6"; + case 0x1e: + return "Processor Slot 7"; + case 0x1f: + return "Processor Slot 8"; + default: + return "Unknown Module"; + } +} + +static int sspb = 0; /* stores the super port location */ +int voyager_8slot = 0; /* set to true if a 51xx monster */ + +voyager_module_t *voyager_cat_list; + +/* the I/O port assignments for the VIC and QIC */ +static struct resource vic_res = { + .name = "Voyager Interrupt Controller", + .start = 0xFC00, + .end = 0xFC6F +}; +static struct resource qic_res = { + .name = "Quad Interrupt Controller", + .start = 0xFC70, + .end = 0xFCFF +}; + +/* This function is used to pack a data bit stream inside a message. + * It writes num_bits of the data buffer in msg starting at start_bit. + * Note: This function assumes that any unused bit in the data stream + * is set to zero so that the ors will work correctly */ +static void +cat_pack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits) +{ + /* compute initial shift needed */ + const __u16 offset = start_bit % BITS_PER_BYTE; + __u16 len = num_bits / BITS_PER_BYTE; + __u16 byte = start_bit / BITS_PER_BYTE; + __u16 residue = (num_bits % BITS_PER_BYTE) + offset; + int i; + + /* adjust if we have more than a byte of residue */ + if(residue >= BITS_PER_BYTE) { + residue -= BITS_PER_BYTE; + len++; + } + + /* clear out the bits. We assume here that if len==0 then + * residue >= offset. This is always true for the catbus + * operations */ + msg[byte] &= 0xff << (BITS_PER_BYTE - offset); + msg[byte++] |= data[0] >> offset; + if(len == 0) + return; + for(i = 1; i < len; i++) + msg[byte++] = (data[i-1] << (BITS_PER_BYTE - offset)) + | (data[i] >> offset); + if(residue != 0) { + __u8 mask = 0xff >> residue; + __u8 last_byte = data[i-1] << (BITS_PER_BYTE - offset) + | (data[i] >> offset); + + last_byte &= ~mask; + msg[byte] &= mask; + msg[byte] |= last_byte; + } + return; +} +/* unpack the data again (same arguments as cat_pack()). data buffer + * must be zero populated. + * + * Function: given a message string move to start_bit and copy num_bits into + * data (starting at bit 0 in data). + */ +static void +cat_unpack(__u8 *msg, const __u16 start_bit, __u8 *data, const __u16 num_bits) +{ + /* compute initial shift needed */ + const __u16 offset = start_bit % BITS_PER_BYTE; + __u16 len = num_bits / BITS_PER_BYTE; + const __u8 last_bits = num_bits % BITS_PER_BYTE; + __u16 byte = start_bit / BITS_PER_BYTE; + int i; + + if(last_bits != 0) + len++; + + /* special case: want < 8 bits from msg and we can get it from + * a single byte of the msg */ + if(len == 0 && BITS_PER_BYTE - offset >= num_bits) { + data[0] = msg[byte] << offset; + data[0] &= 0xff >> (BITS_PER_BYTE - num_bits); + return; + } + for(i = 0; i < len; i++) { + /* this annoying if has to be done just in case a read of + * msg one beyond the array causes a panic */ + if(offset != 0) { + data[i] = msg[byte++] << offset; + data[i] |= msg[byte] >> (BITS_PER_BYTE - offset); + } + else { + data[i] = msg[byte++]; + } + } + /* do we need to truncate the final byte */ + if(last_bits != 0) { + data[i-1] &= 0xff << (BITS_PER_BYTE - last_bits); + } + return; +} + +static void +cat_build_header(__u8 *header, const __u16 len, const __u16 smallest_reg_bits, + const __u16 longest_reg_bits) +{ + int i; + __u16 start_bit = (smallest_reg_bits - 1) % BITS_PER_BYTE; + __u8 *last_byte = &header[len - 1]; + + if(start_bit == 0) + start_bit = 1; /* must have at least one bit in the hdr */ + + for(i=0; i < len; i++) + header[i] = 0; + + for(i = start_bit; i > 0; i--) + *last_byte = ((*last_byte) << 1) + 1; + +} + +static int +cat_sendinst(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg, __u8 op) +{ + __u8 parity, inst, inst_buf[4] = { 0 }; + __u8 iseq[VOYAGER_MAX_SCAN_PATH], hseq[VOYAGER_MAX_REG_SIZE]; + __u16 ibytes, hbytes, padbits; + int i; + + /* + * Parity is the parity of the register number + 1 (READ_REGISTER + * and WRITE_REGISTER always add '1' to the number of bits == 1) + */ + parity = (__u8)(1 + (reg & 0x01) + + ((__u8)(reg & 0x02) >> 1) + + ((__u8)(reg & 0x04) >> 2) + + ((__u8)(reg & 0x08) >> 3)) % 2; + + inst = ((parity << 7) | (reg << 2) | op); + + outb(VOYAGER_CAT_IRCYC, CAT_CMD); + if(!modp->scan_path_connected) { + if(asicp->asic_id != VOYAGER_CAT_ID) { + printk("**WARNING***: cat_sendinst has disconnected scan path not to CAT asic\n"); + return 1; + } + outb(VOYAGER_CAT_HEADER, CAT_DATA); + outb(inst, CAT_DATA); + if(inb(CAT_DATA) != VOYAGER_CAT_HEADER) { + CDEBUG(("VOYAGER CAT: cat_sendinst failed to get CAT_HEADER\n")); + return 1; + } + return 0; + } + ibytes = modp->inst_bits / BITS_PER_BYTE; + if((padbits = modp->inst_bits % BITS_PER_BYTE) != 0) { + padbits = BITS_PER_BYTE - padbits; + ibytes++; + } + hbytes = modp->largest_reg / BITS_PER_BYTE; + if(modp->largest_reg % BITS_PER_BYTE) + hbytes++; + CDEBUG(("cat_sendinst: ibytes=%d, hbytes=%d\n", ibytes, hbytes)); + /* initialise the instruction sequence to 0xff */ + for(i=0; i < ibytes + hbytes; i++) + iseq[i] = 0xff; + cat_build_header(hseq, hbytes, modp->smallest_reg, modp->largest_reg); + cat_pack(iseq, modp->inst_bits, hseq, hbytes * BITS_PER_BYTE); + inst_buf[0] = inst; + inst_buf[1] = 0xFF >> (modp->largest_reg % BITS_PER_BYTE); + cat_pack(iseq, asicp->bit_location, inst_buf, asicp->ireg_length); +#ifdef VOYAGER_CAT_DEBUG + printk("ins = 0x%x, iseq: ", inst); + for(i=0; i< ibytes + hbytes; i++) + printk("0x%x ", iseq[i]); + printk("\n"); +#endif + if(cat_shiftout(iseq, ibytes, hbytes, padbits)) { + CDEBUG(("VOYAGER CAT: cat_sendinst: cat_shiftout failed\n")); + return 1; + } + CDEBUG(("CAT SHIFTOUT DONE\n")); + return 0; +} + +static int +cat_getdata(voyager_module_t *modp, voyager_asic_t *asicp, __u8 reg, + __u8 *value) +{ + if(!modp->scan_path_connected) { + if(asicp->asic_id != VOYAGER_CAT_ID) { + CDEBUG(("VOYAGER CAT: ERROR: cat_getdata to CAT asic with scan path connected\n")); + return 1; + } + if(reg > VOYAGER_SUBADDRHI) + outb(VOYAGER_CAT_RUN, CAT_CMD); + outb(VOYAGER_CAT_DRCYC, CAT_CMD); + outb(VOYAGER_CAT_HEADER, CAT_DATA); + *value = inb(CAT_DATA); + outb(0xAA, CAT_DATA); + if(inb(CAT_DATA) != VOYAGER_CAT_HEADER) { + CDEBUG(("cat_getdata: failed to get VOYAGER_CAT_HEADER\n")); + return 1; + } + return 0; + } + else { + __u16 sbits = modp->num_asics -1 + asicp->ireg_length; + __u16 sbytes = sbits / BITS_PER_BYTE; + __u16 tbytes; + __u8 string[VOYAGER_MAX_SCAN_PATH], trailer[VOYAGER_MAX_REG_SIZE]; + __u8 padbits; + int i; + + outb(VOYAGER_CAT_DRCYC, CAT_CMD); + + if((padbits = sbits % BITS_PER_BYTE) != 0) { + padbits = BITS_PER_BYTE - padbits; + sbytes++; + } + tbytes = asicp->ireg_length / BITS_PER_BYTE; + if(asicp->ireg_length % BITS_PER_BYTE) + tbytes++; + CDEBUG(("cat_getdata: tbytes = %d, sbytes = %d, padbits = %d\n", + tbytes, sbytes, padbits)); + cat_build_header(trailer, tbytes, 1, asicp->ireg_length); + + + for(i = tbytes - 1; i >= 0; i--) { + outb(trailer[i], CAT_DATA); + string[sbytes + i] = inb(CAT_DATA); + } + + for(i = sbytes - 1; i >= 0; i--) { + outb(0xaa, CAT_DATA); + string[i] = inb(CAT_DATA); + } + *value = 0; + cat_unpack(string, padbits + (tbytes * BITS_PER_BYTE) + asicp->asic_location, value, asicp->ireg_length); +#ifdef VOYAGER_CAT_DEBUG + printk("value=0x%x, string: ", *value); + for(i=0; i< tbytes+sbytes; i++) + printk("0x%x ", string[i]); + printk("\n"); +#endif + + /* sanity check the rest of the return */ + for(i=0; i < tbytes; i++) { + __u8 input = 0; + + cat_unpack(string, padbits + (i * BITS_PER_BYTE), &input, BITS_PER_BYTE); + if(trailer[i] != input) { + CDEBUG(("cat_getdata: failed to sanity check rest of ret(%d) 0x%x != 0x%x\n", i, input, trailer[i])); + return 1; + } + } + CDEBUG(("cat_getdata DONE\n")); + return 0; + } +} + +static int +cat_shiftout(__u8 *data, __u16 data_bytes, __u16 header_bytes, __u8 pad_bits) +{ + int i; + + for(i = data_bytes + header_bytes - 1; i >= header_bytes; i--) + outb(data[i], CAT_DATA); + + for(i = header_bytes - 1; i >= 0; i--) { + __u8 header = 0; + __u8 input; + + outb(data[i], CAT_DATA); + input = inb(CAT_DATA); + CDEBUG(("cat_shiftout: returned 0x%x\n", input)); + cat_unpack(data, ((data_bytes + i) * BITS_PER_BYTE) - pad_bits, + &header, BITS_PER_BYTE); + if(input != header) { + CDEBUG(("VOYAGER CAT: cat_shiftout failed to return header 0x%x != 0x%x\n", input, header)); + return 1; + } + } + return 0; +} + +static int +cat_senddata(voyager_module_t *modp, voyager_asic_t *asicp, + __u8 reg, __u8 value) +{ + outb(VOYAGER_CAT_DRCYC, CAT_CMD); + if(!modp->scan_path_connected) { + if(asicp->asic_id != VOYAGER_CAT_ID) { + CDEBUG(("VOYAGER CAT: ERROR: scan path disconnected when asic != CAT\n")); + return 1; + } + outb(VOYAGER_CAT_HEADER, CAT_DATA); + outb(value, CAT_DATA); + if(inb(CAT_DATA) != VOYAGER_CAT_HEADER) { + CDEBUG(("cat_senddata: failed to get correct header response to sent data\n")); + return 1; + } + if(reg > VOYAGER_SUBADDRHI) { + outb(VOYAGER_CAT_RUN, CAT_CMD); + outb(VOYAGER_CAT_END, CAT_CMD); + outb(VOYAGER_CAT_RUN, CAT_CMD); + } + + return 0; + } + else { + __u16 hbytes = asicp->ireg_length / BITS_PER_BYTE; + __u16 dbytes = (modp->num_asics - 1 + asicp->ireg_length)/BITS_PER_BYTE; + __u8 padbits, dseq[VOYAGER_MAX_SCAN_PATH], + hseq[VOYAGER_MAX_REG_SIZE]; + int i; + + if((padbits = (modp->num_asics - 1 + + asicp->ireg_length) % BITS_PER_BYTE) != 0) { + padbits = BITS_PER_BYTE - padbits; + dbytes++; + } + if(asicp->ireg_length % BITS_PER_BYTE) + hbytes++; + + cat_build_header(hseq, hbytes, 1, asicp->ireg_length); + + for(i = 0; i < dbytes + hbytes; i++) + dseq[i] = 0xff; + CDEBUG(("cat_senddata: dbytes=%d, hbytes=%d, padbits=%d\n", + dbytes, hbytes, padbits)); + cat_pack(dseq, modp->num_asics - 1 + asicp->ireg_length, + hseq, hbytes * BITS_PER_BYTE); + cat_pack(dseq, asicp->asic_location, &value, + asicp->ireg_length); +#ifdef VOYAGER_CAT_DEBUG + printk("dseq "); + for(i=0; i 1) { + /* set auto increment */ + __u8 newval; + + if(cat_read(modp, asicp, VOYAGER_AUTO_INC_REG, &val)) { + CDEBUG(("cat_subaddrsetup: read of VOYAGER_AUTO_INC_REG failed\n")); + return 1; + } + CDEBUG(("cat_subaddrsetup: VOYAGER_AUTO_INC_REG = 0x%x\n", val)); + newval = val | VOYAGER_AUTO_INC; + if(newval != val) { + if(cat_write(modp, asicp, VOYAGER_AUTO_INC_REG, val)) { + CDEBUG(("cat_subaddrsetup: write to VOYAGER_AUTO_INC_REG failed\n")); + return 1; + } + } + } + if(cat_write(modp, asicp, VOYAGER_SUBADDRLO, (__u8)(offset &0xff))) { + CDEBUG(("cat_subaddrsetup: write to SUBADDRLO failed\n")); + return 1; + } + if(asicp->subaddr > VOYAGER_SUBADDR_LO) { + if(cat_write(modp, asicp, VOYAGER_SUBADDRHI, (__u8)(offset >> 8))) { + CDEBUG(("cat_subaddrsetup: write to SUBADDRHI failed\n")); + return 1; + } + cat_read(modp, asicp, VOYAGER_SUBADDRHI, &val); + CDEBUG(("cat_subaddrsetup: offset = %d, hi = %d\n", offset, val)); + } + cat_read(modp, asicp, VOYAGER_SUBADDRLO, &val); + CDEBUG(("cat_subaddrsetup: offset = %d, lo = %d\n", offset, val)); + return 0; +} + +static int +cat_subwrite(voyager_module_t *modp, voyager_asic_t *asicp, __u16 offset, + __u16 len, void *buf) +{ + int i, retval; + + /* FIXME: need special actions for VOYAGER_CAT_ID here */ + if(asicp->asic_id == VOYAGER_CAT_ID) { + CDEBUG(("cat_subwrite: ATTEMPT TO WRITE TO CAT ASIC\n")); + /* FIXME -- This is supposed to be handled better + * There is a problem writing to the cat asic in the + * PSI. The 30us delay seems to work, though */ + udelay(30); + } + + if((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) { + printk("cat_subwrite: cat_subaddrsetup FAILED\n"); + return retval; + } + + if(cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_WRITE_CONFIG)) { + printk("cat_subwrite: cat_sendinst FAILED\n"); + return 1; + } + for(i = 0; i < len; i++) { + if(cat_senddata(modp, asicp, 0xFF, ((__u8 *)buf)[i])) { + printk("cat_subwrite: cat_sendata element at %d FAILED\n", i); + return 1; + } + } + return 0; +} +static int +cat_subread(voyager_module_t *modp, voyager_asic_t *asicp, __u16 offset, + __u16 len, void *buf) +{ + int i, retval; + + if((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) { + CDEBUG(("cat_subread: cat_subaddrsetup FAILED\n")); + return retval; + } + + if(cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_READ_CONFIG)) { + CDEBUG(("cat_subread: cat_sendinst failed\n")); + return 1; + } + for(i = 0; i < len; i++) { + if(cat_getdata(modp, asicp, 0xFF, + &((__u8 *)buf)[i])) { + CDEBUG(("cat_subread: cat_getdata element %d failed\n", i)); + return 1; + } + } + return 0; +} + + +/* buffer for storing EPROM data read in during initialisation */ +static __initdata __u8 eprom_buf[0xFFFF]; +static voyager_module_t *voyager_initial_module; + +/* Initialise the cat bus components. We assume this is called by the + * boot cpu *after* all memory initialisation has been done (so we can + * use kmalloc) but before smp initialisation, so we can probe the SMP + * configuration and pick up necessary information. */ +void +voyager_cat_init(void) +{ + voyager_module_t **modpp = &voyager_initial_module; + voyager_asic_t **asicpp; + voyager_asic_t *qabc_asic = NULL; + int i, j; + unsigned long qic_addr = 0; + __u8 qabc_data[0x20]; + __u8 num_submodules, val; + voyager_eprom_hdr_t *eprom_hdr = (voyager_eprom_hdr_t *)&eprom_buf[0]; + + __u8 cmos[4]; + unsigned long addr; + + /* initiallise the SUS mailbox */ + for(i=0; iSUS_version); + voyager_SUS->kernel_version = VOYAGER_MAILBOX_VERSION; + voyager_SUS->kernel_flags = VOYAGER_OS_HAS_SYSINT; + } + + /* clear the processor counts */ + voyager_extended_vic_processors = 0; + voyager_quad_processors = 0; + + + + printk("VOYAGER: beginning CAT bus probe\n"); + /* set up the SuperSet Port Block which tells us where the + * CAT communication port is */ + sspb = inb(VOYAGER_SSPB_RELOCATION_PORT) * 0x100; + VDEBUG(("VOYAGER DEBUG: sspb = 0x%x\n", sspb)); + + /* now find out if were 8 slot or normal */ + if((inb(VIC_PROC_WHO_AM_I) & EIGHT_SLOT_IDENTIFIER) + == EIGHT_SLOT_IDENTIFIER) { + voyager_8slot = 1; + printk(KERN_NOTICE "Voyager: Eight slot 51xx configuration detected\n"); + } + + for(i = VOYAGER_MIN_MODULE; + i <= VOYAGER_MAX_MODULE; i++) { + __u8 input; + int asic; + __u16 eprom_size; + __u16 sp_offset; + + outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); + outb(i, VOYAGER_CAT_CONFIG_PORT); + + /* check the presence of the module */ + outb(VOYAGER_CAT_RUN, CAT_CMD); + outb(VOYAGER_CAT_IRCYC, CAT_CMD); + outb(VOYAGER_CAT_HEADER, CAT_DATA); + /* stream series of alternating 1's and 0's to stimulate + * response */ + outb(0xAA, CAT_DATA); + input = inb(CAT_DATA); + outb(VOYAGER_CAT_END, CAT_CMD); + if(input != VOYAGER_CAT_HEADER) { + continue; + } + CDEBUG(("VOYAGER DEBUG: found module id 0x%x, %s\n", i, + cat_module_name(i))); + *modpp = kmalloc(sizeof(voyager_module_t), GFP_KERNEL); /*&voyager_module_storage[cat_count++];*/ + if(*modpp == NULL) { + printk("**WARNING** kmalloc failure in cat_init\n"); + continue; + } + memset(*modpp, 0, sizeof(voyager_module_t)); + /* need temporary asic for cat_subread. It will be + * filled in correctly later */ + (*modpp)->asic = kmalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count];*/ + if((*modpp)->asic == NULL) { + printk("**WARNING** kmalloc failure in cat_init\n"); + continue; + } + memset((*modpp)->asic, 0, sizeof(voyager_asic_t)); + (*modpp)->asic->asic_id = VOYAGER_CAT_ID; + (*modpp)->asic->subaddr = VOYAGER_SUBADDR_HI; + (*modpp)->module_addr = i; + (*modpp)->scan_path_connected = 0; + if(i == VOYAGER_PSI) { + /* Exception leg for modules with no EEPROM */ + printk("Module \"%s\"\n", cat_module_name(i)); + continue; + } + + CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET)); + outb(VOYAGER_CAT_RUN, CAT_CMD); + cat_disconnect(*modpp, (*modpp)->asic); + if(cat_subread(*modpp, (*modpp)->asic, + VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size), + &eprom_size)) { + printk("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", i); + outb(VOYAGER_CAT_END, CAT_CMD); + continue; + } + if(eprom_size > sizeof(eprom_buf)) { + printk("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. Need %d\n", i, eprom_size); + outb(VOYAGER_CAT_END, CAT_CMD); + continue; + } + outb(VOYAGER_CAT_END, CAT_CMD); + outb(VOYAGER_CAT_RUN, CAT_CMD); + CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, eprom_size)); + if(cat_subread(*modpp, (*modpp)->asic, 0, + eprom_size, eprom_buf)) { + outb(VOYAGER_CAT_END, CAT_CMD); + continue; + } + outb(VOYAGER_CAT_END, CAT_CMD); + printk("Module \"%s\", version 0x%x, tracer 0x%x, asics %d\n", + cat_module_name(i), eprom_hdr->version_id, + *((__u32 *)eprom_hdr->tracer), eprom_hdr->num_asics); + (*modpp)->ee_size = eprom_hdr->ee_size; + (*modpp)->num_asics = eprom_hdr->num_asics; + asicpp = &((*modpp)->asic); + sp_offset = eprom_hdr->scan_path_offset; + /* All we really care about are the Quad cards. We + * identify them because they are in a processor slot + * and have only four asics */ + if((i < 0x10 || (i>=0x14 && i < 0x1c) || i>0x1f)) { + modpp = &((*modpp)->next); + continue; + } + /* Now we know it's in a processor slot, does it have + * a quad baseboard submodule */ + outb(VOYAGER_CAT_RUN, CAT_CMD); + cat_read(*modpp, (*modpp)->asic, VOYAGER_SUBMODPRESENT, + &num_submodules); + /* lowest two bits, active low */ + num_submodules = ~(0xfc | num_submodules); + CDEBUG(("VOYAGER CAT: %d submodules present\n", num_submodules)); + if(num_submodules == 0) { + /* fill in the dyadic extended processors */ + __u8 cpu = i & 0x07; + + printk("Module \"%s\": Dyadic Processor Card\n", + cat_module_name(i)); + voyager_extended_vic_processors |= (1<asic, VOYAGER_SUBMODSELECT, &val); + CDEBUG(("cat_init: SUBMODSELECT value = 0x%x\n", val)); + val = (val & 0x7c) | VOYAGER_QUAD_BASEBOARD; + cat_write(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, val); + + outb(VOYAGER_CAT_END, CAT_CMD); + + + CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET)); + outb(VOYAGER_CAT_RUN, CAT_CMD); + cat_disconnect(*modpp, (*modpp)->asic); + if(cat_subread(*modpp, (*modpp)->asic, + VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size), + &eprom_size)) { + printk("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", i); + outb(VOYAGER_CAT_END, CAT_CMD); + continue; + } + if(eprom_size > sizeof(eprom_buf)) { + printk("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. Need %d\n", i, eprom_size); + outb(VOYAGER_CAT_END, CAT_CMD); + continue; + } + outb(VOYAGER_CAT_END, CAT_CMD); + outb(VOYAGER_CAT_RUN, CAT_CMD); + CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, eprom_size)); + if(cat_subread(*modpp, (*modpp)->asic, 0, + eprom_size, eprom_buf)) { + outb(VOYAGER_CAT_END, CAT_CMD); + continue; + } + outb(VOYAGER_CAT_END, CAT_CMD); + /* Now do everything for the QBB submodule 1 */ + (*modpp)->ee_size = eprom_hdr->ee_size; + (*modpp)->num_asics = eprom_hdr->num_asics; + asicpp = &((*modpp)->asic); + sp_offset = eprom_hdr->scan_path_offset; + /* get rid of the dummy CAT asic and read the real one */ + kfree((*modpp)->asic); + for(asic=0; asic < (*modpp)->num_asics; asic++) { + int j; + voyager_asic_t *asicp = *asicpp + = kzalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count++];*/ + voyager_sp_table_t *sp_table; + voyager_at_t *asic_table; + voyager_jtt_t *jtag_table; + + if(asicp == NULL) { + printk("**WARNING** kmalloc failure in cat_init\n"); + continue; + } + asicpp = &(asicp->next); + asicp->asic_location = asic; + sp_table = (voyager_sp_table_t *)(eprom_buf + sp_offset); + asicp->asic_id = sp_table->asic_id; + asic_table = (voyager_at_t *)(eprom_buf + sp_table->asic_data_offset); + for(j=0; j<4; j++) + asicp->jtag_id[j] = asic_table->jtag_id[j]; + jtag_table = (voyager_jtt_t *)(eprom_buf + asic_table->jtag_offset); + asicp->ireg_length = jtag_table->ireg_len; + asicp->bit_location = (*modpp)->inst_bits; + (*modpp)->inst_bits += asicp->ireg_length; + if(asicp->ireg_length > (*modpp)->largest_reg) + (*modpp)->largest_reg = asicp->ireg_length; + if (asicp->ireg_length < (*modpp)->smallest_reg || + (*modpp)->smallest_reg == 0) + (*modpp)->smallest_reg = asicp->ireg_length; + CDEBUG(("asic 0x%x, ireg_length=%d, bit_location=%d\n", + asicp->asic_id, asicp->ireg_length, + asicp->bit_location)); + if(asicp->asic_id == VOYAGER_QUAD_QABC) { + CDEBUG(("VOYAGER CAT: QABC ASIC found\n")); + qabc_asic = asicp; + } + sp_offset += sizeof(voyager_sp_table_t); + } + CDEBUG(("Module inst_bits = %d, largest_reg = %d, smallest_reg=%d\n", + (*modpp)->inst_bits, (*modpp)->largest_reg, + (*modpp)->smallest_reg)); + /* OK, now we have the QUAD ASICs set up, use them. + * we need to: + * + * 1. Find the Memory area for the Quad CPIs. + * 2. Find the Extended VIC processor + * 3. Configure a second extended VIC processor (This + * cannot be done for the 51xx. + * */ + outb(VOYAGER_CAT_RUN, CAT_CMD); + cat_connect(*modpp, (*modpp)->asic); + CDEBUG(("CAT CONNECTED!!\n")); + cat_subread(*modpp, qabc_asic, 0, sizeof(qabc_data), qabc_data); + qic_addr = qabc_data[5] << 8; + qic_addr = (qic_addr | qabc_data[6]) << 8; + qic_addr = (qic_addr | qabc_data[7]) << 8; + printk("Module \"%s\": Quad Processor Card; CPI 0x%lx, SET=0x%x\n", + cat_module_name(i), qic_addr, qabc_data[8]); +#if 0 /* plumbing fails---FIXME */ + if((qabc_data[8] & 0xf0) == 0) { + /* FIXME: 32 way 8 CPU slot monster cannot be + * plumbed this way---need to check for it */ + + printk("Plumbing second Extended Quad Processor\n"); + /* second VIC line hardwired to Quad CPU 1 */ + qabc_data[8] |= 0x20; + cat_subwrite(*modpp, qabc_asic, 8, 1, &qabc_data[8]); +#ifdef VOYAGER_CAT_DEBUG + /* verify plumbing */ + cat_subread(*modpp, qabc_asic, 8, 1, &qabc_data[8]); + if((qabc_data[8] & 0xf0) == 0) { + CDEBUG(("PLUMBING FAILED: 0x%x\n", qabc_data[8])); + } +#endif + } +#endif + + { + struct resource *res = kzalloc(sizeof(struct resource),GFP_KERNEL); + res->name = kmalloc(128, GFP_KERNEL); + sprintf((char *)res->name, "Voyager %s Quad CPI", cat_module_name(i)); + res->start = qic_addr; + res->end = qic_addr + 0x3ff; + request_resource(&iomem_resource, res); + } + + qic_addr = (unsigned long)ioremap(qic_addr, 0x400); + + for(j = 0; j < 4; j++) { + __u8 cpu; + + if(voyager_8slot) { + /* 8 slot has a different mapping, + * each slot has only one vic line, so + * 1 cpu in each slot must be < 8 */ + cpu = (i & 0x07) + j*8; + } else { + cpu = (i & 0x03) + j*4; + } + if( (qabc_data[8] & (1<next); + } + *modpp = NULL; + printk("CAT Bus Initialisation finished: extended procs 0x%x, quad procs 0x%x, allowed vic boot = 0x%x\n", voyager_extended_vic_processors, voyager_quad_processors, voyager_allowed_boot_processors); + request_resource(&ioport_resource, &vic_res); + if(voyager_quad_processors) + request_resource(&ioport_resource, &qic_res); + /* set up the front power switch */ +} + +int +voyager_cat_readb(__u8 module, __u8 asic, int reg) +{ + return 0; +} + +static int +cat_disconnect(voyager_module_t *modp, voyager_asic_t *asicp) +{ + __u8 val; + int err = 0; + + if(!modp->scan_path_connected) + return 0; + if(asicp->asic_id != VOYAGER_CAT_ID) { + CDEBUG(("cat_disconnect: ASIC is not CAT\n")); + return 1; + } + err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val); + if(err) { + CDEBUG(("cat_disconnect: failed to read SCANPATH\n")); + return err; + } + val &= VOYAGER_DISCONNECT_ASIC; + err = cat_write(modp, asicp, VOYAGER_SCANPATH, val); + if(err) { + CDEBUG(("cat_disconnect: failed to write SCANPATH\n")); + return err; + } + outb(VOYAGER_CAT_END, CAT_CMD); + outb(VOYAGER_CAT_RUN, CAT_CMD); + modp->scan_path_connected = 0; + + return 0; +} + +static int +cat_connect(voyager_module_t *modp, voyager_asic_t *asicp) +{ + __u8 val; + int err = 0; + + if(modp->scan_path_connected) + return 0; + if(asicp->asic_id != VOYAGER_CAT_ID) { + CDEBUG(("cat_connect: ASIC is not CAT\n")); + return 1; + } + + err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val); + if(err) { + CDEBUG(("cat_connect: failed to read SCANPATH\n")); + return err; + } + val |= VOYAGER_CONNECT_ASIC; + err = cat_write(modp, asicp, VOYAGER_SCANPATH, val); + if(err) { + CDEBUG(("cat_connect: failed to write SCANPATH\n")); + return err; + } + outb(VOYAGER_CAT_END, CAT_CMD); + outb(VOYAGER_CAT_RUN, CAT_CMD); + modp->scan_path_connected = 1; + + return 0; +} + +void +voyager_cat_power_off(void) +{ + /* Power the machine off by writing to the PSI over the CAT + * bus */ + __u8 data; + voyager_module_t psi = { 0 }; + voyager_asic_t psi_asic = { 0 }; + + psi.asic = &psi_asic; + psi.asic->asic_id = VOYAGER_CAT_ID; + psi.asic->subaddr = VOYAGER_SUBADDR_HI; + psi.module_addr = VOYAGER_PSI; + psi.scan_path_connected = 0; + + outb(VOYAGER_CAT_END, CAT_CMD); + /* Connect the PSI to the CAT Bus */ + outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); + outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT); + outb(VOYAGER_CAT_RUN, CAT_CMD); + cat_disconnect(&psi, &psi_asic); + /* Read the status */ + cat_subread(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data); + outb(VOYAGER_CAT_END, CAT_CMD); + CDEBUG(("PSI STATUS 0x%x\n", data)); + /* These two writes are power off prep and perform */ + data = PSI_CLEAR; + outb(VOYAGER_CAT_RUN, CAT_CMD); + cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data); + outb(VOYAGER_CAT_END, CAT_CMD); + data = PSI_POWER_DOWN; + outb(VOYAGER_CAT_RUN, CAT_CMD); + cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data); + outb(VOYAGER_CAT_END, CAT_CMD); +} + +struct voyager_status voyager_status = { 0 }; + +void +voyager_cat_psi(__u8 cmd, __u16 reg, __u8 *data) +{ + voyager_module_t psi = { 0 }; + voyager_asic_t psi_asic = { 0 }; + + psi.asic = &psi_asic; + psi.asic->asic_id = VOYAGER_CAT_ID; + psi.asic->subaddr = VOYAGER_SUBADDR_HI; + psi.module_addr = VOYAGER_PSI; + psi.scan_path_connected = 0; + + outb(VOYAGER_CAT_END, CAT_CMD); + /* Connect the PSI to the CAT Bus */ + outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); + outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT); + outb(VOYAGER_CAT_RUN, CAT_CMD); + cat_disconnect(&psi, &psi_asic); + switch(cmd) { + case VOYAGER_PSI_READ: + cat_read(&psi, &psi_asic, reg, data); + break; + case VOYAGER_PSI_WRITE: + cat_write(&psi, &psi_asic, reg, *data); + break; + case VOYAGER_PSI_SUBREAD: + cat_subread(&psi, &psi_asic, reg, 1, data); + break; + case VOYAGER_PSI_SUBWRITE: + cat_subwrite(&psi, &psi_asic, reg, 1, data); + break; + default: + printk(KERN_ERR "Voyager PSI, unrecognised command %d\n", cmd); + break; + } + outb(VOYAGER_CAT_END, CAT_CMD); +} + +void +voyager_cat_do_common_interrupt(void) +{ + /* This is caused either by a memory parity error or something + * in the PSI */ + __u8 data; + voyager_module_t psi = { 0 }; + voyager_asic_t psi_asic = { 0 }; + struct voyager_psi psi_reg; + int i; + re_read: + psi.asic = &psi_asic; + psi.asic->asic_id = VOYAGER_CAT_ID; + psi.asic->subaddr = VOYAGER_SUBADDR_HI; + psi.module_addr = VOYAGER_PSI; + psi.scan_path_connected = 0; + + outb(VOYAGER_CAT_END, CAT_CMD); + /* Connect the PSI to the CAT Bus */ + outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); + outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT); + outb(VOYAGER_CAT_RUN, CAT_CMD); + cat_disconnect(&psi, &psi_asic); + /* Read the status. NOTE: Need to read *all* the PSI regs here + * otherwise the cmn int will be reasserted */ + for(i = 0; i < sizeof(psi_reg.regs); i++) { + cat_read(&psi, &psi_asic, i, &((__u8 *)&psi_reg.regs)[i]); + } + outb(VOYAGER_CAT_END, CAT_CMD); + if((psi_reg.regs.checkbit & 0x02) == 0) { + psi_reg.regs.checkbit |= 0x02; + cat_write(&psi, &psi_asic, 5, psi_reg.regs.checkbit); + printk("VOYAGER RE-READ PSI\n"); + goto re_read; + } + outb(VOYAGER_CAT_RUN, CAT_CMD); + for(i = 0; i < sizeof(psi_reg.subregs); i++) { + /* This looks strange, but the PSI doesn't do auto increment + * correctly */ + cat_subread(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG + i, + 1, &((__u8 *)&psi_reg.subregs)[i]); + } + outb(VOYAGER_CAT_END, CAT_CMD); +#ifdef VOYAGER_CAT_DEBUG + printk("VOYAGER PSI: "); + for(i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* TLB state -- visible externally, indexed physically */ +DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0 }; + +/* CPU IRQ affinity -- set to all ones initially */ +static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = ~0UL }; + +/* per CPU data structure (for /proc/cpuinfo et al), visible externally + * indexed physically */ +struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; +EXPORT_SYMBOL(cpu_data); + +/* physical ID of the CPU used to boot the system */ +unsigned char boot_cpu_id; + +/* The memory line addresses for the Quad CPIs */ +struct voyager_qic_cpi *voyager_quad_cpi_addr[NR_CPUS] __cacheline_aligned; + +/* The masks for the Extended VIC processors, filled in by cat_init */ +__u32 voyager_extended_vic_processors = 0; + +/* Masks for the extended Quad processors which cannot be VIC booted */ +__u32 voyager_allowed_boot_processors = 0; + +/* The mask for the Quad Processors (both extended and non-extended) */ +__u32 voyager_quad_processors = 0; + +/* Total count of live CPUs, used in process.c to display + * the CPU information and in irq.c for the per CPU irq + * activity count. Finally exported by i386_ksyms.c */ +static int voyager_extended_cpus = 1; + +/* Have we found an SMP box - used by time.c to do the profiling + interrupt for timeslicing; do not set to 1 until the per CPU timer + interrupt is active */ +int smp_found_config = 0; + +/* Used for the invalidate map that's also checked in the spinlock */ +static volatile unsigned long smp_invalidate_needed; + +/* Bitmask of currently online CPUs - used by setup.c for + /proc/cpuinfo, visible externally but still physical */ +cpumask_t cpu_online_map = CPU_MASK_NONE; +EXPORT_SYMBOL(cpu_online_map); + +/* Bitmask of CPUs present in the system - exported by i386_syms.c, used + * by scheduler but indexed physically */ +cpumask_t phys_cpu_present_map = CPU_MASK_NONE; + + +/* The internal functions */ +static void send_CPI(__u32 cpuset, __u8 cpi); +static void ack_CPI(__u8 cpi); +static int ack_QIC_CPI(__u8 cpi); +static void ack_special_QIC_CPI(__u8 cpi); +static void ack_VIC_CPI(__u8 cpi); +static void send_CPI_allbutself(__u8 cpi); +static void mask_vic_irq(unsigned int irq); +static void unmask_vic_irq(unsigned int irq); +static unsigned int startup_vic_irq(unsigned int irq); +static void enable_local_vic_irq(unsigned int irq); +static void disable_local_vic_irq(unsigned int irq); +static void before_handle_vic_irq(unsigned int irq); +static void after_handle_vic_irq(unsigned int irq); +static void set_vic_irq_affinity(unsigned int irq, cpumask_t mask); +static void ack_vic_irq(unsigned int irq); +static void vic_enable_cpi(void); +static void do_boot_cpu(__u8 cpuid); +static void do_quad_bootstrap(void); + +int hard_smp_processor_id(void); +int safe_smp_processor_id(void); + +/* Inline functions */ +static inline void +send_one_QIC_CPI(__u8 cpu, __u8 cpi) +{ + voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi = + (smp_processor_id() << 16) + cpi; +} + +static inline void +send_QIC_CPI(__u32 cpuset, __u8 cpi) +{ + int cpu; + + for_each_online_cpu(cpu) { + if(cpuset & (1<>3 &0x7 on the 32 way */ + if(((cpuid >> 2) & 0x03) == i) + /* don't lower our own mask! */ + continue; + + /* masquerade as local Quad CPU */ + outb(QIC_CPUID_ENABLE | i, QIC_PROCESSOR_ID); + /* enable the startup CPI */ + outb(QIC_BOOT_CPI_MASK, QIC_MASK_REGISTER1); + /* restore cpu id */ + outb(0, QIC_PROCESSOR_ID); + } + local_irq_restore(flags); + } +} + + +/* Set up all the basic stuff: read the SMP config and make all the + * SMP information reflect only the boot cpu. All others will be + * brought on-line later. */ +void __init +find_smp_config(void) +{ + int i; + + boot_cpu_id = hard_smp_processor_id(); + + printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id); + + /* initialize the CPU structures (moved from smp_boot_cpus) */ + for(i=0; icpu = boot_cpu_id; + x86_write_percpu(cpu_number, boot_cpu_id); +} + +/* + * The bootstrap kernel entry code has set these up. Save them + * for a given CPU, id is physical */ +void __init +smp_store_cpu_info(int id) +{ + struct cpuinfo_x86 *c=&cpu_data[id]; + + *c = boot_cpu_data; + + identify_secondary_cpu(c); +} + +/* set up the trampoline and return the physical address of the code */ +static __u32 __init +setup_trampoline(void) +{ + /* these two are global symbols in trampoline.S */ + extern __u8 trampoline_end[]; + extern __u8 trampoline_data[]; + + memcpy((__u8 *)trampoline_base, trampoline_data, + trampoline_end - trampoline_data); + return virt_to_phys((__u8 *)trampoline_base); +} + +/* Routine initially called when a non-boot CPU is brought online */ +static void __init +start_secondary(void *unused) +{ + __u8 cpuid = hard_smp_processor_id(); + /* external functions not defined in the headers */ + extern void calibrate_delay(void); + + cpu_init(); + + /* OK, we're in the routine */ + ack_CPI(VIC_CPU_BOOT_CPI); + + /* setup the 8259 master slave pair belonging to this CPU --- + * we won't actually receive any until the boot CPU + * relinquishes it's static routing mask */ + vic_setup_pic(); + + qic_setup(); + + if(is_cpu_quad() && !is_cpu_vic_boot()) { + /* clear the boot CPI */ + __u8 dummy; + + dummy = voyager_quad_cpi_addr[cpuid]->qic_cpi[VIC_CPU_BOOT_CPI].cpi; + printk("read dummy %d\n", dummy); + } + + /* lower the mask to receive CPIs */ + vic_enable_cpi(); + + VDEBUG(("VOYAGER SMP: CPU%d, stack at about %p\n", cpuid, &cpuid)); + + /* enable interrupts */ + local_irq_enable(); + + /* get our bogomips */ + calibrate_delay(); + + /* save our processor parameters */ + smp_store_cpu_info(cpuid); + + /* if we're a quad, we may need to bootstrap other CPUs */ + do_quad_bootstrap(); + + /* FIXME: this is rather a poor hack to prevent the CPU + * activating softirqs while it's supposed to be waiting for + * permission to proceed. Without this, the new per CPU stuff + * in the softirqs will fail */ + local_irq_disable(); + cpu_set(cpuid, cpu_callin_map); + + /* signal that we're done */ + cpu_booted_map = 1; + + while (!cpu_isset(cpuid, smp_commenced_mask)) + rep_nop(); + local_irq_enable(); + + local_flush_tlb(); + + cpu_set(cpuid, cpu_online_map); + wmb(); + cpu_idle(); +} + + +/* Routine to kick start the given CPU and wait for it to report ready + * (or timeout in startup). When this routine returns, the requested + * CPU is either fully running and configured or known to be dead. + * + * We call this routine sequentially 1 CPU at a time, so no need for + * locking */ + +static void __init +do_boot_cpu(__u8 cpu) +{ + struct task_struct *idle; + int timeout; + unsigned long flags; + int quad_boot = (1<> 4) & 0xFFFF; + + cpucount++; + alternatives_smp_switch(1); + + idle = fork_idle(cpu); + if(IS_ERR(idle)) + panic("failed fork for CPU%d", cpu); + idle->thread.eip = (unsigned long) start_secondary; + /* init_tasks (in sched.c) is indexed logically */ + stack_start.esp = (void *) idle->thread.esp; + + init_gdt(cpu); + per_cpu(current_task, cpu) = idle; + early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); + irq_ctx_init(cpu); + + /* Note: Don't modify initial ss override */ + VDEBUG(("VOYAGER SMP: Booting CPU%d at 0x%lx[%x:%x], stack %p\n", cpu, + (unsigned long)hijack_source.val, hijack_source.idt.Segment, + hijack_source.idt.Offset, stack_start.esp)); + + /* init lowmem identity mapping */ + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); + flush_tlb_all(); + + if(quad_boot) { + printk("CPU %d: non extended Quad boot\n", cpu); + hijack_vector = (__u32 *)phys_to_virt((VIC_CPU_BOOT_CPI + QIC_DEFAULT_CPI_BASE)*4); + *hijack_vector = hijack_source.val; + } else { + printk("CPU%d: extended VIC boot\n", cpu); + hijack_vector = (__u32 *)phys_to_virt((VIC_CPU_BOOT_CPI + VIC_DEFAULT_CPI_BASE)*4); + *hijack_vector = hijack_source.val; + /* VIC errata, may also receive interrupt at this address */ + hijack_vector = (__u32 *)phys_to_virt((VIC_CPU_BOOT_ERRATA_CPI + VIC_DEFAULT_CPI_BASE)*4); + *hijack_vector = hijack_source.val; + } + /* All non-boot CPUs start with interrupts fully masked. Need + * to lower the mask of the CPI we're about to send. We do + * this in the VIC by masquerading as the processor we're + * about to boot and lowering its interrupt mask */ + local_irq_save(flags); + if(quad_boot) { + send_one_QIC_CPI(cpu, VIC_CPU_BOOT_CPI); + } else { + outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID); + /* here we're altering registers belonging to `cpu' */ + + outb(VIC_BOOT_INTERRUPT_MASK, 0x21); + /* now go back to our original identity */ + outb(boot_cpu_id, VIC_PROCESSOR_ID); + + /* and boot the CPU */ + + send_CPI((1<thread.esp),"r" (current->thread.eip)); +} + +/* handle a Voyager SYS_INT -- If we don't, the base board will + * panic the system. + * + * System interrupts occur because some problem was detected on the + * various busses. To find out what you have to probe all the + * hardware via the CAT bus. FIXME: At the moment we do nothing. */ +fastcall void +smp_vic_sys_interrupt(struct pt_regs *regs) +{ + ack_CPI(VIC_SYS_INT); + printk("Voyager SYSTEM INTERRUPT\n"); +} + +/* Handle a voyager CMN_INT; These interrupts occur either because of + * a system status change or because a single bit memory error + * occurred. FIXME: At the moment, ignore all this. */ +fastcall void +smp_vic_cmn_interrupt(struct pt_regs *regs) +{ + static __u8 in_cmn_int = 0; + static DEFINE_SPINLOCK(cmn_int_lock); + + /* common ints are broadcast, so make sure we only do this once */ + _raw_spin_lock(&cmn_int_lock); + if(in_cmn_int) + goto unlock_end; + + in_cmn_int++; + _raw_spin_unlock(&cmn_int_lock); + + VDEBUG(("Voyager COMMON INTERRUPT\n")); + + if(voyager_level == 5) + voyager_cat_do_common_interrupt(); + + _raw_spin_lock(&cmn_int_lock); + in_cmn_int = 0; + unlock_end: + _raw_spin_unlock(&cmn_int_lock); + ack_CPI(VIC_CMN_INT); +} + +/* + * Reschedule call back. Nothing to do, all the work is done + * automatically when we return from the interrupt. */ +static void +smp_reschedule_interrupt(void) +{ + /* do nothing */ +} + +static struct mm_struct * flush_mm; +static unsigned long flush_va; +static DEFINE_SPINLOCK(tlbstate_lock); +#define FLUSH_ALL 0xffffffff + +/* + * We cannot call mmdrop() because we are in interrupt context, + * instead update mm->cpu_vm_mask. + * + * We need to reload %cr3 since the page tables may be going + * away from under us.. + */ +static inline void +leave_mm (unsigned long cpu) +{ + if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) + BUG(); + cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); + load_cr3(swapper_pg_dir); +} + + +/* + * Invalidate call-back + */ +static void +smp_invalidate_interrupt(void) +{ + __u8 cpu = smp_processor_id(); + + if (!test_bit(cpu, &smp_invalidate_needed)) + return; + /* This will flood messages. Don't uncomment unless you see + * Problems with cross cpu invalidation + VDEBUG(("VOYAGER SMP: CPU%d received INVALIDATE_CPI\n", + smp_processor_id())); + */ + + if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { + if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { + if (flush_va == FLUSH_ALL) + local_flush_tlb(); + else + __flush_tlb_one(flush_va); + } else + leave_mm(cpu); + } + smp_mb__before_clear_bit(); + clear_bit(cpu, &smp_invalidate_needed); + smp_mb__after_clear_bit(); +} + +/* All the new flush operations for 2.4 */ + + +/* This routine is called with a physical cpu mask */ +static void +voyager_flush_tlb_others (unsigned long cpumask, struct mm_struct *mm, + unsigned long va) +{ + int stuck = 50000; + + if (!cpumask) + BUG(); + if ((cpumask & cpus_addr(cpu_online_map)[0]) != cpumask) + BUG(); + if (cpumask & (1 << smp_processor_id())) + BUG(); + if (!mm) + BUG(); + + spin_lock(&tlbstate_lock); + + flush_mm = mm; + flush_va = va; + atomic_set_mask(cpumask, &smp_invalidate_needed); + /* + * We have to send the CPI only to + * CPUs affected. + */ + send_CPI(cpumask, VIC_INVALIDATE_CPI); + + while (smp_invalidate_needed) { + mb(); + if(--stuck == 0) { + printk("***WARNING*** Stuck doing invalidate CPI (CPU%d)\n", smp_processor_id()); + break; + } + } + + /* Uncomment only to debug invalidation problems + VDEBUG(("VOYAGER SMP: Completed invalidate CPI (CPU%d)\n", cpu)); + */ + + flush_mm = NULL; + flush_va = 0; + spin_unlock(&tlbstate_lock); +} + +void +flush_tlb_current_task(void) +{ + struct mm_struct *mm = current->mm; + unsigned long cpu_mask; + + preempt_disable(); + + cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); + local_flush_tlb(); + if (cpu_mask) + voyager_flush_tlb_others(cpu_mask, mm, FLUSH_ALL); + + preempt_enable(); +} + + +void +flush_tlb_mm (struct mm_struct * mm) +{ + unsigned long cpu_mask; + + preempt_disable(); + + cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); + + if (current->active_mm == mm) { + if (current->mm) + local_flush_tlb(); + else + leave_mm(smp_processor_id()); + } + if (cpu_mask) + voyager_flush_tlb_others(cpu_mask, mm, FLUSH_ALL); + + preempt_enable(); +} + +void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long cpu_mask; + + preempt_disable(); + + cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); + if (current->active_mm == mm) { + if(current->mm) + __flush_tlb_one(va); + else + leave_mm(smp_processor_id()); + } + + if (cpu_mask) + voyager_flush_tlb_others(cpu_mask, mm, va); + + preempt_enable(); +} +EXPORT_SYMBOL(flush_tlb_page); + +/* enable the requested IRQs */ +static void +smp_enable_irq_interrupt(void) +{ + __u8 irq; + __u8 cpu = get_cpu(); + + VDEBUG(("VOYAGER SMP: CPU%d enabling irq mask 0x%x\n", cpu, + vic_irq_enable_mask[cpu])); + + spin_lock(&vic_irq_lock); + for(irq = 0; irq < 16; irq++) { + if(vic_irq_enable_mask[cpu] & (1<func; + void *info = call_data->info; + /* must take copy of wait because call_data may be replaced + * unless the function is waiting for us to finish */ + int wait = call_data->wait; + __u8 cpu = smp_processor_id(); + + /* + * Notify initiating CPU that I've grabbed the data and am + * about to execute the function + */ + mb(); + if(!test_and_clear_bit(cpu, &call_data->started)) { + /* If the bit wasn't set, this could be a replay */ + printk(KERN_WARNING "VOYAGER SMP: CPU %d received call funtion with no call pending\n", cpu); + return; + } + /* + * At this point the info structure may be out of scope unless wait==1 + */ + irq_enter(); + (*func)(info); + irq_exit(); + if (wait) { + mb(); + clear_bit(cpu, &call_data->finished); + } +} + +static int +voyager_smp_call_function_mask (cpumask_t cpumask, + void (*func) (void *info), void *info, + int wait) +{ + struct call_data_struct data; + u32 mask = cpus_addr(cpumask)[0]; + + mask &= ~(1<= 0x93000) + BUG(); +} + +/* send a reschedule CPI to one CPU by physical CPU number*/ +static void +voyager_smp_send_reschedule(int cpu) +{ + send_one_CPI(cpu, VIC_RESCHEDULE_CPI); +} + + +int +hard_smp_processor_id(void) +{ + __u8 i; + __u8 cpumask = inb(VIC_PROC_WHO_AM_I); + if((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER) + return cpumask & 0x1F; + + for(i = 0; i < 8; i++) { + if(cpumask & (1<1 eligible CPUs are equal lowest, the + * lowest processor number gets it. + * + * The priority of a CPU is controlled by a special per-CPU + * VIC priority register which is 3 bits wide 0 being lowest + * and 7 highest priority.. + * + * Therefore we subtract the average number of interrupts from + * the number we've fielded. If this number is negative, we + * lower the activity count and if it is positive, we raise + * it. + * + * I'm afraid this still leads to odd looking interrupt counts: + * the totals are all roughly equal, but the individual ones + * look rather skewed. + * + * FIXME: This algorithm is total crap when mixed with SMP + * affinity code since we now try to even up the interrupt + * counts when an affinity binding is keeping them on a + * particular CPU*/ + weight = (vic_intr_count[cpu]*voyager_extended_cpus + - vic_intr_total) >> 4; + weight += 4; + if(weight > 7) + weight = 7; + if(weight < 0) + weight = 0; + + outb((__u8)weight, VIC_PRIORITY_REGISTER); + +#ifdef VOYAGER_DEBUG + if((vic_tick[cpu] & 0xFFF) == 0) { + /* print this message roughly every 25 secs */ + printk("VOYAGER SMP: vic_tick[%d] = %lu, weight = %ld\n", + cpu, vic_tick[cpu], weight); + } +#endif +} + +/* setup the profiling timer */ +int +setup_profiling_timer(unsigned int multiplier) +{ + int i; + + if ( (!multiplier)) + return -EINVAL; + + /* + * Set the new multiplier for each CPU. CPUs don't start using the + * new values until the next timer interrupt in which they do process + * accounting. + */ + for (i = 0; i < NR_CPUS; ++i) + per_cpu(prof_multiplier, i) = multiplier; + + return 0; +} + +/* This is a bit of a mess, but forced on us by the genirq changes + * there's no genirq handler that really does what voyager wants + * so hack it up with the simple IRQ handler */ +static void fastcall +handle_vic_irq(unsigned int irq, struct irq_desc *desc) +{ + before_handle_vic_irq(irq); + handle_simple_irq(irq, desc); + after_handle_vic_irq(irq); +} + + +/* The CPIs are handled in the per cpu 8259s, so they must be + * enabled to be received: FIX: enabling the CPIs in the early + * boot sequence interferes with bug checking; enable them later + * on in smp_init */ +#define VIC_SET_GATE(cpi, vector) \ + set_intr_gate((cpi) + VIC_DEFAULT_CPI_BASE, (vector)) +#define QIC_SET_GATE(cpi, vector) \ + set_intr_gate((cpi) + QIC_DEFAULT_CPI_BASE, (vector)) + +void __init +smp_intr_init(void) +{ + int i; + + /* initialize the per cpu irq mask to all disabled */ + for(i = 0; i < NR_CPUS; i++) + vic_irq_mask[i] = 0xFFFF; + + VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt); + + VIC_SET_GATE(VIC_SYS_INT, vic_sys_interrupt); + VIC_SET_GATE(VIC_CMN_INT, vic_cmn_interrupt); + + QIC_SET_GATE(QIC_TIMER_CPI, qic_timer_interrupt); + QIC_SET_GATE(QIC_INVALIDATE_CPI, qic_invalidate_interrupt); + QIC_SET_GATE(QIC_RESCHEDULE_CPI, qic_reschedule_interrupt); + QIC_SET_GATE(QIC_ENABLE_IRQ_CPI, qic_enable_irq_interrupt); + QIC_SET_GATE(QIC_CALL_FUNCTION_CPI, qic_call_function_interrupt); + + + /* now put the VIC descriptor into the first 48 IRQs + * + * This is for later: first 16 correspond to PC IRQs; next 16 + * are Primary MC IRQs and final 16 are Secondary MC IRQs */ + for(i = 0; i < 48; i++) + set_irq_chip_and_handler(i, &vic_chip, handle_vic_irq); +} + +/* send a CPI at level cpi to a set of cpus in cpuset (set 1 bit per + * processor to receive CPI */ +static void +send_CPI(__u32 cpuset, __u8 cpi) +{ + int cpu; + __u32 quad_cpuset = (cpuset & voyager_quad_processors); + + if(cpi < VIC_START_FAKE_CPI) { + /* fake CPI are only used for booting, so send to the + * extended quads as well---Quads must be VIC booted */ + outb((__u8)(cpuset), VIC_CPI_Registers[cpi]); + return; + } + if(quad_cpuset) + send_QIC_CPI(quad_cpuset, cpi); + cpuset &= ~quad_cpuset; + cpuset &= 0xff; /* only first 8 CPUs vaild for VIC CPI */ + if(cpuset == 0) + return; + for_each_online_cpu(cpu) { + if(cpuset & (1<qic_cpi[cpi].cpi; +} + +static void +ack_special_QIC_CPI(__u8 cpi) +{ + switch(cpi) { + case VIC_CMN_INT: + outb(QIC_CMN_INT, QIC_INTERRUPT_CLEAR0); + break; + case VIC_SYS_INT: + outb(QIC_SYS_INT, QIC_INTERRUPT_CLEAR0); + break; + } + /* also clear at the VIC, just in case (nop for non-extended proc) */ + ack_VIC_CPI(cpi); +} + +/* Acknowledge receipt of CPI in the VIC (essentially an EOI) */ +static void +ack_VIC_CPI(__u8 cpi) +{ +#ifdef VOYAGER_DEBUG + unsigned long flags; + __u16 isr; + __u8 cpu = smp_processor_id(); + + local_irq_save(flags); + isr = vic_read_isr(); + if((isr & (1<<(cpi &7))) == 0) { + printk("VOYAGER SMP: CPU%d lost CPI%d\n", cpu, cpi); + } +#endif + /* send specific EOI; the two system interrupts have + * bit 4 set for a separate vector but behave as the + * corresponding 3 bit intr */ + outb_p(0x60|(cpi & 7),0x20); + +#ifdef VOYAGER_DEBUG + if((vic_read_isr() & (1<<(cpi &7))) != 0) { + printk("VOYAGER SMP: CPU%d still asserting CPI%d\n", cpu, cpi); + } + local_irq_restore(flags); +#endif +} + +/* cribbed with thanks from irq.c */ +#define __byte(x,y) (((unsigned char *)&(y))[x]) +#define cached_21(cpu) (__byte(0,vic_irq_mask[cpu])) +#define cached_A1(cpu) (__byte(1,vic_irq_mask[cpu])) + +static unsigned int +startup_vic_irq(unsigned int irq) +{ + unmask_vic_irq(irq); + + return 0; +} + +/* The enable and disable routines. This is where we run into + * conflicting architectural philosophy. Fundamentally, the voyager + * architecture does not expect to have to disable interrupts globally + * (the IRQ controllers belong to each CPU). The processor masquerade + * which is used to start the system shouldn't be used in a running OS + * since it will cause great confusion if two separate CPUs drive to + * the same IRQ controller (I know, I've tried it). + * + * The solution is a variant on the NCR lazy SPL design: + * + * 1) To disable an interrupt, do nothing (other than set the + * IRQ_DISABLED flag). This dares the interrupt actually to arrive. + * + * 2) If the interrupt dares to come in, raise the local mask against + * it (this will result in all the CPU masks being raised + * eventually). + * + * 3) To enable the interrupt, lower the mask on the local CPU and + * broadcast an Interrupt enable CPI which causes all other CPUs to + * adjust their masks accordingly. */ + +static void +unmask_vic_irq(unsigned int irq) +{ + /* linux doesn't to processor-irq affinity, so enable on + * all CPUs we know about */ + int cpu = smp_processor_id(), real_cpu; + __u16 mask = (1<status |= IRQ_REPLAY | IRQ_INPROGRESS; + } else if(desc->status & IRQ_DISABLED) { + /* Damn, the interrupt actually arrived, do the lazy + * disable thing. The interrupt routine in irq.c will + * not handle a IRQ_DISABLED interrupt, so nothing more + * need be done here */ + VDEBUG(("VOYAGER DEBUG: lazy disable of irq %d on CPU %d\n", + irq, cpu)); + disable_local_vic_irq(irq); + desc->status |= IRQ_REPLAY; + } else { + desc->status &= ~IRQ_REPLAY; + } + + _raw_spin_unlock(&vic_irq_lock); +} + +/* Finish the VIC interrupt: basically mask */ +static void +after_handle_vic_irq(unsigned int irq) +{ + irq_desc_t *desc = irq_desc + irq; + + _raw_spin_lock(&vic_irq_lock); + { + unsigned int status = desc->status & ~IRQ_INPROGRESS; +#ifdef VOYAGER_DEBUG + __u16 isr; +#endif + + desc->status = status; + if ((status & IRQ_DISABLED)) + disable_local_vic_irq(irq); +#ifdef VOYAGER_DEBUG + /* DEBUG: before we ack, check what's in progress */ + isr = vic_read_isr(); + if((isr & (1<status &= ~(IRQ_REPLAY | IRQ_INPROGRESS); + } +#ifdef VOYAGER_DEBUG + isr = vic_read_isr(); + if((isr & (1<= 32) + /* You can only have 32 interrupts in a voyager system + * (and 32 only if you have a secondary microchannel + * bus) */ + return; + + for_each_online_cpu(cpu) { + unsigned long cpu_mask = 1 << cpu; + + if(cpu_mask & real_mask) { + /* enable the interrupt for this cpu */ + cpu_irq_affinity[cpu] |= irq_mask; + } else { + /* disable the interrupt for this cpu */ + cpu_irq_affinity[cpu] &= ~irq_mask; + } + } + /* this is magic, we now have the correct affinity maps, so + * enable the interrupt. This will send an enable CPI to + * those cpu's who need to enable it in their local masks, + * causing them to correct for the new affinity . If the + * interrupt is currently globally disabled, it will simply be + * disabled again as it comes in (voyager lazy disable). If + * the affinity map is tightened to disable the interrupt on a + * cpu, it will be pushed off when it comes in */ + unmask_vic_irq(irq); +} + +static void +ack_vic_irq(unsigned int irq) +{ + if (irq & 8) { + outb(0x62,0x20); /* Specific EOI to cascade */ + outb(0x60|(irq & 7),0xA0); + } else { + outb(0x60 | (irq & 7),0x20); + } +} + +/* enable the CPIs. In the VIC, the CPIs are delivered by the 8259 + * but are not vectored by it. This means that the 8259 mask must be + * lowered to receive them */ +static __init void +vic_enable_cpi(void) +{ + __u8 cpu = smp_processor_id(); + + /* just take a copy of the current mask (nop for boot cpu) */ + vic_irq_mask[cpu] = vic_irq_mask[boot_cpu_id]; + + enable_local_vic_irq(VIC_CPI_LEVEL0); + enable_local_vic_irq(VIC_CPI_LEVEL1); + /* for sys int and cmn int */ + enable_local_vic_irq(7); + + if(is_cpu_quad()) { + outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0); + outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1); + VDEBUG(("VOYAGER SMP: QIC ENABLE CPI: CPU%d: MASK 0x%x\n", + cpu, QIC_CPI_ENABLE)); + } + + VDEBUG(("VOYAGER SMP: ENABLE CPI: CPU%d: MASK 0x%x\n", + cpu, vic_irq_mask[cpu])); +} + +void +voyager_smp_dump() +{ + int old_cpu = smp_processor_id(), cpu; + + /* dump the interrupt masks of each processor */ + for_each_online_cpu(cpu) { + __u16 imr, isr, irr; + unsigned long flags; + + local_irq_save(flags); + outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID); + imr = (inb(0xa1) << 8) | inb(0x21); + outb(0x0a, 0xa0); + irr = inb(0xa0) << 8; + outb(0x0a, 0x20); + irr |= inb(0x20); + outb(0x0b, 0xa0); + isr = inb(0xa0) << 8; + outb(0x0b, 0x20); + isr |= inb(0x20); + outb(old_cpu, VIC_PROCESSOR_ID); + local_irq_restore(flags); + printk("\tCPU%d: mask=0x%x, IMR=0x%x, IRR=0x%x, ISR=0x%x\n", + cpu, vic_irq_mask[cpu], imr, irr, isr); +#if 0 + /* These lines are put in to try to unstick an un ack'd irq */ + if(isr != 0) { + int irq; + for(irq=0; irq<16; irq++) { + if(isr & (1<cpu = hard_smp_processor_id(); + x86_write_percpu(cpu_number, hard_smp_processor_id()); +} + +struct smp_ops smp_ops = { + .smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu, + .smp_prepare_cpus = voyager_smp_prepare_cpus, + .cpu_up = voyager_cpu_up, + .smp_cpus_done = voyager_smp_cpus_done, + + .smp_send_stop = voyager_smp_send_stop, + .smp_send_reschedule = voyager_smp_send_reschedule, + .smp_call_function_mask = voyager_smp_call_function_mask, +}; diff --git a/arch/x86/mach-voyager/voyager_thread.c b/arch/x86/mach-voyager/voyager_thread.c new file mode 100644 index 00000000000..f9d59533815 --- /dev/null +++ b/arch/x86/mach-voyager/voyager_thread.c @@ -0,0 +1,134 @@ +/* -*- mode: c; c-basic-offset: 8 -*- */ + +/* Copyright (C) 2001 + * + * Author: J.E.J.Bottomley@HansenPartnership.com + * + * linux/arch/i386/kernel/voyager_thread.c + * + * This module provides the machine status monitor thread for the + * voyager architecture. This allows us to monitor the machine + * environment (temp, voltage, fan function) and the front panel and + * internal UPS. If a fault is detected, this thread takes corrective + * action (usually just informing init) + * */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +struct task_struct *voyager_thread; +static __u8 set_timeout; + +static int +execute(const char *string) +{ + int ret; + + char *envp[] = { + "HOME=/", + "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + NULL, + }; + char *argv[] = { + "/bin/bash", + "-c", + (char *)string, + NULL, + }; + + if ((ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) { + printk(KERN_ERR "Voyager failed to run \"%s\": %i\n", + string, ret); + } + return ret; +} + +static void +check_from_kernel(void) +{ + if(voyager_status.switch_off) { + + /* FIXME: This should be configureable via proc */ + execute("umask 600; echo 0 > /etc/initrunlvl; kill -HUP 1"); + } else if(voyager_status.power_fail) { + VDEBUG(("Voyager daemon detected AC power failure\n")); + + /* FIXME: This should be configureable via proc */ + execute("umask 600; echo F > /etc/powerstatus; kill -PWR 1"); + set_timeout = 1; + } +} + +static void +check_continuing_condition(void) +{ + if(voyager_status.power_fail) { + __u8 data; + voyager_cat_psi(VOYAGER_PSI_SUBREAD, + VOYAGER_PSI_AC_FAIL_REG, &data); + if((data & 0x1f) == 0) { + /* all power restored */ + printk(KERN_NOTICE "VOYAGER AC power restored, cancelling shutdown\n"); + /* FIXME: should be user configureable */ + execute("umask 600; echo O > /etc/powerstatus; kill -PWR 1"); + set_timeout = 0; + } + } +} + +static int +thread(void *unused) +{ + printk(KERN_NOTICE "Voyager starting monitor thread\n"); + + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(set_timeout ? HZ : MAX_SCHEDULE_TIMEOUT); + + VDEBUG(("Voyager Daemon awoken\n")); + if(voyager_status.request_from_kernel == 0) { + /* probably awoken from timeout */ + check_continuing_condition(); + } else { + check_from_kernel(); + voyager_status.request_from_kernel = 0; + } + } +} + +static int __init +voyager_thread_start(void) +{ + voyager_thread = kthread_run(thread, NULL, "kvoyagerd"); + if (IS_ERR(voyager_thread)) { + printk(KERN_ERR "Voyager: Failed to create system monitor thread.\n"); + return PTR_ERR(voyager_thread); + } + return 0; +} + + +static void __exit +voyager_thread_stop(void) +{ + kthread_stop(voyager_thread); +} + +module_init(voyager_thread_start); +module_exit(voyager_thread_stop); -- cgit v1.2.3-70-g09d2 From ff4395654dc6a3a5e35611940047114d4f3d0a7a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:55 +0200 Subject: i386: move oprofile Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Kconfig | 2 +- arch/i386/Makefile | 2 +- arch/i386/oprofile/Kconfig | 17 - arch/i386/oprofile/Makefile | 12 - arch/i386/oprofile/backtrace.c | 127 ------ arch/i386/oprofile/init.c | 48 --- arch/i386/oprofile/nmi_int.c | 477 ----------------------- arch/i386/oprofile/nmi_timer_int.c | 69 ---- arch/i386/oprofile/op_counter.h | 29 -- arch/i386/oprofile/op_model_athlon.c | 180 --------- arch/i386/oprofile/op_model_p4.c | 722 ----------------------------------- arch/i386/oprofile/op_model_ppro.c | 192 ---------- arch/i386/oprofile/op_x86_model.h | 51 --- arch/x86/oprofile/Kconfig | 17 + arch/x86/oprofile/Makefile | 12 + arch/x86/oprofile/backtrace.c | 127 ++++++ arch/x86/oprofile/init.c | 48 +++ arch/x86/oprofile/nmi_int.c | 477 +++++++++++++++++++++++ arch/x86/oprofile/nmi_timer_int.c | 69 ++++ arch/x86/oprofile/op_counter.h | 29 ++ arch/x86/oprofile/op_model_athlon.c | 180 +++++++++ arch/x86/oprofile/op_model_p4.c | 722 +++++++++++++++++++++++++++++++++++ arch/x86/oprofile/op_model_ppro.c | 192 ++++++++++ arch/x86/oprofile/op_x86_model.h | 51 +++ arch/x86_64/Makefile | 2 +- 25 files changed, 1927 insertions(+), 1927 deletions(-) delete mode 100644 arch/i386/oprofile/Kconfig delete mode 100644 arch/i386/oprofile/Makefile delete mode 100644 arch/i386/oprofile/backtrace.c delete mode 100644 arch/i386/oprofile/init.c delete mode 100644 arch/i386/oprofile/nmi_int.c delete mode 100644 arch/i386/oprofile/nmi_timer_int.c delete mode 100644 arch/i386/oprofile/op_counter.h delete mode 100644 arch/i386/oprofile/op_model_athlon.c delete mode 100644 arch/i386/oprofile/op_model_p4.c delete mode 100644 arch/i386/oprofile/op_model_ppro.c delete mode 100644 arch/i386/oprofile/op_x86_model.h create mode 100644 arch/x86/oprofile/Kconfig create mode 100644 arch/x86/oprofile/Makefile create mode 100644 arch/x86/oprofile/backtrace.c create mode 100644 arch/x86/oprofile/init.c create mode 100644 arch/x86/oprofile/nmi_int.c create mode 100644 arch/x86/oprofile/nmi_timer_int.c create mode 100644 arch/x86/oprofile/op_counter.h create mode 100644 arch/x86/oprofile/op_model_athlon.c create mode 100644 arch/x86/oprofile/op_model_p4.c create mode 100644 arch/x86/oprofile/op_model_ppro.c create mode 100644 arch/x86/oprofile/op_x86_model.h (limited to 'arch/x86') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 0f6186f0303..2d85e4b8730 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -1240,7 +1240,7 @@ menuconfig INSTRUMENTATION if INSTRUMENTATION -source "arch/i386/oprofile/Kconfig" +source "arch/x86/oprofile/Kconfig" config KPROBES bool "Kprobes" diff --git a/arch/i386/Makefile b/arch/i386/Makefile index e03c47e56c7..4d1c0015e44 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -109,7 +109,7 @@ core-y += arch/i386/kernel/ \ drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ drivers-$(CONFIG_PCI) += arch/x86/pci/ # must be linked after kernel/ -drivers-$(CONFIG_OPROFILE) += arch/i386/oprofile/ +drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/ drivers-$(CONFIG_PM) += arch/x86/power/ drivers-$(CONFIG_FB) += arch/i386/video/ diff --git a/arch/i386/oprofile/Kconfig b/arch/i386/oprofile/Kconfig deleted file mode 100644 index d8a84088471..00000000000 --- a/arch/i386/oprofile/Kconfig +++ /dev/null @@ -1,17 +0,0 @@ -config PROFILING - bool "Profiling support (EXPERIMENTAL)" - help - Say Y here to enable the extended profiling support mechanisms used - by profilers such as OProfile. - - -config OPROFILE - tristate "OProfile system profiling (EXPERIMENTAL)" - depends on PROFILING - help - OProfile is a profiling system capable of profiling the - whole system, include the kernel, kernel modules, libraries, - and applications. - - If unsure, say N. - diff --git a/arch/i386/oprofile/Makefile b/arch/i386/oprofile/Makefile deleted file mode 100644 index 30f3eb36666..00000000000 --- a/arch/i386/oprofile/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -obj-$(CONFIG_OPROFILE) += oprofile.o - -DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ - oprof.o cpu_buffer.o buffer_sync.o \ - event_buffer.o oprofile_files.o \ - oprofilefs.o oprofile_stats.o \ - timer_int.o ) - -oprofile-y := $(DRIVER_OBJS) init.o backtrace.o -oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o \ - op_model_ppro.o op_model_p4.o -oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o diff --git a/arch/i386/oprofile/backtrace.c b/arch/i386/oprofile/backtrace.c deleted file mode 100644 index c049ce414f0..00000000000 --- a/arch/i386/oprofile/backtrace.c +++ /dev/null @@ -1,127 +0,0 @@ -/** - * @file backtrace.c - * - * @remark Copyright 2002 OProfile authors - * @remark Read the file COPYING - * - * @author John Levon - * @author David Smith - */ - -#include -#include -#include -#include -#include - -struct frame_head { - struct frame_head * ebp; - unsigned long ret; -} __attribute__((packed)); - -static struct frame_head * -dump_kernel_backtrace(struct frame_head * head) -{ - oprofile_add_trace(head->ret); - - /* frame pointers should strictly progress back up the stack - * (towards higher addresses) */ - if (head >= head->ebp) - return NULL; - - return head->ebp; -} - -static struct frame_head * -dump_user_backtrace(struct frame_head * head) -{ - struct frame_head bufhead[2]; - - /* Also check accessibility of one struct frame_head beyond */ - if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) - return NULL; - if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) - return NULL; - - oprofile_add_trace(bufhead[0].ret); - - /* frame pointers should strictly progress back up the stack - * (towards higher addresses) */ - if (head >= bufhead[0].ebp) - return NULL; - - return bufhead[0].ebp; -} - -/* - * | | /\ Higher addresses - * | | - * --------------- stack base (address of current_thread_info) - * | thread info | - * . . - * | stack | - * --------------- saved regs->ebp value if valid (frame_head address) - * . . - * --------------- saved regs->rsp value if x86_64 - * | | - * --------------- struct pt_regs * stored on stack if 32-bit - * | | - * . . - * | | - * --------------- %esp - * | | - * | | \/ Lower addresses - * - * Thus, regs (or regs->rsp for x86_64) <-> stack base restricts the - * valid(ish) ebp values. Note: (1) for x86_64, NMI and several other - * exceptions use special stacks, maintained by the interrupt stack table - * (IST). These stacks are set up in trap_init() in - * arch/x86_64/kernel/traps.c. Thus, for x86_64, regs now does not point - * to the kernel stack; instead, it points to some location on the NMI - * stack. On the other hand, regs->rsp is the stack pointer saved when the - * NMI occurred. (2) For 32-bit, regs->esp is not valid because the - * processor does not save %esp on the kernel stack when interrupts occur - * in the kernel mode. - */ -#ifdef CONFIG_FRAME_POINTER -static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs) -{ - unsigned long headaddr = (unsigned long)head; -#ifdef CONFIG_X86_64 - unsigned long stack = (unsigned long)regs->rsp; -#else - unsigned long stack = (unsigned long)regs; -#endif - unsigned long stack_base = (stack & ~(THREAD_SIZE - 1)) + THREAD_SIZE; - - return headaddr > stack && headaddr < stack_base; -} -#else -/* without fp, it's just junk */ -static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs) -{ - return 0; -} -#endif - - -void -x86_backtrace(struct pt_regs * const regs, unsigned int depth) -{ - struct frame_head *head; - -#ifdef CONFIG_X86_64 - head = (struct frame_head *)regs->rbp; -#else - head = (struct frame_head *)regs->ebp; -#endif - - if (!user_mode_vm(regs)) { - while (depth-- && valid_kernel_stack(head, regs)) - head = dump_kernel_backtrace(head); - return; - } - - while (depth-- && head) - head = dump_user_backtrace(head); -} diff --git a/arch/i386/oprofile/init.c b/arch/i386/oprofile/init.c deleted file mode 100644 index 5341d481d92..00000000000 --- a/arch/i386/oprofile/init.c +++ /dev/null @@ -1,48 +0,0 @@ -/** - * @file init.c - * - * @remark Copyright 2002 OProfile authors - * @remark Read the file COPYING - * - * @author John Levon - */ - -#include -#include -#include - -/* We support CPUs that have performance counters like the Pentium Pro - * with the NMI mode driver. - */ - -extern int op_nmi_init(struct oprofile_operations * ops); -extern int op_nmi_timer_init(struct oprofile_operations * ops); -extern void op_nmi_exit(void); -extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); - - -int __init oprofile_arch_init(struct oprofile_operations * ops) -{ - int ret; - - ret = -ENODEV; - -#ifdef CONFIG_X86_LOCAL_APIC - ret = op_nmi_init(ops); -#endif -#ifdef CONFIG_X86_IO_APIC - if (ret < 0) - ret = op_nmi_timer_init(ops); -#endif - ops->backtrace = x86_backtrace; - - return ret; -} - - -void oprofile_arch_exit(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - op_nmi_exit(); -#endif -} diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c deleted file mode 100644 index 11b7a51566a..00000000000 --- a/arch/i386/oprofile/nmi_int.c +++ /dev/null @@ -1,477 +0,0 @@ -/** - * @file nmi_int.c - * - * @remark Copyright 2002 OProfile authors - * @remark Read the file COPYING - * - * @author John Levon - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "op_counter.h" -#include "op_x86_model.h" - -static struct op_x86_model_spec const * model; -static struct op_msrs cpu_msrs[NR_CPUS]; -static unsigned long saved_lvtpc[NR_CPUS]; - -static int nmi_start(void); -static void nmi_stop(void); - -/* 0 == registered but off, 1 == registered and on */ -static int nmi_enabled = 0; - -#ifdef CONFIG_PM - -static int nmi_suspend(struct sys_device *dev, pm_message_t state) -{ - if (nmi_enabled == 1) - nmi_stop(); - return 0; -} - - -static int nmi_resume(struct sys_device *dev) -{ - if (nmi_enabled == 1) - nmi_start(); - return 0; -} - - -static struct sysdev_class oprofile_sysclass = { - set_kset_name("oprofile"), - .resume = nmi_resume, - .suspend = nmi_suspend, -}; - - -static struct sys_device device_oprofile = { - .id = 0, - .cls = &oprofile_sysclass, -}; - - -static int __init init_sysfs(void) -{ - int error; - if (!(error = sysdev_class_register(&oprofile_sysclass))) - error = sysdev_register(&device_oprofile); - return error; -} - - -static void exit_sysfs(void) -{ - sysdev_unregister(&device_oprofile); - sysdev_class_unregister(&oprofile_sysclass); -} - -#else -#define init_sysfs() do { } while (0) -#define exit_sysfs() do { } while (0) -#endif /* CONFIG_PM */ - -static int profile_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct die_args *args = (struct die_args *)data; - int ret = NOTIFY_DONE; - int cpu = smp_processor_id(); - - switch(val) { - case DIE_NMI: - if (model->check_ctrs(args->regs, &cpu_msrs[cpu])) - ret = NOTIFY_STOP; - break; - default: - break; - } - return ret; -} - -static void nmi_cpu_save_registers(struct op_msrs * msrs) -{ - unsigned int const nr_ctrs = model->num_counters; - unsigned int const nr_ctrls = model->num_controls; - struct op_msr * counters = msrs->counters; - struct op_msr * controls = msrs->controls; - unsigned int i; - - for (i = 0; i < nr_ctrs; ++i) { - if (counters[i].addr){ - rdmsr(counters[i].addr, - counters[i].saved.low, - counters[i].saved.high); - } - } - - for (i = 0; i < nr_ctrls; ++i) { - if (controls[i].addr){ - rdmsr(controls[i].addr, - controls[i].saved.low, - controls[i].saved.high); - } - } -} - - -static void nmi_save_registers(void * dummy) -{ - int cpu = smp_processor_id(); - struct op_msrs * msrs = &cpu_msrs[cpu]; - nmi_cpu_save_registers(msrs); -} - - -static void free_msrs(void) -{ - int i; - for_each_possible_cpu(i) { - kfree(cpu_msrs[i].counters); - cpu_msrs[i].counters = NULL; - kfree(cpu_msrs[i].controls); - cpu_msrs[i].controls = NULL; - } -} - - -static int allocate_msrs(void) -{ - int success = 1; - size_t controls_size = sizeof(struct op_msr) * model->num_controls; - size_t counters_size = sizeof(struct op_msr) * model->num_counters; - - int i; - for_each_possible_cpu(i) { - cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL); - if (!cpu_msrs[i].counters) { - success = 0; - break; - } - cpu_msrs[i].controls = kmalloc(controls_size, GFP_KERNEL); - if (!cpu_msrs[i].controls) { - success = 0; - break; - } - } - - if (!success) - free_msrs(); - - return success; -} - - -static void nmi_cpu_setup(void * dummy) -{ - int cpu = smp_processor_id(); - struct op_msrs * msrs = &cpu_msrs[cpu]; - spin_lock(&oprofilefs_lock); - model->setup_ctrs(msrs); - spin_unlock(&oprofilefs_lock); - saved_lvtpc[cpu] = apic_read(APIC_LVTPC); - apic_write(APIC_LVTPC, APIC_DM_NMI); -} - -static struct notifier_block profile_exceptions_nb = { - .notifier_call = profile_exceptions_notify, - .next = NULL, - .priority = 0 -}; - -static int nmi_setup(void) -{ - int err=0; - int cpu; - - if (!allocate_msrs()) - return -ENOMEM; - - if ((err = register_die_notifier(&profile_exceptions_nb))){ - free_msrs(); - return err; - } - - /* We need to serialize save and setup for HT because the subset - * of msrs are distinct for save and setup operations - */ - - /* Assume saved/restored counters are the same on all CPUs */ - model->fill_in_addresses(&cpu_msrs[0]); - for_each_possible_cpu (cpu) { - if (cpu != 0) { - memcpy(cpu_msrs[cpu].counters, cpu_msrs[0].counters, - sizeof(struct op_msr) * model->num_counters); - - memcpy(cpu_msrs[cpu].controls, cpu_msrs[0].controls, - sizeof(struct op_msr) * model->num_controls); - } - - } - on_each_cpu(nmi_save_registers, NULL, 0, 1); - on_each_cpu(nmi_cpu_setup, NULL, 0, 1); - nmi_enabled = 1; - return 0; -} - - -static void nmi_restore_registers(struct op_msrs * msrs) -{ - unsigned int const nr_ctrs = model->num_counters; - unsigned int const nr_ctrls = model->num_controls; - struct op_msr * counters = msrs->counters; - struct op_msr * controls = msrs->controls; - unsigned int i; - - for (i = 0; i < nr_ctrls; ++i) { - if (controls[i].addr){ - wrmsr(controls[i].addr, - controls[i].saved.low, - controls[i].saved.high); - } - } - - for (i = 0; i < nr_ctrs; ++i) { - if (counters[i].addr){ - wrmsr(counters[i].addr, - counters[i].saved.low, - counters[i].saved.high); - } - } -} - - -static void nmi_cpu_shutdown(void * dummy) -{ - unsigned int v; - int cpu = smp_processor_id(); - struct op_msrs * msrs = &cpu_msrs[cpu]; - - /* restoring APIC_LVTPC can trigger an apic error because the delivery - * mode and vector nr combination can be illegal. That's by design: on - * power on apic lvt contain a zero vector nr which are legal only for - * NMI delivery mode. So inhibit apic err before restoring lvtpc - */ - v = apic_read(APIC_LVTERR); - apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); - apic_write(APIC_LVTPC, saved_lvtpc[cpu]); - apic_write(APIC_LVTERR, v); - nmi_restore_registers(msrs); - model->shutdown(msrs); -} - - -static void nmi_shutdown(void) -{ - nmi_enabled = 0; - on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); - unregister_die_notifier(&profile_exceptions_nb); - free_msrs(); -} - - -static void nmi_cpu_start(void * dummy) -{ - struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()]; - model->start(msrs); -} - - -static int nmi_start(void) -{ - on_each_cpu(nmi_cpu_start, NULL, 0, 1); - return 0; -} - - -static void nmi_cpu_stop(void * dummy) -{ - struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()]; - model->stop(msrs); -} - - -static void nmi_stop(void) -{ - on_each_cpu(nmi_cpu_stop, NULL, 0, 1); -} - - -struct op_counter_config counter_config[OP_MAX_COUNTER]; - -static int nmi_create_files(struct super_block * sb, struct dentry * root) -{ - unsigned int i; - - for (i = 0; i < model->num_counters; ++i) { - struct dentry * dir; - char buf[4]; - - /* quick little hack to _not_ expose a counter if it is not - * available for use. This should protect userspace app. - * NOTE: assumes 1:1 mapping here (that counters are organized - * sequentially in their struct assignment). - */ - if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i))) - continue; - - snprintf(buf, sizeof(buf), "%d", i); - dir = oprofilefs_mkdir(sb, root, buf); - oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); - oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); - oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count); - oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); - oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); - oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); - } - - return 0; -} - -static int p4force; -module_param(p4force, int, 0); - -static int __init p4_init(char ** cpu_type) -{ - __u8 cpu_model = boot_cpu_data.x86_model; - - if (!p4force && (cpu_model > 6 || cpu_model == 5)) - return 0; - -#ifndef CONFIG_SMP - *cpu_type = "i386/p4"; - model = &op_p4_spec; - return 1; -#else - switch (smp_num_siblings) { - case 1: - *cpu_type = "i386/p4"; - model = &op_p4_spec; - return 1; - - case 2: - *cpu_type = "i386/p4-ht"; - model = &op_p4_ht2_spec; - return 1; - } -#endif - - printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n"); - printk(KERN_INFO "oprofile: Reverting to timer mode.\n"); - return 0; -} - - -static int __init ppro_init(char ** cpu_type) -{ - __u8 cpu_model = boot_cpu_data.x86_model; - - if (cpu_model == 14) - *cpu_type = "i386/core"; - else if (cpu_model == 15) - *cpu_type = "i386/core_2"; - else if (cpu_model > 0xd) - return 0; - else if (cpu_model == 9) { - *cpu_type = "i386/p6_mobile"; - } else if (cpu_model > 5) { - *cpu_type = "i386/piii"; - } else if (cpu_model > 2) { - *cpu_type = "i386/pii"; - } else { - *cpu_type = "i386/ppro"; - } - - model = &op_ppro_spec; - return 1; -} - -/* in order to get sysfs right */ -static int using_nmi; - -int __init op_nmi_init(struct oprofile_operations *ops) -{ - __u8 vendor = boot_cpu_data.x86_vendor; - __u8 family = boot_cpu_data.x86; - char *cpu_type; - - if (!cpu_has_apic) - return -ENODEV; - - switch (vendor) { - case X86_VENDOR_AMD: - /* Needs to be at least an Athlon (or hammer in 32bit mode) */ - - switch (family) { - default: - return -ENODEV; - case 6: - model = &op_athlon_spec; - cpu_type = "i386/athlon"; - break; - case 0xf: - model = &op_athlon_spec; - /* Actually it could be i386/hammer too, but give - user space an consistent name. */ - cpu_type = "x86-64/hammer"; - break; - case 0x10: - model = &op_athlon_spec; - cpu_type = "x86-64/family10"; - break; - } - break; - - case X86_VENDOR_INTEL: - switch (family) { - /* Pentium IV */ - case 0xf: - if (!p4_init(&cpu_type)) - return -ENODEV; - break; - - /* A P6-class processor */ - case 6: - if (!ppro_init(&cpu_type)) - return -ENODEV; - break; - - default: - return -ENODEV; - } - break; - - default: - return -ENODEV; - } - - init_sysfs(); - using_nmi = 1; - ops->create_files = nmi_create_files; - ops->setup = nmi_setup; - ops->shutdown = nmi_shutdown; - ops->start = nmi_start; - ops->stop = nmi_stop; - ops->cpu_type = cpu_type; - printk(KERN_INFO "oprofile: using NMI interrupt.\n"); - return 0; -} - - -void op_nmi_exit(void) -{ - if (using_nmi) - exit_sysfs(); -} diff --git a/arch/i386/oprofile/nmi_timer_int.c b/arch/i386/oprofile/nmi_timer_int.c deleted file mode 100644 index 1418e36ae7a..00000000000 --- a/arch/i386/oprofile/nmi_timer_int.c +++ /dev/null @@ -1,69 +0,0 @@ -/** - * @file nmi_timer_int.c - * - * @remark Copyright 2003 OProfile authors - * @remark Read the file COPYING - * - * @author Zwane Mwaikambo - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static int profile_timer_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct die_args *args = (struct die_args *)data; - int ret = NOTIFY_DONE; - - switch(val) { - case DIE_NMI: - oprofile_add_sample(args->regs, 0); - ret = NOTIFY_STOP; - break; - default: - break; - } - return ret; -} - -static struct notifier_block profile_timer_exceptions_nb = { - .notifier_call = profile_timer_exceptions_notify, - .next = NULL, - .priority = 0 -}; - -static int timer_start(void) -{ - if (register_die_notifier(&profile_timer_exceptions_nb)) - return 1; - return 0; -} - - -static void timer_stop(void) -{ - unregister_die_notifier(&profile_timer_exceptions_nb); - synchronize_sched(); /* Allow already-started NMIs to complete. */ -} - - -int __init op_nmi_timer_init(struct oprofile_operations * ops) -{ - if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0)) - return -ENODEV; - - ops->start = timer_start; - ops->stop = timer_stop; - ops->cpu_type = "timer"; - printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); - return 0; -} diff --git a/arch/i386/oprofile/op_counter.h b/arch/i386/oprofile/op_counter.h deleted file mode 100644 index 2880b15c467..00000000000 --- a/arch/i386/oprofile/op_counter.h +++ /dev/null @@ -1,29 +0,0 @@ -/** - * @file op_counter.h - * - * @remark Copyright 2002 OProfile authors - * @remark Read the file COPYING - * - * @author John Levon - */ - -#ifndef OP_COUNTER_H -#define OP_COUNTER_H - -#define OP_MAX_COUNTER 8 - -/* Per-perfctr configuration as set via - * oprofilefs. - */ -struct op_counter_config { - unsigned long count; - unsigned long enabled; - unsigned long event; - unsigned long kernel; - unsigned long user; - unsigned long unit_mask; -}; - -extern struct op_counter_config counter_config[]; - -#endif /* OP_COUNTER_H */ diff --git a/arch/i386/oprofile/op_model_athlon.c b/arch/i386/oprofile/op_model_athlon.c deleted file mode 100644 index 3057a19e464..00000000000 --- a/arch/i386/oprofile/op_model_athlon.c +++ /dev/null @@ -1,180 +0,0 @@ -/** - * @file op_model_athlon.h - * athlon / K7 model-specific MSR operations - * - * @remark Copyright 2002 OProfile authors - * @remark Read the file COPYING - * - * @author John Levon - * @author Philippe Elie - * @author Graydon Hoare - */ - -#include -#include -#include -#include - -#include "op_x86_model.h" -#include "op_counter.h" - -#define NUM_COUNTERS 4 -#define NUM_CONTROLS 4 - -#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) -#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) -#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0) -#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) - -#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) -#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0) -#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0) -#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) -#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) -#define CTRL_CLEAR(x) (x &= (1<<21)) -#define CTRL_SET_ENABLE(val) (val |= 1<<20) -#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16)) -#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17)) -#define CTRL_SET_UM(val, m) (val |= (m << 8)) -#define CTRL_SET_EVENT(val, e) (val |= e) - -static unsigned long reset_value[NUM_COUNTERS]; - -static void athlon_fill_in_addresses(struct op_msrs * const msrs) -{ - int i; - - for (i=0; i < NUM_COUNTERS; i++) { - if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) - msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; - else - msrs->counters[i].addr = 0; - } - - for (i=0; i < NUM_CONTROLS; i++) { - if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) - msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; - else - msrs->controls[i].addr = 0; - } -} - - -static void athlon_setup_ctrs(struct op_msrs const * const msrs) -{ - unsigned int low, high; - int i; - - /* clear all counters */ - for (i = 0 ; i < NUM_CONTROLS; ++i) { - if (unlikely(!CTRL_IS_RESERVED(msrs,i))) - continue; - CTRL_READ(low, high, msrs, i); - CTRL_CLEAR(low); - CTRL_WRITE(low, high, msrs, i); - } - - /* avoid a false detection of ctr overflows in NMI handler */ - for (i = 0; i < NUM_COUNTERS; ++i) { - if (unlikely(!CTR_IS_RESERVED(msrs,i))) - continue; - CTR_WRITE(1, msrs, i); - } - - /* enable active counters */ - for (i = 0; i < NUM_COUNTERS; ++i) { - if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) { - reset_value[i] = counter_config[i].count; - - CTR_WRITE(counter_config[i].count, msrs, i); - - CTRL_READ(low, high, msrs, i); - CTRL_CLEAR(low); - CTRL_SET_ENABLE(low); - CTRL_SET_USR(low, counter_config[i].user); - CTRL_SET_KERN(low, counter_config[i].kernel); - CTRL_SET_UM(low, counter_config[i].unit_mask); - CTRL_SET_EVENT(low, counter_config[i].event); - CTRL_WRITE(low, high, msrs, i); - } else { - reset_value[i] = 0; - } - } -} - - -static int athlon_check_ctrs(struct pt_regs * const regs, - struct op_msrs const * const msrs) -{ - unsigned int low, high; - int i; - - for (i = 0 ; i < NUM_COUNTERS; ++i) { - if (!reset_value[i]) - continue; - CTR_READ(low, high, msrs, i); - if (CTR_OVERFLOWED(low)) { - oprofile_add_sample(regs, i); - CTR_WRITE(reset_value[i], msrs, i); - } - } - - /* See op_model_ppro.c */ - return 1; -} - - -static void athlon_start(struct op_msrs const * const msrs) -{ - unsigned int low, high; - int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (reset_value[i]) { - CTRL_READ(low, high, msrs, i); - CTRL_SET_ACTIVE(low); - CTRL_WRITE(low, high, msrs, i); - } - } -} - - -static void athlon_stop(struct op_msrs const * const msrs) -{ - unsigned int low,high; - int i; - - /* Subtle: stop on all counters to avoid race with - * setting our pm callback */ - for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (!reset_value[i]) - continue; - CTRL_READ(low, high, msrs, i); - CTRL_SET_INACTIVE(low); - CTRL_WRITE(low, high, msrs, i); - } -} - -static void athlon_shutdown(struct op_msrs const * const msrs) -{ - int i; - - for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (CTR_IS_RESERVED(msrs,i)) - release_perfctr_nmi(MSR_K7_PERFCTR0 + i); - } - for (i = 0 ; i < NUM_CONTROLS ; ++i) { - if (CTRL_IS_RESERVED(msrs,i)) - release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); - } -} - -struct op_x86_model_spec const op_athlon_spec = { - .num_counters = NUM_COUNTERS, - .num_controls = NUM_CONTROLS, - .fill_in_addresses = &athlon_fill_in_addresses, - .setup_ctrs = &athlon_setup_ctrs, - .check_ctrs = &athlon_check_ctrs, - .start = &athlon_start, - .stop = &athlon_stop, - .shutdown = &athlon_shutdown -}; diff --git a/arch/i386/oprofile/op_model_p4.c b/arch/i386/oprofile/op_model_p4.c deleted file mode 100644 index 47925927b12..00000000000 --- a/arch/i386/oprofile/op_model_p4.c +++ /dev/null @@ -1,722 +0,0 @@ -/** - * @file op_model_p4.c - * P4 model-specific MSR operations - * - * @remark Copyright 2002 OProfile authors - * @remark Read the file COPYING - * - * @author Graydon Hoare - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "op_x86_model.h" -#include "op_counter.h" - -#define NUM_EVENTS 39 - -#define NUM_COUNTERS_NON_HT 8 -#define NUM_ESCRS_NON_HT 45 -#define NUM_CCCRS_NON_HT 18 -#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT) - -#define NUM_COUNTERS_HT2 4 -#define NUM_ESCRS_HT2 23 -#define NUM_CCCRS_HT2 9 -#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) - -static unsigned int num_counters = NUM_COUNTERS_NON_HT; -static unsigned int num_controls = NUM_CONTROLS_NON_HT; - -/* this has to be checked dynamically since the - hyper-threadedness of a chip is discovered at - kernel boot-time. */ -static inline void setup_num_counters(void) -{ -#ifdef CONFIG_SMP - if (smp_num_siblings == 2){ - num_counters = NUM_COUNTERS_HT2; - num_controls = NUM_CONTROLS_HT2; - } -#endif -} - -static int inline addr_increment(void) -{ -#ifdef CONFIG_SMP - return smp_num_siblings == 2 ? 2 : 1; -#else - return 1; -#endif -} - - -/* tables to simulate simplified hardware view of p4 registers */ -struct p4_counter_binding { - int virt_counter; - int counter_address; - int cccr_address; -}; - -struct p4_event_binding { - int escr_select; /* value to put in CCCR */ - int event_select; /* value to put in ESCR */ - struct { - int virt_counter; /* for this counter... */ - int escr_address; /* use this ESCR */ - } bindings[2]; -}; - -/* nb: these CTR_* defines are a duplicate of defines in - event/i386.p4*events. */ - - -#define CTR_BPU_0 (1 << 0) -#define CTR_MS_0 (1 << 1) -#define CTR_FLAME_0 (1 << 2) -#define CTR_IQ_4 (1 << 3) -#define CTR_BPU_2 (1 << 4) -#define CTR_MS_2 (1 << 5) -#define CTR_FLAME_2 (1 << 6) -#define CTR_IQ_5 (1 << 7) - -static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { - { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 }, - { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 }, - { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 }, - { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 }, - { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 }, - { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 }, - { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 }, - { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 } -}; - -#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT - -/* p4 event codes in libop/op_event.h are indices into this table. */ - -static struct p4_event_binding p4_events[NUM_EVENTS] = { - - { /* BRANCH_RETIRED */ - 0x05, 0x06, - { {CTR_IQ_4, MSR_P4_CRU_ESCR2}, - {CTR_IQ_5, MSR_P4_CRU_ESCR3} } - }, - - { /* MISPRED_BRANCH_RETIRED */ - 0x04, 0x03, - { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, - { CTR_IQ_5, MSR_P4_CRU_ESCR1} } - }, - - { /* TC_DELIVER_MODE */ - 0x01, 0x01, - { { CTR_MS_0, MSR_P4_TC_ESCR0}, - { CTR_MS_2, MSR_P4_TC_ESCR1} } - }, - - { /* BPU_FETCH_REQUEST */ - 0x00, 0x03, - { { CTR_BPU_0, MSR_P4_BPU_ESCR0}, - { CTR_BPU_2, MSR_P4_BPU_ESCR1} } - }, - - { /* ITLB_REFERENCE */ - 0x03, 0x18, - { { CTR_BPU_0, MSR_P4_ITLB_ESCR0}, - { CTR_BPU_2, MSR_P4_ITLB_ESCR1} } - }, - - { /* MEMORY_CANCEL */ - 0x05, 0x02, - { { CTR_FLAME_0, MSR_P4_DAC_ESCR0}, - { CTR_FLAME_2, MSR_P4_DAC_ESCR1} } - }, - - { /* MEMORY_COMPLETE */ - 0x02, 0x08, - { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, - { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } - }, - - { /* LOAD_PORT_REPLAY */ - 0x02, 0x04, - { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, - { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } - }, - - { /* STORE_PORT_REPLAY */ - 0x02, 0x05, - { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, - { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } - }, - - { /* MOB_LOAD_REPLAY */ - 0x02, 0x03, - { { CTR_BPU_0, MSR_P4_MOB_ESCR0}, - { CTR_BPU_2, MSR_P4_MOB_ESCR1} } - }, - - { /* PAGE_WALK_TYPE */ - 0x04, 0x01, - { { CTR_BPU_0, MSR_P4_PMH_ESCR0}, - { CTR_BPU_2, MSR_P4_PMH_ESCR1} } - }, - - { /* BSQ_CACHE_REFERENCE */ - 0x07, 0x0c, - { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, - { CTR_BPU_2, MSR_P4_BSU_ESCR1} } - }, - - { /* IOQ_ALLOCATION */ - 0x06, 0x03, - { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, - { 0, 0 } } - }, - - { /* IOQ_ACTIVE_ENTRIES */ - 0x06, 0x1a, - { { CTR_BPU_2, MSR_P4_FSB_ESCR1}, - { 0, 0 } } - }, - - { /* FSB_DATA_ACTIVITY */ - 0x06, 0x17, - { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, - { CTR_BPU_2, MSR_P4_FSB_ESCR1} } - }, - - { /* BSQ_ALLOCATION */ - 0x07, 0x05, - { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, - { 0, 0 } } - }, - - { /* BSQ_ACTIVE_ENTRIES */ - 0x07, 0x06, - { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */}, - { 0, 0 } } - }, - - { /* X87_ASSIST */ - 0x05, 0x03, - { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, - { CTR_IQ_5, MSR_P4_CRU_ESCR3} } - }, - - { /* SSE_INPUT_ASSIST */ - 0x01, 0x34, - { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, - { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } - }, - - { /* PACKED_SP_UOP */ - 0x01, 0x08, - { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, - { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } - }, - - { /* PACKED_DP_UOP */ - 0x01, 0x0c, - { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, - { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } - }, - - { /* SCALAR_SP_UOP */ - 0x01, 0x0a, - { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, - { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } - }, - - { /* SCALAR_DP_UOP */ - 0x01, 0x0e, - { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, - { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } - }, - - { /* 64BIT_MMX_UOP */ - 0x01, 0x02, - { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, - { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } - }, - - { /* 128BIT_MMX_UOP */ - 0x01, 0x1a, - { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, - { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } - }, - - { /* X87_FP_UOP */ - 0x01, 0x04, - { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, - { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } - }, - - { /* X87_SIMD_MOVES_UOP */ - 0x01, 0x2e, - { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, - { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } - }, - - { /* MACHINE_CLEAR */ - 0x05, 0x02, - { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, - { CTR_IQ_5, MSR_P4_CRU_ESCR3} } - }, - - { /* GLOBAL_POWER_EVENTS */ - 0x06, 0x13 /* older manual says 0x05, newer 0x13 */, - { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, - { CTR_BPU_2, MSR_P4_FSB_ESCR1} } - }, - - { /* TC_MS_XFER */ - 0x00, 0x05, - { { CTR_MS_0, MSR_P4_MS_ESCR0}, - { CTR_MS_2, MSR_P4_MS_ESCR1} } - }, - - { /* UOP_QUEUE_WRITES */ - 0x00, 0x09, - { { CTR_MS_0, MSR_P4_MS_ESCR0}, - { CTR_MS_2, MSR_P4_MS_ESCR1} } - }, - - { /* FRONT_END_EVENT */ - 0x05, 0x08, - { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, - { CTR_IQ_5, MSR_P4_CRU_ESCR3} } - }, - - { /* EXECUTION_EVENT */ - 0x05, 0x0c, - { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, - { CTR_IQ_5, MSR_P4_CRU_ESCR3} } - }, - - { /* REPLAY_EVENT */ - 0x05, 0x09, - { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, - { CTR_IQ_5, MSR_P4_CRU_ESCR3} } - }, - - { /* INSTR_RETIRED */ - 0x04, 0x02, - { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, - { CTR_IQ_5, MSR_P4_CRU_ESCR1} } - }, - - { /* UOPS_RETIRED */ - 0x04, 0x01, - { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, - { CTR_IQ_5, MSR_P4_CRU_ESCR1} } - }, - - { /* UOP_TYPE */ - 0x02, 0x02, - { { CTR_IQ_4, MSR_P4_RAT_ESCR0}, - { CTR_IQ_5, MSR_P4_RAT_ESCR1} } - }, - - { /* RETIRED_MISPRED_BRANCH_TYPE */ - 0x02, 0x05, - { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, - { CTR_MS_2, MSR_P4_TBPU_ESCR1} } - }, - - { /* RETIRED_BRANCH_TYPE */ - 0x02, 0x04, - { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, - { CTR_MS_2, MSR_P4_TBPU_ESCR1} } - } -}; - - -#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7) - -#define ESCR_RESERVED_BITS 0x80000003 -#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS) -#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2)) -#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3)) -#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1))) -#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) -#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) -#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) -#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) -#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) - -#define CCCR_RESERVED_BITS 0x38030FFF -#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) -#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000) -#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13)) -#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26)) -#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) -#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) -#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) -#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) -#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) -#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) -#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) - -#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) -#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) -#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0) -#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0) -#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) - - -/* this assigns a "stagger" to the current CPU, which is used throughout - the code in this module as an extra array offset, to select the "even" - or "odd" part of all the divided resources. */ -static unsigned int get_stagger(void) -{ -#ifdef CONFIG_SMP - int cpu = smp_processor_id(); - return (cpu != first_cpu(cpu_sibling_map[cpu])); -#endif - return 0; -} - - -/* finally, mediate access to a real hardware counter - by passing a "virtual" counter numer to this macro, - along with your stagger setting. */ -#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger))) - -static unsigned long reset_value[NUM_COUNTERS_NON_HT]; - - -static void p4_fill_in_addresses(struct op_msrs * const msrs) -{ - unsigned int i; - unsigned int addr, cccraddr, stag; - - setup_num_counters(); - stag = get_stagger(); - - /* initialize some registers */ - for (i = 0; i < num_counters; ++i) { - msrs->counters[i].addr = 0; - } - for (i = 0; i < num_controls; ++i) { - msrs->controls[i].addr = 0; - } - - /* the counter & cccr registers we pay attention to */ - for (i = 0; i < num_counters; ++i) { - addr = p4_counters[VIRT_CTR(stag, i)].counter_address; - cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address; - if (reserve_perfctr_nmi(addr)){ - msrs->counters[i].addr = addr; - msrs->controls[i].addr = cccraddr; - } - } - - /* 43 ESCR registers in three or four discontiguous group */ - for (addr = MSR_P4_BSU_ESCR0 + stag; - addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) { - if (reserve_evntsel_nmi(addr)) - msrs->controls[i].addr = addr; - } - - /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1 - * to avoid special case in nmi_{save|restore}_registers() */ - if (boot_cpu_data.x86_model >= 0x3) { - for (addr = MSR_P4_BSU_ESCR0 + stag; - addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) { - if (reserve_evntsel_nmi(addr)) - msrs->controls[i].addr = addr; - } - } else { - for (addr = MSR_P4_IQ_ESCR0 + stag; - addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) { - if (reserve_evntsel_nmi(addr)) - msrs->controls[i].addr = addr; - } - } - - for (addr = MSR_P4_RAT_ESCR0 + stag; - addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { - if (reserve_evntsel_nmi(addr)) - msrs->controls[i].addr = addr; - } - - for (addr = MSR_P4_MS_ESCR0 + stag; - addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { - if (reserve_evntsel_nmi(addr)) - msrs->controls[i].addr = addr; - } - - for (addr = MSR_P4_IX_ESCR0 + stag; - addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { - if (reserve_evntsel_nmi(addr)) - msrs->controls[i].addr = addr; - } - - /* there are 2 remaining non-contiguously located ESCRs */ - - if (num_counters == NUM_COUNTERS_NON_HT) { - /* standard non-HT CPUs handle both remaining ESCRs*/ - if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) - msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; - if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4)) - msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; - - } else if (stag == 0) { - /* HT CPUs give the first remainder to the even thread, as - the 32nd control register */ - if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4)) - msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; - - } else { - /* and two copies of the second to the odd thread, - for the 22st and 23nd control registers */ - if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) { - msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; - msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; - } - } -} - - -static void pmc_setup_one_p4_counter(unsigned int ctr) -{ - int i; - int const maxbind = 2; - unsigned int cccr = 0; - unsigned int escr = 0; - unsigned int high = 0; - unsigned int counter_bit; - struct p4_event_binding *ev = NULL; - unsigned int stag; - - stag = get_stagger(); - - /* convert from counter *number* to counter *bit* */ - counter_bit = 1 << VIRT_CTR(stag, ctr); - - /* find our event binding structure. */ - if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) { - printk(KERN_ERR - "oprofile: P4 event code 0x%lx out of range\n", - counter_config[ctr].event); - return; - } - - ev = &(p4_events[counter_config[ctr].event - 1]); - - for (i = 0; i < maxbind; i++) { - if (ev->bindings[i].virt_counter & counter_bit) { - - /* modify ESCR */ - ESCR_READ(escr, high, ev, i); - ESCR_CLEAR(escr); - if (stag == 0) { - ESCR_SET_USR_0(escr, counter_config[ctr].user); - ESCR_SET_OS_0(escr, counter_config[ctr].kernel); - } else { - ESCR_SET_USR_1(escr, counter_config[ctr].user); - ESCR_SET_OS_1(escr, counter_config[ctr].kernel); - } - ESCR_SET_EVENT_SELECT(escr, ev->event_select); - ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); - ESCR_WRITE(escr, high, ev, i); - - /* modify CCCR */ - CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); - CCCR_CLEAR(cccr); - CCCR_SET_REQUIRED_BITS(cccr); - CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); - if (stag == 0) { - CCCR_SET_PMI_OVF_0(cccr); - } else { - CCCR_SET_PMI_OVF_1(cccr); - } - CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); - return; - } - } - - printk(KERN_ERR - "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n", - counter_config[ctr].event, stag, ctr); -} - - -static void p4_setup_ctrs(struct op_msrs const * const msrs) -{ - unsigned int i; - unsigned int low, high; - unsigned int stag; - - stag = get_stagger(); - - rdmsr(MSR_IA32_MISC_ENABLE, low, high); - if (! MISC_PMC_ENABLED_P(low)) { - printk(KERN_ERR "oprofile: P4 PMC not available\n"); - return; - } - - /* clear the cccrs we will use */ - for (i = 0 ; i < num_counters ; i++) { - if (unlikely(!CTRL_IS_RESERVED(msrs,i))) - continue; - rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); - CCCR_CLEAR(low); - CCCR_SET_REQUIRED_BITS(low); - wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); - } - - /* clear all escrs (including those outside our concern) */ - for (i = num_counters; i < num_controls; i++) { - if (unlikely(!CTRL_IS_RESERVED(msrs,i))) - continue; - wrmsr(msrs->controls[i].addr, 0, 0); - } - - /* setup all counters */ - for (i = 0 ; i < num_counters ; ++i) { - if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) { - reset_value[i] = counter_config[i].count; - pmc_setup_one_p4_counter(i); - CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); - } else { - reset_value[i] = 0; - } - } -} - - -static int p4_check_ctrs(struct pt_regs * const regs, - struct op_msrs const * const msrs) -{ - unsigned long ctr, low, high, stag, real; - int i; - - stag = get_stagger(); - - for (i = 0; i < num_counters; ++i) { - - if (!reset_value[i]) - continue; - - /* - * there is some eccentricity in the hardware which - * requires that we perform 2 extra corrections: - * - * - check both the CCCR:OVF flag for overflow and the - * counter high bit for un-flagged overflows. - * - * - write the counter back twice to ensure it gets - * updated properly. - * - * the former seems to be related to extra NMIs happening - * during the current NMI; the latter is reported as errata - * N15 in intel doc 249199-029, pentium 4 specification - * update, though their suggested work-around does not - * appear to solve the problem. - */ - - real = VIRT_CTR(stag, i); - - CCCR_READ(low, high, real); - CTR_READ(ctr, high, real); - if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { - oprofile_add_sample(regs, i); - CTR_WRITE(reset_value[i], real); - CCCR_CLEAR_OVF(low); - CCCR_WRITE(low, high, real); - CTR_WRITE(reset_value[i], real); - } - } - - /* P4 quirk: you have to re-unmask the apic vector */ - apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); - - /* See op_model_ppro.c */ - return 1; -} - - -static void p4_start(struct op_msrs const * const msrs) -{ - unsigned int low, high, stag; - int i; - - stag = get_stagger(); - - for (i = 0; i < num_counters; ++i) { - if (!reset_value[i]) - continue; - CCCR_READ(low, high, VIRT_CTR(stag, i)); - CCCR_SET_ENABLE(low); - CCCR_WRITE(low, high, VIRT_CTR(stag, i)); - } -} - - -static void p4_stop(struct op_msrs const * const msrs) -{ - unsigned int low, high, stag; - int i; - - stag = get_stagger(); - - for (i = 0; i < num_counters; ++i) { - if (!reset_value[i]) - continue; - CCCR_READ(low, high, VIRT_CTR(stag, i)); - CCCR_SET_DISABLE(low); - CCCR_WRITE(low, high, VIRT_CTR(stag, i)); - } -} - -static void p4_shutdown(struct op_msrs const * const msrs) -{ - int i; - - for (i = 0 ; i < num_counters ; ++i) { - if (CTR_IS_RESERVED(msrs,i)) - release_perfctr_nmi(msrs->counters[i].addr); - } - /* some of the control registers are specially reserved in - * conjunction with the counter registers (hence the starting offset). - * This saves a few bits. - */ - for (i = num_counters ; i < num_controls ; ++i) { - if (CTRL_IS_RESERVED(msrs,i)) - release_evntsel_nmi(msrs->controls[i].addr); - } -} - - -#ifdef CONFIG_SMP -struct op_x86_model_spec const op_p4_ht2_spec = { - .num_counters = NUM_COUNTERS_HT2, - .num_controls = NUM_CONTROLS_HT2, - .fill_in_addresses = &p4_fill_in_addresses, - .setup_ctrs = &p4_setup_ctrs, - .check_ctrs = &p4_check_ctrs, - .start = &p4_start, - .stop = &p4_stop, - .shutdown = &p4_shutdown -}; -#endif - -struct op_x86_model_spec const op_p4_spec = { - .num_counters = NUM_COUNTERS_NON_HT, - .num_controls = NUM_CONTROLS_NON_HT, - .fill_in_addresses = &p4_fill_in_addresses, - .setup_ctrs = &p4_setup_ctrs, - .check_ctrs = &p4_check_ctrs, - .start = &p4_start, - .stop = &p4_stop, - .shutdown = &p4_shutdown -}; diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c deleted file mode 100644 index c554f52cb80..00000000000 --- a/arch/i386/oprofile/op_model_ppro.c +++ /dev/null @@ -1,192 +0,0 @@ -/** - * @file op_model_ppro.h - * pentium pro / P6 model-specific MSR operations - * - * @remark Copyright 2002 OProfile authors - * @remark Read the file COPYING - * - * @author John Levon - * @author Philippe Elie - * @author Graydon Hoare - */ - -#include -#include -#include -#include -#include - -#include "op_x86_model.h" -#include "op_counter.h" - -#define NUM_COUNTERS 2 -#define NUM_CONTROLS 2 - -#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) -#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) -#define CTR_32BIT_WRITE(l,msrs,c) \ - do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0);} while (0) -#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) - -#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) -#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0) -#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0) -#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) -#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) -#define CTRL_CLEAR(x) (x &= (1<<21)) -#define CTRL_SET_ENABLE(val) (val |= 1<<20) -#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16)) -#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17)) -#define CTRL_SET_UM(val, m) (val |= (m << 8)) -#define CTRL_SET_EVENT(val, e) (val |= e) - -static unsigned long reset_value[NUM_COUNTERS]; - -static void ppro_fill_in_addresses(struct op_msrs * const msrs) -{ - int i; - - for (i=0; i < NUM_COUNTERS; i++) { - if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) - msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; - else - msrs->counters[i].addr = 0; - } - - for (i=0; i < NUM_CONTROLS; i++) { - if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) - msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; - else - msrs->controls[i].addr = 0; - } -} - - -static void ppro_setup_ctrs(struct op_msrs const * const msrs) -{ - unsigned int low, high; - int i; - - /* clear all counters */ - for (i = 0 ; i < NUM_CONTROLS; ++i) { - if (unlikely(!CTRL_IS_RESERVED(msrs,i))) - continue; - CTRL_READ(low, high, msrs, i); - CTRL_CLEAR(low); - CTRL_WRITE(low, high, msrs, i); - } - - /* avoid a false detection of ctr overflows in NMI handler */ - for (i = 0; i < NUM_COUNTERS; ++i) { - if (unlikely(!CTR_IS_RESERVED(msrs,i))) - continue; - CTR_32BIT_WRITE(1, msrs, i); - } - - /* enable active counters */ - for (i = 0; i < NUM_COUNTERS; ++i) { - if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) { - reset_value[i] = counter_config[i].count; - - CTR_32BIT_WRITE(counter_config[i].count, msrs, i); - - CTRL_READ(low, high, msrs, i); - CTRL_CLEAR(low); - CTRL_SET_ENABLE(low); - CTRL_SET_USR(low, counter_config[i].user); - CTRL_SET_KERN(low, counter_config[i].kernel); - CTRL_SET_UM(low, counter_config[i].unit_mask); - CTRL_SET_EVENT(low, counter_config[i].event); - CTRL_WRITE(low, high, msrs, i); - } else { - reset_value[i] = 0; - } - } -} - - -static int ppro_check_ctrs(struct pt_regs * const regs, - struct op_msrs const * const msrs) -{ - unsigned int low, high; - int i; - - for (i = 0 ; i < NUM_COUNTERS; ++i) { - if (!reset_value[i]) - continue; - CTR_READ(low, high, msrs, i); - if (CTR_OVERFLOWED(low)) { - oprofile_add_sample(regs, i); - CTR_32BIT_WRITE(reset_value[i], msrs, i); - } - } - - /* Only P6 based Pentium M need to re-unmask the apic vector but it - * doesn't hurt other P6 variant */ - apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); - - /* We can't work out if we really handled an interrupt. We - * might have caught a *second* counter just after overflowing - * the interrupt for this counter then arrives - * and we don't find a counter that's overflowed, so we - * would return 0 and get dazed + confused. Instead we always - * assume we found an overflow. This sucks. - */ - return 1; -} - - -static void ppro_start(struct op_msrs const * const msrs) -{ - unsigned int low,high; - int i; - - for (i = 0; i < NUM_COUNTERS; ++i) { - if (reset_value[i]) { - CTRL_READ(low, high, msrs, i); - CTRL_SET_ACTIVE(low); - CTRL_WRITE(low, high, msrs, i); - } - } -} - - -static void ppro_stop(struct op_msrs const * const msrs) -{ - unsigned int low,high; - int i; - - for (i = 0; i < NUM_COUNTERS; ++i) { - if (!reset_value[i]) - continue; - CTRL_READ(low, high, msrs, i); - CTRL_SET_INACTIVE(low); - CTRL_WRITE(low, high, msrs, i); - } -} - -static void ppro_shutdown(struct op_msrs const * const msrs) -{ - int i; - - for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (CTR_IS_RESERVED(msrs,i)) - release_perfctr_nmi(MSR_P6_PERFCTR0 + i); - } - for (i = 0 ; i < NUM_CONTROLS ; ++i) { - if (CTRL_IS_RESERVED(msrs,i)) - release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); - } -} - - -struct op_x86_model_spec const op_ppro_spec = { - .num_counters = NUM_COUNTERS, - .num_controls = NUM_CONTROLS, - .fill_in_addresses = &ppro_fill_in_addresses, - .setup_ctrs = &ppro_setup_ctrs, - .check_ctrs = &ppro_check_ctrs, - .start = &ppro_start, - .stop = &ppro_stop, - .shutdown = &ppro_shutdown -}; diff --git a/arch/i386/oprofile/op_x86_model.h b/arch/i386/oprofile/op_x86_model.h deleted file mode 100644 index abb1aa95b97..00000000000 --- a/arch/i386/oprofile/op_x86_model.h +++ /dev/null @@ -1,51 +0,0 @@ -/** - * @file op_x86_model.h - * interface to x86 model-specific MSR operations - * - * @remark Copyright 2002 OProfile authors - * @remark Read the file COPYING - * - * @author Graydon Hoare - */ - -#ifndef OP_X86_MODEL_H -#define OP_X86_MODEL_H - -struct op_saved_msr { - unsigned int high; - unsigned int low; -}; - -struct op_msr { - unsigned long addr; - struct op_saved_msr saved; -}; - -struct op_msrs { - struct op_msr * counters; - struct op_msr * controls; -}; - -struct pt_regs; - -/* The model vtable abstracts the differences between - * various x86 CPU model's perfctr support. - */ -struct op_x86_model_spec { - unsigned int const num_counters; - unsigned int const num_controls; - void (*fill_in_addresses)(struct op_msrs * const msrs); - void (*setup_ctrs)(struct op_msrs const * const msrs); - int (*check_ctrs)(struct pt_regs * const regs, - struct op_msrs const * const msrs); - void (*start)(struct op_msrs const * const msrs); - void (*stop)(struct op_msrs const * const msrs); - void (*shutdown)(struct op_msrs const * const msrs); -}; - -extern struct op_x86_model_spec const op_ppro_spec; -extern struct op_x86_model_spec const op_p4_spec; -extern struct op_x86_model_spec const op_p4_ht2_spec; -extern struct op_x86_model_spec const op_athlon_spec; - -#endif /* OP_X86_MODEL_H */ diff --git a/arch/x86/oprofile/Kconfig b/arch/x86/oprofile/Kconfig new file mode 100644 index 00000000000..d8a84088471 --- /dev/null +++ b/arch/x86/oprofile/Kconfig @@ -0,0 +1,17 @@ +config PROFILING + bool "Profiling support (EXPERIMENTAL)" + help + Say Y here to enable the extended profiling support mechanisms used + by profilers such as OProfile. + + +config OPROFILE + tristate "OProfile system profiling (EXPERIMENTAL)" + depends on PROFILING + help + OProfile is a profiling system capable of profiling the + whole system, include the kernel, kernel modules, libraries, + and applications. + + If unsure, say N. + diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile new file mode 100644 index 00000000000..30f3eb36666 --- /dev/null +++ b/arch/x86/oprofile/Makefile @@ -0,0 +1,12 @@ +obj-$(CONFIG_OPROFILE) += oprofile.o + +DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ + oprof.o cpu_buffer.o buffer_sync.o \ + event_buffer.o oprofile_files.o \ + oprofilefs.o oprofile_stats.o \ + timer_int.o ) + +oprofile-y := $(DRIVER_OBJS) init.o backtrace.o +oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o \ + op_model_ppro.o op_model_p4.o +oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c new file mode 100644 index 00000000000..c049ce414f0 --- /dev/null +++ b/arch/x86/oprofile/backtrace.c @@ -0,0 +1,127 @@ +/** + * @file backtrace.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * @author David Smith + */ + +#include +#include +#include +#include +#include + +struct frame_head { + struct frame_head * ebp; + unsigned long ret; +} __attribute__((packed)); + +static struct frame_head * +dump_kernel_backtrace(struct frame_head * head) +{ + oprofile_add_trace(head->ret); + + /* frame pointers should strictly progress back up the stack + * (towards higher addresses) */ + if (head >= head->ebp) + return NULL; + + return head->ebp; +} + +static struct frame_head * +dump_user_backtrace(struct frame_head * head) +{ + struct frame_head bufhead[2]; + + /* Also check accessibility of one struct frame_head beyond */ + if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) + return NULL; + if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) + return NULL; + + oprofile_add_trace(bufhead[0].ret); + + /* frame pointers should strictly progress back up the stack + * (towards higher addresses) */ + if (head >= bufhead[0].ebp) + return NULL; + + return bufhead[0].ebp; +} + +/* + * | | /\ Higher addresses + * | | + * --------------- stack base (address of current_thread_info) + * | thread info | + * . . + * | stack | + * --------------- saved regs->ebp value if valid (frame_head address) + * . . + * --------------- saved regs->rsp value if x86_64 + * | | + * --------------- struct pt_regs * stored on stack if 32-bit + * | | + * . . + * | | + * --------------- %esp + * | | + * | | \/ Lower addresses + * + * Thus, regs (or regs->rsp for x86_64) <-> stack base restricts the + * valid(ish) ebp values. Note: (1) for x86_64, NMI and several other + * exceptions use special stacks, maintained by the interrupt stack table + * (IST). These stacks are set up in trap_init() in + * arch/x86_64/kernel/traps.c. Thus, for x86_64, regs now does not point + * to the kernel stack; instead, it points to some location on the NMI + * stack. On the other hand, regs->rsp is the stack pointer saved when the + * NMI occurred. (2) For 32-bit, regs->esp is not valid because the + * processor does not save %esp on the kernel stack when interrupts occur + * in the kernel mode. + */ +#ifdef CONFIG_FRAME_POINTER +static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs) +{ + unsigned long headaddr = (unsigned long)head; +#ifdef CONFIG_X86_64 + unsigned long stack = (unsigned long)regs->rsp; +#else + unsigned long stack = (unsigned long)regs; +#endif + unsigned long stack_base = (stack & ~(THREAD_SIZE - 1)) + THREAD_SIZE; + + return headaddr > stack && headaddr < stack_base; +} +#else +/* without fp, it's just junk */ +static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs) +{ + return 0; +} +#endif + + +void +x86_backtrace(struct pt_regs * const regs, unsigned int depth) +{ + struct frame_head *head; + +#ifdef CONFIG_X86_64 + head = (struct frame_head *)regs->rbp; +#else + head = (struct frame_head *)regs->ebp; +#endif + + if (!user_mode_vm(regs)) { + while (depth-- && valid_kernel_stack(head, regs)) + head = dump_kernel_backtrace(head); + return; + } + + while (depth-- && head) + head = dump_user_backtrace(head); +} diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c new file mode 100644 index 00000000000..5341d481d92 --- /dev/null +++ b/arch/x86/oprofile/init.c @@ -0,0 +1,48 @@ +/** + * @file init.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + */ + +#include +#include +#include + +/* We support CPUs that have performance counters like the Pentium Pro + * with the NMI mode driver. + */ + +extern int op_nmi_init(struct oprofile_operations * ops); +extern int op_nmi_timer_init(struct oprofile_operations * ops); +extern void op_nmi_exit(void); +extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); + + +int __init oprofile_arch_init(struct oprofile_operations * ops) +{ + int ret; + + ret = -ENODEV; + +#ifdef CONFIG_X86_LOCAL_APIC + ret = op_nmi_init(ops); +#endif +#ifdef CONFIG_X86_IO_APIC + if (ret < 0) + ret = op_nmi_timer_init(ops); +#endif + ops->backtrace = x86_backtrace; + + return ret; +} + + +void oprofile_arch_exit(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + op_nmi_exit(); +#endif +} diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c new file mode 100644 index 00000000000..11b7a51566a --- /dev/null +++ b/arch/x86/oprofile/nmi_int.c @@ -0,0 +1,477 @@ +/** + * @file nmi_int.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "op_counter.h" +#include "op_x86_model.h" + +static struct op_x86_model_spec const * model; +static struct op_msrs cpu_msrs[NR_CPUS]; +static unsigned long saved_lvtpc[NR_CPUS]; + +static int nmi_start(void); +static void nmi_stop(void); + +/* 0 == registered but off, 1 == registered and on */ +static int nmi_enabled = 0; + +#ifdef CONFIG_PM + +static int nmi_suspend(struct sys_device *dev, pm_message_t state) +{ + if (nmi_enabled == 1) + nmi_stop(); + return 0; +} + + +static int nmi_resume(struct sys_device *dev) +{ + if (nmi_enabled == 1) + nmi_start(); + return 0; +} + + +static struct sysdev_class oprofile_sysclass = { + set_kset_name("oprofile"), + .resume = nmi_resume, + .suspend = nmi_suspend, +}; + + +static struct sys_device device_oprofile = { + .id = 0, + .cls = &oprofile_sysclass, +}; + + +static int __init init_sysfs(void) +{ + int error; + if (!(error = sysdev_class_register(&oprofile_sysclass))) + error = sysdev_register(&device_oprofile); + return error; +} + + +static void exit_sysfs(void) +{ + sysdev_unregister(&device_oprofile); + sysdev_class_unregister(&oprofile_sysclass); +} + +#else +#define init_sysfs() do { } while (0) +#define exit_sysfs() do { } while (0) +#endif /* CONFIG_PM */ + +static int profile_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + int cpu = smp_processor_id(); + + switch(val) { + case DIE_NMI: + if (model->check_ctrs(args->regs, &cpu_msrs[cpu])) + ret = NOTIFY_STOP; + break; + default: + break; + } + return ret; +} + +static void nmi_cpu_save_registers(struct op_msrs * msrs) +{ + unsigned int const nr_ctrs = model->num_counters; + unsigned int const nr_ctrls = model->num_controls; + struct op_msr * counters = msrs->counters; + struct op_msr * controls = msrs->controls; + unsigned int i; + + for (i = 0; i < nr_ctrs; ++i) { + if (counters[i].addr){ + rdmsr(counters[i].addr, + counters[i].saved.low, + counters[i].saved.high); + } + } + + for (i = 0; i < nr_ctrls; ++i) { + if (controls[i].addr){ + rdmsr(controls[i].addr, + controls[i].saved.low, + controls[i].saved.high); + } + } +} + + +static void nmi_save_registers(void * dummy) +{ + int cpu = smp_processor_id(); + struct op_msrs * msrs = &cpu_msrs[cpu]; + nmi_cpu_save_registers(msrs); +} + + +static void free_msrs(void) +{ + int i; + for_each_possible_cpu(i) { + kfree(cpu_msrs[i].counters); + cpu_msrs[i].counters = NULL; + kfree(cpu_msrs[i].controls); + cpu_msrs[i].controls = NULL; + } +} + + +static int allocate_msrs(void) +{ + int success = 1; + size_t controls_size = sizeof(struct op_msr) * model->num_controls; + size_t counters_size = sizeof(struct op_msr) * model->num_counters; + + int i; + for_each_possible_cpu(i) { + cpu_msrs[i].counters = kmalloc(counters_size, GFP_KERNEL); + if (!cpu_msrs[i].counters) { + success = 0; + break; + } + cpu_msrs[i].controls = kmalloc(controls_size, GFP_KERNEL); + if (!cpu_msrs[i].controls) { + success = 0; + break; + } + } + + if (!success) + free_msrs(); + + return success; +} + + +static void nmi_cpu_setup(void * dummy) +{ + int cpu = smp_processor_id(); + struct op_msrs * msrs = &cpu_msrs[cpu]; + spin_lock(&oprofilefs_lock); + model->setup_ctrs(msrs); + spin_unlock(&oprofilefs_lock); + saved_lvtpc[cpu] = apic_read(APIC_LVTPC); + apic_write(APIC_LVTPC, APIC_DM_NMI); +} + +static struct notifier_block profile_exceptions_nb = { + .notifier_call = profile_exceptions_notify, + .next = NULL, + .priority = 0 +}; + +static int nmi_setup(void) +{ + int err=0; + int cpu; + + if (!allocate_msrs()) + return -ENOMEM; + + if ((err = register_die_notifier(&profile_exceptions_nb))){ + free_msrs(); + return err; + } + + /* We need to serialize save and setup for HT because the subset + * of msrs are distinct for save and setup operations + */ + + /* Assume saved/restored counters are the same on all CPUs */ + model->fill_in_addresses(&cpu_msrs[0]); + for_each_possible_cpu (cpu) { + if (cpu != 0) { + memcpy(cpu_msrs[cpu].counters, cpu_msrs[0].counters, + sizeof(struct op_msr) * model->num_counters); + + memcpy(cpu_msrs[cpu].controls, cpu_msrs[0].controls, + sizeof(struct op_msr) * model->num_controls); + } + + } + on_each_cpu(nmi_save_registers, NULL, 0, 1); + on_each_cpu(nmi_cpu_setup, NULL, 0, 1); + nmi_enabled = 1; + return 0; +} + + +static void nmi_restore_registers(struct op_msrs * msrs) +{ + unsigned int const nr_ctrs = model->num_counters; + unsigned int const nr_ctrls = model->num_controls; + struct op_msr * counters = msrs->counters; + struct op_msr * controls = msrs->controls; + unsigned int i; + + for (i = 0; i < nr_ctrls; ++i) { + if (controls[i].addr){ + wrmsr(controls[i].addr, + controls[i].saved.low, + controls[i].saved.high); + } + } + + for (i = 0; i < nr_ctrs; ++i) { + if (counters[i].addr){ + wrmsr(counters[i].addr, + counters[i].saved.low, + counters[i].saved.high); + } + } +} + + +static void nmi_cpu_shutdown(void * dummy) +{ + unsigned int v; + int cpu = smp_processor_id(); + struct op_msrs * msrs = &cpu_msrs[cpu]; + + /* restoring APIC_LVTPC can trigger an apic error because the delivery + * mode and vector nr combination can be illegal. That's by design: on + * power on apic lvt contain a zero vector nr which are legal only for + * NMI delivery mode. So inhibit apic err before restoring lvtpc + */ + v = apic_read(APIC_LVTERR); + apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); + apic_write(APIC_LVTPC, saved_lvtpc[cpu]); + apic_write(APIC_LVTERR, v); + nmi_restore_registers(msrs); + model->shutdown(msrs); +} + + +static void nmi_shutdown(void) +{ + nmi_enabled = 0; + on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); + unregister_die_notifier(&profile_exceptions_nb); + free_msrs(); +} + + +static void nmi_cpu_start(void * dummy) +{ + struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()]; + model->start(msrs); +} + + +static int nmi_start(void) +{ + on_each_cpu(nmi_cpu_start, NULL, 0, 1); + return 0; +} + + +static void nmi_cpu_stop(void * dummy) +{ + struct op_msrs const * msrs = &cpu_msrs[smp_processor_id()]; + model->stop(msrs); +} + + +static void nmi_stop(void) +{ + on_each_cpu(nmi_cpu_stop, NULL, 0, 1); +} + + +struct op_counter_config counter_config[OP_MAX_COUNTER]; + +static int nmi_create_files(struct super_block * sb, struct dentry * root) +{ + unsigned int i; + + for (i = 0; i < model->num_counters; ++i) { + struct dentry * dir; + char buf[4]; + + /* quick little hack to _not_ expose a counter if it is not + * available for use. This should protect userspace app. + * NOTE: assumes 1:1 mapping here (that counters are organized + * sequentially in their struct assignment). + */ + if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i))) + continue; + + snprintf(buf, sizeof(buf), "%d", i); + dir = oprofilefs_mkdir(sb, root, buf); + oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); + oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); + oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count); + oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); + oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); + oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); + } + + return 0; +} + +static int p4force; +module_param(p4force, int, 0); + +static int __init p4_init(char ** cpu_type) +{ + __u8 cpu_model = boot_cpu_data.x86_model; + + if (!p4force && (cpu_model > 6 || cpu_model == 5)) + return 0; + +#ifndef CONFIG_SMP + *cpu_type = "i386/p4"; + model = &op_p4_spec; + return 1; +#else + switch (smp_num_siblings) { + case 1: + *cpu_type = "i386/p4"; + model = &op_p4_spec; + return 1; + + case 2: + *cpu_type = "i386/p4-ht"; + model = &op_p4_ht2_spec; + return 1; + } +#endif + + printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n"); + printk(KERN_INFO "oprofile: Reverting to timer mode.\n"); + return 0; +} + + +static int __init ppro_init(char ** cpu_type) +{ + __u8 cpu_model = boot_cpu_data.x86_model; + + if (cpu_model == 14) + *cpu_type = "i386/core"; + else if (cpu_model == 15) + *cpu_type = "i386/core_2"; + else if (cpu_model > 0xd) + return 0; + else if (cpu_model == 9) { + *cpu_type = "i386/p6_mobile"; + } else if (cpu_model > 5) { + *cpu_type = "i386/piii"; + } else if (cpu_model > 2) { + *cpu_type = "i386/pii"; + } else { + *cpu_type = "i386/ppro"; + } + + model = &op_ppro_spec; + return 1; +} + +/* in order to get sysfs right */ +static int using_nmi; + +int __init op_nmi_init(struct oprofile_operations *ops) +{ + __u8 vendor = boot_cpu_data.x86_vendor; + __u8 family = boot_cpu_data.x86; + char *cpu_type; + + if (!cpu_has_apic) + return -ENODEV; + + switch (vendor) { + case X86_VENDOR_AMD: + /* Needs to be at least an Athlon (or hammer in 32bit mode) */ + + switch (family) { + default: + return -ENODEV; + case 6: + model = &op_athlon_spec; + cpu_type = "i386/athlon"; + break; + case 0xf: + model = &op_athlon_spec; + /* Actually it could be i386/hammer too, but give + user space an consistent name. */ + cpu_type = "x86-64/hammer"; + break; + case 0x10: + model = &op_athlon_spec; + cpu_type = "x86-64/family10"; + break; + } + break; + + case X86_VENDOR_INTEL: + switch (family) { + /* Pentium IV */ + case 0xf: + if (!p4_init(&cpu_type)) + return -ENODEV; + break; + + /* A P6-class processor */ + case 6: + if (!ppro_init(&cpu_type)) + return -ENODEV; + break; + + default: + return -ENODEV; + } + break; + + default: + return -ENODEV; + } + + init_sysfs(); + using_nmi = 1; + ops->create_files = nmi_create_files; + ops->setup = nmi_setup; + ops->shutdown = nmi_shutdown; + ops->start = nmi_start; + ops->stop = nmi_stop; + ops->cpu_type = cpu_type; + printk(KERN_INFO "oprofile: using NMI interrupt.\n"); + return 0; +} + + +void op_nmi_exit(void) +{ + if (using_nmi) + exit_sysfs(); +} diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c new file mode 100644 index 00000000000..1418e36ae7a --- /dev/null +++ b/arch/x86/oprofile/nmi_timer_int.c @@ -0,0 +1,69 @@ +/** + * @file nmi_timer_int.c + * + * @remark Copyright 2003 OProfile authors + * @remark Read the file COPYING + * + * @author Zwane Mwaikambo + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int profile_timer_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + switch(val) { + case DIE_NMI: + oprofile_add_sample(args->regs, 0); + ret = NOTIFY_STOP; + break; + default: + break; + } + return ret; +} + +static struct notifier_block profile_timer_exceptions_nb = { + .notifier_call = profile_timer_exceptions_notify, + .next = NULL, + .priority = 0 +}; + +static int timer_start(void) +{ + if (register_die_notifier(&profile_timer_exceptions_nb)) + return 1; + return 0; +} + + +static void timer_stop(void) +{ + unregister_die_notifier(&profile_timer_exceptions_nb); + synchronize_sched(); /* Allow already-started NMIs to complete. */ +} + + +int __init op_nmi_timer_init(struct oprofile_operations * ops) +{ + if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0)) + return -ENODEV; + + ops->start = timer_start; + ops->stop = timer_stop; + ops->cpu_type = "timer"; + printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); + return 0; +} diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h new file mode 100644 index 00000000000..2880b15c467 --- /dev/null +++ b/arch/x86/oprofile/op_counter.h @@ -0,0 +1,29 @@ +/** + * @file op_counter.h + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + */ + +#ifndef OP_COUNTER_H +#define OP_COUNTER_H + +#define OP_MAX_COUNTER 8 + +/* Per-perfctr configuration as set via + * oprofilefs. + */ +struct op_counter_config { + unsigned long count; + unsigned long enabled; + unsigned long event; + unsigned long kernel; + unsigned long user; + unsigned long unit_mask; +}; + +extern struct op_counter_config counter_config[]; + +#endif /* OP_COUNTER_H */ diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c new file mode 100644 index 00000000000..3057a19e464 --- /dev/null +++ b/arch/x86/oprofile/op_model_athlon.c @@ -0,0 +1,180 @@ +/** + * @file op_model_athlon.h + * athlon / K7 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * @author Philippe Elie + * @author Graydon Hoare + */ + +#include +#include +#include +#include + +#include "op_x86_model.h" +#include "op_counter.h" + +#define NUM_COUNTERS 4 +#define NUM_CONTROLS 4 + +#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) +#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) +#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0) +#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) + +#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) +#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0) +#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0) +#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) +#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) +#define CTRL_CLEAR(x) (x &= (1<<21)) +#define CTRL_SET_ENABLE(val) (val |= 1<<20) +#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16)) +#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17)) +#define CTRL_SET_UM(val, m) (val |= (m << 8)) +#define CTRL_SET_EVENT(val, e) (val |= e) + +static unsigned long reset_value[NUM_COUNTERS]; + +static void athlon_fill_in_addresses(struct op_msrs * const msrs) +{ + int i; + + for (i=0; i < NUM_COUNTERS; i++) { + if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) + msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; + else + msrs->counters[i].addr = 0; + } + + for (i=0; i < NUM_CONTROLS; i++) { + if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) + msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; + else + msrs->controls[i].addr = 0; + } +} + + +static void athlon_setup_ctrs(struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i; + + /* clear all counters */ + for (i = 0 ; i < NUM_CONTROLS; ++i) { + if (unlikely(!CTRL_IS_RESERVED(msrs,i))) + continue; + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR(low); + CTRL_WRITE(low, high, msrs, i); + } + + /* avoid a false detection of ctr overflows in NMI handler */ + for (i = 0; i < NUM_COUNTERS; ++i) { + if (unlikely(!CTR_IS_RESERVED(msrs,i))) + continue; + CTR_WRITE(1, msrs, i); + } + + /* enable active counters */ + for (i = 0; i < NUM_COUNTERS; ++i) { + if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) { + reset_value[i] = counter_config[i].count; + + CTR_WRITE(counter_config[i].count, msrs, i); + + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR(low); + CTRL_SET_ENABLE(low); + CTRL_SET_USR(low, counter_config[i].user); + CTRL_SET_KERN(low, counter_config[i].kernel); + CTRL_SET_UM(low, counter_config[i].unit_mask); + CTRL_SET_EVENT(low, counter_config[i].event); + CTRL_WRITE(low, high, msrs, i); + } else { + reset_value[i] = 0; + } + } +} + + +static int athlon_check_ctrs(struct pt_regs * const regs, + struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i; + + for (i = 0 ; i < NUM_COUNTERS; ++i) { + if (!reset_value[i]) + continue; + CTR_READ(low, high, msrs, i); + if (CTR_OVERFLOWED(low)) { + oprofile_add_sample(regs, i); + CTR_WRITE(reset_value[i], msrs, i); + } + } + + /* See op_model_ppro.c */ + return 1; +} + + +static void athlon_start(struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i; + for (i = 0 ; i < NUM_COUNTERS ; ++i) { + if (reset_value[i]) { + CTRL_READ(low, high, msrs, i); + CTRL_SET_ACTIVE(low); + CTRL_WRITE(low, high, msrs, i); + } + } +} + + +static void athlon_stop(struct op_msrs const * const msrs) +{ + unsigned int low,high; + int i; + + /* Subtle: stop on all counters to avoid race with + * setting our pm callback */ + for (i = 0 ; i < NUM_COUNTERS ; ++i) { + if (!reset_value[i]) + continue; + CTRL_READ(low, high, msrs, i); + CTRL_SET_INACTIVE(low); + CTRL_WRITE(low, high, msrs, i); + } +} + +static void athlon_shutdown(struct op_msrs const * const msrs) +{ + int i; + + for (i = 0 ; i < NUM_COUNTERS ; ++i) { + if (CTR_IS_RESERVED(msrs,i)) + release_perfctr_nmi(MSR_K7_PERFCTR0 + i); + } + for (i = 0 ; i < NUM_CONTROLS ; ++i) { + if (CTRL_IS_RESERVED(msrs,i)) + release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); + } +} + +struct op_x86_model_spec const op_athlon_spec = { + .num_counters = NUM_COUNTERS, + .num_controls = NUM_CONTROLS, + .fill_in_addresses = &athlon_fill_in_addresses, + .setup_ctrs = &athlon_setup_ctrs, + .check_ctrs = &athlon_check_ctrs, + .start = &athlon_start, + .stop = &athlon_stop, + .shutdown = &athlon_shutdown +}; diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c new file mode 100644 index 00000000000..47925927b12 --- /dev/null +++ b/arch/x86/oprofile/op_model_p4.c @@ -0,0 +1,722 @@ +/** + * @file op_model_p4.c + * P4 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Graydon Hoare + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "op_x86_model.h" +#include "op_counter.h" + +#define NUM_EVENTS 39 + +#define NUM_COUNTERS_NON_HT 8 +#define NUM_ESCRS_NON_HT 45 +#define NUM_CCCRS_NON_HT 18 +#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT) + +#define NUM_COUNTERS_HT2 4 +#define NUM_ESCRS_HT2 23 +#define NUM_CCCRS_HT2 9 +#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) + +static unsigned int num_counters = NUM_COUNTERS_NON_HT; +static unsigned int num_controls = NUM_CONTROLS_NON_HT; + +/* this has to be checked dynamically since the + hyper-threadedness of a chip is discovered at + kernel boot-time. */ +static inline void setup_num_counters(void) +{ +#ifdef CONFIG_SMP + if (smp_num_siblings == 2){ + num_counters = NUM_COUNTERS_HT2; + num_controls = NUM_CONTROLS_HT2; + } +#endif +} + +static int inline addr_increment(void) +{ +#ifdef CONFIG_SMP + return smp_num_siblings == 2 ? 2 : 1; +#else + return 1; +#endif +} + + +/* tables to simulate simplified hardware view of p4 registers */ +struct p4_counter_binding { + int virt_counter; + int counter_address; + int cccr_address; +}; + +struct p4_event_binding { + int escr_select; /* value to put in CCCR */ + int event_select; /* value to put in ESCR */ + struct { + int virt_counter; /* for this counter... */ + int escr_address; /* use this ESCR */ + } bindings[2]; +}; + +/* nb: these CTR_* defines are a duplicate of defines in + event/i386.p4*events. */ + + +#define CTR_BPU_0 (1 << 0) +#define CTR_MS_0 (1 << 1) +#define CTR_FLAME_0 (1 << 2) +#define CTR_IQ_4 (1 << 3) +#define CTR_BPU_2 (1 << 4) +#define CTR_MS_2 (1 << 5) +#define CTR_FLAME_2 (1 << 6) +#define CTR_IQ_5 (1 << 7) + +static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { + { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 }, + { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 }, + { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 }, + { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 }, + { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 }, + { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 }, + { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 }, + { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 } +}; + +#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT + +/* p4 event codes in libop/op_event.h are indices into this table. */ + +static struct p4_event_binding p4_events[NUM_EVENTS] = { + + { /* BRANCH_RETIRED */ + 0x05, 0x06, + { {CTR_IQ_4, MSR_P4_CRU_ESCR2}, + {CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* MISPRED_BRANCH_RETIRED */ + 0x04, 0x03, + { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, + { CTR_IQ_5, MSR_P4_CRU_ESCR1} } + }, + + { /* TC_DELIVER_MODE */ + 0x01, 0x01, + { { CTR_MS_0, MSR_P4_TC_ESCR0}, + { CTR_MS_2, MSR_P4_TC_ESCR1} } + }, + + { /* BPU_FETCH_REQUEST */ + 0x00, 0x03, + { { CTR_BPU_0, MSR_P4_BPU_ESCR0}, + { CTR_BPU_2, MSR_P4_BPU_ESCR1} } + }, + + { /* ITLB_REFERENCE */ + 0x03, 0x18, + { { CTR_BPU_0, MSR_P4_ITLB_ESCR0}, + { CTR_BPU_2, MSR_P4_ITLB_ESCR1} } + }, + + { /* MEMORY_CANCEL */ + 0x05, 0x02, + { { CTR_FLAME_0, MSR_P4_DAC_ESCR0}, + { CTR_FLAME_2, MSR_P4_DAC_ESCR1} } + }, + + { /* MEMORY_COMPLETE */ + 0x02, 0x08, + { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, + { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } + }, + + { /* LOAD_PORT_REPLAY */ + 0x02, 0x04, + { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, + { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } + }, + + { /* STORE_PORT_REPLAY */ + 0x02, 0x05, + { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, + { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } + }, + + { /* MOB_LOAD_REPLAY */ + 0x02, 0x03, + { { CTR_BPU_0, MSR_P4_MOB_ESCR0}, + { CTR_BPU_2, MSR_P4_MOB_ESCR1} } + }, + + { /* PAGE_WALK_TYPE */ + 0x04, 0x01, + { { CTR_BPU_0, MSR_P4_PMH_ESCR0}, + { CTR_BPU_2, MSR_P4_PMH_ESCR1} } + }, + + { /* BSQ_CACHE_REFERENCE */ + 0x07, 0x0c, + { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, + { CTR_BPU_2, MSR_P4_BSU_ESCR1} } + }, + + { /* IOQ_ALLOCATION */ + 0x06, 0x03, + { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, + { 0, 0 } } + }, + + { /* IOQ_ACTIVE_ENTRIES */ + 0x06, 0x1a, + { { CTR_BPU_2, MSR_P4_FSB_ESCR1}, + { 0, 0 } } + }, + + { /* FSB_DATA_ACTIVITY */ + 0x06, 0x17, + { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, + { CTR_BPU_2, MSR_P4_FSB_ESCR1} } + }, + + { /* BSQ_ALLOCATION */ + 0x07, 0x05, + { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, + { 0, 0 } } + }, + + { /* BSQ_ACTIVE_ENTRIES */ + 0x07, 0x06, + { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */}, + { 0, 0 } } + }, + + { /* X87_ASSIST */ + 0x05, 0x03, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* SSE_INPUT_ASSIST */ + 0x01, 0x34, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* PACKED_SP_UOP */ + 0x01, 0x08, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* PACKED_DP_UOP */ + 0x01, 0x0c, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* SCALAR_SP_UOP */ + 0x01, 0x0a, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* SCALAR_DP_UOP */ + 0x01, 0x0e, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* 64BIT_MMX_UOP */ + 0x01, 0x02, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* 128BIT_MMX_UOP */ + 0x01, 0x1a, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* X87_FP_UOP */ + 0x01, 0x04, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* X87_SIMD_MOVES_UOP */ + 0x01, 0x2e, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* MACHINE_CLEAR */ + 0x05, 0x02, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* GLOBAL_POWER_EVENTS */ + 0x06, 0x13 /* older manual says 0x05, newer 0x13 */, + { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, + { CTR_BPU_2, MSR_P4_FSB_ESCR1} } + }, + + { /* TC_MS_XFER */ + 0x00, 0x05, + { { CTR_MS_0, MSR_P4_MS_ESCR0}, + { CTR_MS_2, MSR_P4_MS_ESCR1} } + }, + + { /* UOP_QUEUE_WRITES */ + 0x00, 0x09, + { { CTR_MS_0, MSR_P4_MS_ESCR0}, + { CTR_MS_2, MSR_P4_MS_ESCR1} } + }, + + { /* FRONT_END_EVENT */ + 0x05, 0x08, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* EXECUTION_EVENT */ + 0x05, 0x0c, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* REPLAY_EVENT */ + 0x05, 0x09, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* INSTR_RETIRED */ + 0x04, 0x02, + { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, + { CTR_IQ_5, MSR_P4_CRU_ESCR1} } + }, + + { /* UOPS_RETIRED */ + 0x04, 0x01, + { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, + { CTR_IQ_5, MSR_P4_CRU_ESCR1} } + }, + + { /* UOP_TYPE */ + 0x02, 0x02, + { { CTR_IQ_4, MSR_P4_RAT_ESCR0}, + { CTR_IQ_5, MSR_P4_RAT_ESCR1} } + }, + + { /* RETIRED_MISPRED_BRANCH_TYPE */ + 0x02, 0x05, + { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, + { CTR_MS_2, MSR_P4_TBPU_ESCR1} } + }, + + { /* RETIRED_BRANCH_TYPE */ + 0x02, 0x04, + { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, + { CTR_MS_2, MSR_P4_TBPU_ESCR1} } + } +}; + + +#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7) + +#define ESCR_RESERVED_BITS 0x80000003 +#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS) +#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2)) +#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3)) +#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1))) +#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) +#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) +#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) +#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) +#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) + +#define CCCR_RESERVED_BITS 0x38030FFF +#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) +#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000) +#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13)) +#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26)) +#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) +#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) +#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) +#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) +#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) +#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) +#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) + +#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) +#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) +#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0) +#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0) +#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) + + +/* this assigns a "stagger" to the current CPU, which is used throughout + the code in this module as an extra array offset, to select the "even" + or "odd" part of all the divided resources. */ +static unsigned int get_stagger(void) +{ +#ifdef CONFIG_SMP + int cpu = smp_processor_id(); + return (cpu != first_cpu(cpu_sibling_map[cpu])); +#endif + return 0; +} + + +/* finally, mediate access to a real hardware counter + by passing a "virtual" counter numer to this macro, + along with your stagger setting. */ +#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger))) + +static unsigned long reset_value[NUM_COUNTERS_NON_HT]; + + +static void p4_fill_in_addresses(struct op_msrs * const msrs) +{ + unsigned int i; + unsigned int addr, cccraddr, stag; + + setup_num_counters(); + stag = get_stagger(); + + /* initialize some registers */ + for (i = 0; i < num_counters; ++i) { + msrs->counters[i].addr = 0; + } + for (i = 0; i < num_controls; ++i) { + msrs->controls[i].addr = 0; + } + + /* the counter & cccr registers we pay attention to */ + for (i = 0; i < num_counters; ++i) { + addr = p4_counters[VIRT_CTR(stag, i)].counter_address; + cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address; + if (reserve_perfctr_nmi(addr)){ + msrs->counters[i].addr = addr; + msrs->controls[i].addr = cccraddr; + } + } + + /* 43 ESCR registers in three or four discontiguous group */ + for (addr = MSR_P4_BSU_ESCR0 + stag; + addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) { + if (reserve_evntsel_nmi(addr)) + msrs->controls[i].addr = addr; + } + + /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1 + * to avoid special case in nmi_{save|restore}_registers() */ + if (boot_cpu_data.x86_model >= 0x3) { + for (addr = MSR_P4_BSU_ESCR0 + stag; + addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) { + if (reserve_evntsel_nmi(addr)) + msrs->controls[i].addr = addr; + } + } else { + for (addr = MSR_P4_IQ_ESCR0 + stag; + addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) { + if (reserve_evntsel_nmi(addr)) + msrs->controls[i].addr = addr; + } + } + + for (addr = MSR_P4_RAT_ESCR0 + stag; + addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { + if (reserve_evntsel_nmi(addr)) + msrs->controls[i].addr = addr; + } + + for (addr = MSR_P4_MS_ESCR0 + stag; + addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { + if (reserve_evntsel_nmi(addr)) + msrs->controls[i].addr = addr; + } + + for (addr = MSR_P4_IX_ESCR0 + stag; + addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { + if (reserve_evntsel_nmi(addr)) + msrs->controls[i].addr = addr; + } + + /* there are 2 remaining non-contiguously located ESCRs */ + + if (num_counters == NUM_COUNTERS_NON_HT) { + /* standard non-HT CPUs handle both remaining ESCRs*/ + if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4)) + msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; + + } else if (stag == 0) { + /* HT CPUs give the first remainder to the even thread, as + the 32nd control register */ + if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4)) + msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; + + } else { + /* and two copies of the second to the odd thread, + for the 22st and 23nd control registers */ + if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) { + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + } + } +} + + +static void pmc_setup_one_p4_counter(unsigned int ctr) +{ + int i; + int const maxbind = 2; + unsigned int cccr = 0; + unsigned int escr = 0; + unsigned int high = 0; + unsigned int counter_bit; + struct p4_event_binding *ev = NULL; + unsigned int stag; + + stag = get_stagger(); + + /* convert from counter *number* to counter *bit* */ + counter_bit = 1 << VIRT_CTR(stag, ctr); + + /* find our event binding structure. */ + if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) { + printk(KERN_ERR + "oprofile: P4 event code 0x%lx out of range\n", + counter_config[ctr].event); + return; + } + + ev = &(p4_events[counter_config[ctr].event - 1]); + + for (i = 0; i < maxbind; i++) { + if (ev->bindings[i].virt_counter & counter_bit) { + + /* modify ESCR */ + ESCR_READ(escr, high, ev, i); + ESCR_CLEAR(escr); + if (stag == 0) { + ESCR_SET_USR_0(escr, counter_config[ctr].user); + ESCR_SET_OS_0(escr, counter_config[ctr].kernel); + } else { + ESCR_SET_USR_1(escr, counter_config[ctr].user); + ESCR_SET_OS_1(escr, counter_config[ctr].kernel); + } + ESCR_SET_EVENT_SELECT(escr, ev->event_select); + ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); + ESCR_WRITE(escr, high, ev, i); + + /* modify CCCR */ + CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); + CCCR_CLEAR(cccr); + CCCR_SET_REQUIRED_BITS(cccr); + CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); + if (stag == 0) { + CCCR_SET_PMI_OVF_0(cccr); + } else { + CCCR_SET_PMI_OVF_1(cccr); + } + CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); + return; + } + } + + printk(KERN_ERR + "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n", + counter_config[ctr].event, stag, ctr); +} + + +static void p4_setup_ctrs(struct op_msrs const * const msrs) +{ + unsigned int i; + unsigned int low, high; + unsigned int stag; + + stag = get_stagger(); + + rdmsr(MSR_IA32_MISC_ENABLE, low, high); + if (! MISC_PMC_ENABLED_P(low)) { + printk(KERN_ERR "oprofile: P4 PMC not available\n"); + return; + } + + /* clear the cccrs we will use */ + for (i = 0 ; i < num_counters ; i++) { + if (unlikely(!CTRL_IS_RESERVED(msrs,i))) + continue; + rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); + CCCR_CLEAR(low); + CCCR_SET_REQUIRED_BITS(low); + wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); + } + + /* clear all escrs (including those outside our concern) */ + for (i = num_counters; i < num_controls; i++) { + if (unlikely(!CTRL_IS_RESERVED(msrs,i))) + continue; + wrmsr(msrs->controls[i].addr, 0, 0); + } + + /* setup all counters */ + for (i = 0 ; i < num_counters ; ++i) { + if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) { + reset_value[i] = counter_config[i].count; + pmc_setup_one_p4_counter(i); + CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); + } else { + reset_value[i] = 0; + } + } +} + + +static int p4_check_ctrs(struct pt_regs * const regs, + struct op_msrs const * const msrs) +{ + unsigned long ctr, low, high, stag, real; + int i; + + stag = get_stagger(); + + for (i = 0; i < num_counters; ++i) { + + if (!reset_value[i]) + continue; + + /* + * there is some eccentricity in the hardware which + * requires that we perform 2 extra corrections: + * + * - check both the CCCR:OVF flag for overflow and the + * counter high bit for un-flagged overflows. + * + * - write the counter back twice to ensure it gets + * updated properly. + * + * the former seems to be related to extra NMIs happening + * during the current NMI; the latter is reported as errata + * N15 in intel doc 249199-029, pentium 4 specification + * update, though their suggested work-around does not + * appear to solve the problem. + */ + + real = VIRT_CTR(stag, i); + + CCCR_READ(low, high, real); + CTR_READ(ctr, high, real); + if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { + oprofile_add_sample(regs, i); + CTR_WRITE(reset_value[i], real); + CCCR_CLEAR_OVF(low); + CCCR_WRITE(low, high, real); + CTR_WRITE(reset_value[i], real); + } + } + + /* P4 quirk: you have to re-unmask the apic vector */ + apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); + + /* See op_model_ppro.c */ + return 1; +} + + +static void p4_start(struct op_msrs const * const msrs) +{ + unsigned int low, high, stag; + int i; + + stag = get_stagger(); + + for (i = 0; i < num_counters; ++i) { + if (!reset_value[i]) + continue; + CCCR_READ(low, high, VIRT_CTR(stag, i)); + CCCR_SET_ENABLE(low); + CCCR_WRITE(low, high, VIRT_CTR(stag, i)); + } +} + + +static void p4_stop(struct op_msrs const * const msrs) +{ + unsigned int low, high, stag; + int i; + + stag = get_stagger(); + + for (i = 0; i < num_counters; ++i) { + if (!reset_value[i]) + continue; + CCCR_READ(low, high, VIRT_CTR(stag, i)); + CCCR_SET_DISABLE(low); + CCCR_WRITE(low, high, VIRT_CTR(stag, i)); + } +} + +static void p4_shutdown(struct op_msrs const * const msrs) +{ + int i; + + for (i = 0 ; i < num_counters ; ++i) { + if (CTR_IS_RESERVED(msrs,i)) + release_perfctr_nmi(msrs->counters[i].addr); + } + /* some of the control registers are specially reserved in + * conjunction with the counter registers (hence the starting offset). + * This saves a few bits. + */ + for (i = num_counters ; i < num_controls ; ++i) { + if (CTRL_IS_RESERVED(msrs,i)) + release_evntsel_nmi(msrs->controls[i].addr); + } +} + + +#ifdef CONFIG_SMP +struct op_x86_model_spec const op_p4_ht2_spec = { + .num_counters = NUM_COUNTERS_HT2, + .num_controls = NUM_CONTROLS_HT2, + .fill_in_addresses = &p4_fill_in_addresses, + .setup_ctrs = &p4_setup_ctrs, + .check_ctrs = &p4_check_ctrs, + .start = &p4_start, + .stop = &p4_stop, + .shutdown = &p4_shutdown +}; +#endif + +struct op_x86_model_spec const op_p4_spec = { + .num_counters = NUM_COUNTERS_NON_HT, + .num_controls = NUM_CONTROLS_NON_HT, + .fill_in_addresses = &p4_fill_in_addresses, + .setup_ctrs = &p4_setup_ctrs, + .check_ctrs = &p4_check_ctrs, + .start = &p4_start, + .stop = &p4_stop, + .shutdown = &p4_shutdown +}; diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c new file mode 100644 index 00000000000..c554f52cb80 --- /dev/null +++ b/arch/x86/oprofile/op_model_ppro.c @@ -0,0 +1,192 @@ +/** + * @file op_model_ppro.h + * pentium pro / P6 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * @author Philippe Elie + * @author Graydon Hoare + */ + +#include +#include +#include +#include +#include + +#include "op_x86_model.h" +#include "op_counter.h" + +#define NUM_COUNTERS 2 +#define NUM_CONTROLS 2 + +#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) +#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) +#define CTR_32BIT_WRITE(l,msrs,c) \ + do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0);} while (0) +#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) + +#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) +#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0) +#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0) +#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) +#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) +#define CTRL_CLEAR(x) (x &= (1<<21)) +#define CTRL_SET_ENABLE(val) (val |= 1<<20) +#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16)) +#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17)) +#define CTRL_SET_UM(val, m) (val |= (m << 8)) +#define CTRL_SET_EVENT(val, e) (val |= e) + +static unsigned long reset_value[NUM_COUNTERS]; + +static void ppro_fill_in_addresses(struct op_msrs * const msrs) +{ + int i; + + for (i=0; i < NUM_COUNTERS; i++) { + if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) + msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; + else + msrs->counters[i].addr = 0; + } + + for (i=0; i < NUM_CONTROLS; i++) { + if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) + msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; + else + msrs->controls[i].addr = 0; + } +} + + +static void ppro_setup_ctrs(struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i; + + /* clear all counters */ + for (i = 0 ; i < NUM_CONTROLS; ++i) { + if (unlikely(!CTRL_IS_RESERVED(msrs,i))) + continue; + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR(low); + CTRL_WRITE(low, high, msrs, i); + } + + /* avoid a false detection of ctr overflows in NMI handler */ + for (i = 0; i < NUM_COUNTERS; ++i) { + if (unlikely(!CTR_IS_RESERVED(msrs,i))) + continue; + CTR_32BIT_WRITE(1, msrs, i); + } + + /* enable active counters */ + for (i = 0; i < NUM_COUNTERS; ++i) { + if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) { + reset_value[i] = counter_config[i].count; + + CTR_32BIT_WRITE(counter_config[i].count, msrs, i); + + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR(low); + CTRL_SET_ENABLE(low); + CTRL_SET_USR(low, counter_config[i].user); + CTRL_SET_KERN(low, counter_config[i].kernel); + CTRL_SET_UM(low, counter_config[i].unit_mask); + CTRL_SET_EVENT(low, counter_config[i].event); + CTRL_WRITE(low, high, msrs, i); + } else { + reset_value[i] = 0; + } + } +} + + +static int ppro_check_ctrs(struct pt_regs * const regs, + struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i; + + for (i = 0 ; i < NUM_COUNTERS; ++i) { + if (!reset_value[i]) + continue; + CTR_READ(low, high, msrs, i); + if (CTR_OVERFLOWED(low)) { + oprofile_add_sample(regs, i); + CTR_32BIT_WRITE(reset_value[i], msrs, i); + } + } + + /* Only P6 based Pentium M need to re-unmask the apic vector but it + * doesn't hurt other P6 variant */ + apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); + + /* We can't work out if we really handled an interrupt. We + * might have caught a *second* counter just after overflowing + * the interrupt for this counter then arrives + * and we don't find a counter that's overflowed, so we + * would return 0 and get dazed + confused. Instead we always + * assume we found an overflow. This sucks. + */ + return 1; +} + + +static void ppro_start(struct op_msrs const * const msrs) +{ + unsigned int low,high; + int i; + + for (i = 0; i < NUM_COUNTERS; ++i) { + if (reset_value[i]) { + CTRL_READ(low, high, msrs, i); + CTRL_SET_ACTIVE(low); + CTRL_WRITE(low, high, msrs, i); + } + } +} + + +static void ppro_stop(struct op_msrs const * const msrs) +{ + unsigned int low,high; + int i; + + for (i = 0; i < NUM_COUNTERS; ++i) { + if (!reset_value[i]) + continue; + CTRL_READ(low, high, msrs, i); + CTRL_SET_INACTIVE(low); + CTRL_WRITE(low, high, msrs, i); + } +} + +static void ppro_shutdown(struct op_msrs const * const msrs) +{ + int i; + + for (i = 0 ; i < NUM_COUNTERS ; ++i) { + if (CTR_IS_RESERVED(msrs,i)) + release_perfctr_nmi(MSR_P6_PERFCTR0 + i); + } + for (i = 0 ; i < NUM_CONTROLS ; ++i) { + if (CTRL_IS_RESERVED(msrs,i)) + release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); + } +} + + +struct op_x86_model_spec const op_ppro_spec = { + .num_counters = NUM_COUNTERS, + .num_controls = NUM_CONTROLS, + .fill_in_addresses = &ppro_fill_in_addresses, + .setup_ctrs = &ppro_setup_ctrs, + .check_ctrs = &ppro_check_ctrs, + .start = &ppro_start, + .stop = &ppro_stop, + .shutdown = &ppro_shutdown +}; diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h new file mode 100644 index 00000000000..abb1aa95b97 --- /dev/null +++ b/arch/x86/oprofile/op_x86_model.h @@ -0,0 +1,51 @@ +/** + * @file op_x86_model.h + * interface to x86 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Graydon Hoare + */ + +#ifndef OP_X86_MODEL_H +#define OP_X86_MODEL_H + +struct op_saved_msr { + unsigned int high; + unsigned int low; +}; + +struct op_msr { + unsigned long addr; + struct op_saved_msr saved; +}; + +struct op_msrs { + struct op_msr * counters; + struct op_msr * controls; +}; + +struct pt_regs; + +/* The model vtable abstracts the differences between + * various x86 CPU model's perfctr support. + */ +struct op_x86_model_spec { + unsigned int const num_counters; + unsigned int const num_controls; + void (*fill_in_addresses)(struct op_msrs * const msrs); + void (*setup_ctrs)(struct op_msrs const * const msrs); + int (*check_ctrs)(struct pt_regs * const regs, + struct op_msrs const * const msrs); + void (*start)(struct op_msrs const * const msrs); + void (*stop)(struct op_msrs const * const msrs); + void (*shutdown)(struct op_msrs const * const msrs); +}; + +extern struct op_x86_model_spec const op_ppro_spec; +extern struct op_x86_model_spec const op_p4_spec; +extern struct op_x86_model_spec const op_p4_ht2_spec; +extern struct op_x86_model_spec const op_athlon_spec; + +#endif /* OP_X86_MODEL_H */ diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index c556b666151..0416dd2f2db 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -80,7 +80,7 @@ core-y += arch/x86_64/kernel/ \ arch/x86_64/vdso/ core-$(CONFIG_IA32_EMULATION) += arch/x86_64/ia32/ drivers-$(CONFIG_PCI) += arch/x86_64/pci/ -drivers-$(CONFIG_OPROFILE) += arch/i386/oprofile/ +drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/ boot := arch/x86_64/boot -- cgit v1.2.3-70-g09d2 From 4ac24f63fd203bc12a841a88a2034dccd358d0d1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:56 +0200 Subject: i386: move video Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Makefile | 2 +- arch/i386/video/Makefile | 1 - arch/i386/video/fbdev.c | 32 -------------------------------- arch/x86/video/Makefile | 1 + arch/x86/video/fbdev.c | 32 ++++++++++++++++++++++++++++++++ 5 files changed, 34 insertions(+), 34 deletions(-) delete mode 100644 arch/i386/video/Makefile delete mode 100644 arch/i386/video/fbdev.c create mode 100644 arch/x86/video/Makefile create mode 100644 arch/x86/video/fbdev.c (limited to 'arch/x86') diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 4d1c0015e44..397cfedb4b1 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -111,7 +111,7 @@ drivers-$(CONFIG_PCI) += arch/x86/pci/ # must be linked after kernel/ drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/ drivers-$(CONFIG_PM) += arch/x86/power/ -drivers-$(CONFIG_FB) += arch/i386/video/ +drivers-$(CONFIG_FB) += arch/x86/video/ CFLAGS += $(mflags-y) AFLAGS += $(mflags-y) diff --git a/arch/i386/video/Makefile b/arch/i386/video/Makefile deleted file mode 100644 index 2c447c94adc..00000000000 --- a/arch/i386/video/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-$(CONFIG_FB) += fbdev.o diff --git a/arch/i386/video/fbdev.c b/arch/i386/video/fbdev.c deleted file mode 100644 index 48fb38d7d2c..00000000000 --- a/arch/i386/video/fbdev.c +++ /dev/null @@ -1,32 +0,0 @@ -/* - * arch/i386/video/fbdev.c - i386 Framebuffer - * - * Copyright (C) 2007 Antonino Daplas - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of this archive - * for more details. - * - */ -#include -#include - -int fb_is_primary_device(struct fb_info *info) -{ - struct device *device = info->device; - struct pci_dev *pci_dev = NULL; - struct resource *res = NULL; - int retval = 0; - - if (device) - pci_dev = to_pci_dev(device); - - if (pci_dev) - res = &pci_dev->resource[PCI_ROM_RESOURCE]; - - if (res && res->flags & IORESOURCE_ROM_SHADOW) - retval = 1; - - return retval; -} -EXPORT_SYMBOL(fb_is_primary_device); diff --git a/arch/x86/video/Makefile b/arch/x86/video/Makefile new file mode 100644 index 00000000000..2c447c94adc --- /dev/null +++ b/arch/x86/video/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_FB) += fbdev.o diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c new file mode 100644 index 00000000000..48fb38d7d2c --- /dev/null +++ b/arch/x86/video/fbdev.c @@ -0,0 +1,32 @@ +/* + * arch/i386/video/fbdev.c - i386 Framebuffer + * + * Copyright (C) 2007 Antonino Daplas + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + * + */ +#include +#include + +int fb_is_primary_device(struct fb_info *info) +{ + struct device *device = info->device; + struct pci_dev *pci_dev = NULL; + struct resource *res = NULL; + int retval = 0; + + if (device) + pci_dev = to_pci_dev(device); + + if (pci_dev) + res = &pci_dev->resource[PCI_ROM_RESOURCE]; + + if (res && res->flags & IORESOURCE_ROM_SHADOW) + retval = 1; + + return retval; +} +EXPORT_SYMBOL(fb_is_primary_device); -- cgit v1.2.3-70-g09d2 From f7627e2513987bb5d4e8cb13c4e0a478352141ac Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:58 +0200 Subject: i386: move kernel/cpu Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/kernel/Makefile_32 | 2 +- arch/i386/kernel/cpu/Makefile | 20 - arch/i386/kernel/cpu/addon_cpuid_features.c | 50 -- arch/i386/kernel/cpu/amd.c | 337 ------------ arch/i386/kernel/cpu/bugs.c | 192 ------- arch/i386/kernel/cpu/centaur.c | 471 ---------------- arch/i386/kernel/cpu/common.c | 733 ------------------------- arch/i386/kernel/cpu/cpu.h | 28 - arch/i386/kernel/cpu/cyrix.c | 463 ---------------- arch/i386/kernel/cpu/intel.c | 333 ------------ arch/i386/kernel/cpu/intel_cacheinfo.c | 806 ---------------------------- arch/i386/kernel/cpu/nexgen.c | 60 --- arch/i386/kernel/cpu/perfctr-watchdog.c | 713 ------------------------ arch/i386/kernel/cpu/proc.c | 192 ------- arch/i386/kernel/cpu/transmeta.c | 116 ---- arch/i386/kernel/cpu/umc.c | 26 - arch/x86/kernel/cpu/Makefile | 20 + arch/x86/kernel/cpu/addon_cpuid_features.c | 50 ++ arch/x86/kernel/cpu/amd.c | 337 ++++++++++++ arch/x86/kernel/cpu/bugs.c | 192 +++++++ arch/x86/kernel/cpu/centaur.c | 471 ++++++++++++++++ arch/x86/kernel/cpu/common.c | 733 +++++++++++++++++++++++++ arch/x86/kernel/cpu/cpu.h | 28 + arch/x86/kernel/cpu/cyrix.c | 463 ++++++++++++++++ arch/x86/kernel/cpu/intel.c | 333 ++++++++++++ arch/x86/kernel/cpu/intel_cacheinfo.c | 806 ++++++++++++++++++++++++++++ arch/x86/kernel/cpu/nexgen.c | 60 +++ arch/x86/kernel/cpu/perfctr-watchdog.c | 713 ++++++++++++++++++++++++ arch/x86/kernel/cpu/proc.c | 192 +++++++ arch/x86/kernel/cpu/transmeta.c | 116 ++++ arch/x86/kernel/cpu/umc.c | 26 + arch/x86_64/kernel/Makefile_64 | 6 +- 32 files changed, 4544 insertions(+), 4544 deletions(-) delete mode 100644 arch/i386/kernel/cpu/Makefile delete mode 100644 arch/i386/kernel/cpu/addon_cpuid_features.c delete mode 100644 arch/i386/kernel/cpu/amd.c delete mode 100644 arch/i386/kernel/cpu/bugs.c delete mode 100644 arch/i386/kernel/cpu/centaur.c delete mode 100644 arch/i386/kernel/cpu/common.c delete mode 100644 arch/i386/kernel/cpu/cpu.h delete mode 100644 arch/i386/kernel/cpu/cyrix.c delete mode 100644 arch/i386/kernel/cpu/intel.c delete mode 100644 arch/i386/kernel/cpu/intel_cacheinfo.c delete mode 100644 arch/i386/kernel/cpu/nexgen.c delete mode 100644 arch/i386/kernel/cpu/perfctr-watchdog.c delete mode 100644 arch/i386/kernel/cpu/proc.c delete mode 100644 arch/i386/kernel/cpu/transmeta.c delete mode 100644 arch/i386/kernel/cpu/umc.c create mode 100644 arch/x86/kernel/cpu/Makefile create mode 100644 arch/x86/kernel/cpu/addon_cpuid_features.c create mode 100644 arch/x86/kernel/cpu/amd.c create mode 100644 arch/x86/kernel/cpu/bugs.c create mode 100644 arch/x86/kernel/cpu/centaur.c create mode 100644 arch/x86/kernel/cpu/common.c create mode 100644 arch/x86/kernel/cpu/cpu.h create mode 100644 arch/x86/kernel/cpu/cyrix.c create mode 100644 arch/x86/kernel/cpu/intel.c create mode 100644 arch/x86/kernel/cpu/intel_cacheinfo.c create mode 100644 arch/x86/kernel/cpu/nexgen.c create mode 100644 arch/x86/kernel/cpu/perfctr-watchdog.c create mode 100644 arch/x86/kernel/cpu/proc.c create mode 100644 arch/x86/kernel/cpu/transmeta.c create mode 100644 arch/x86/kernel/cpu/umc.c (limited to 'arch/x86') diff --git a/arch/i386/kernel/Makefile_32 b/arch/i386/kernel/Makefile_32 index af8304b921d..5096f486d38 100644 --- a/arch/i386/kernel/Makefile_32 +++ b/arch/i386/kernel/Makefile_32 @@ -10,7 +10,7 @@ obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \ quirks.o i8237.o topology.o alternative.o i8253_32.o tsc_32.o obj-$(CONFIG_STACKTRACE) += stacktrace.o -obj-y += cpu/ +obj-y += ../../x86/kernel/cpu/ obj-y += ../../x86/kernel/acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot_32.o obj-$(CONFIG_MCA) += mca_32.o diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile deleted file mode 100644 index 6687f6d5ad2..00000000000 --- a/arch/i386/kernel/cpu/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# -# Makefile for x86-compatible CPU details and quirks -# - -obj-y := common.o proc.o bugs.o - -obj-y += amd.o -obj-y += cyrix.o -obj-y += centaur.o -obj-y += transmeta.o -obj-y += intel.o intel_cacheinfo.o addon_cpuid_features.o -obj-y += nexgen.o -obj-y += umc.o - -obj-$(CONFIG_X86_MCE) += ../../../x86/kernel/cpu/mcheck/ - -obj-$(CONFIG_MTRR) += ../../../x86/kernel/cpu/mtrr/ -obj-$(CONFIG_CPU_FREQ) += ../../../x86/kernel/cpu/cpufreq/ - -obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o diff --git a/arch/i386/kernel/cpu/addon_cpuid_features.c b/arch/i386/kernel/cpu/addon_cpuid_features.c deleted file mode 100644 index 3e91d3ee26e..00000000000 --- a/arch/i386/kernel/cpu/addon_cpuid_features.c +++ /dev/null @@ -1,50 +0,0 @@ - -/* - * Routines to indentify additional cpu features that are scattered in - * cpuid space. - */ - -#include - -#include - -struct cpuid_bit { - u16 feature; - u8 reg; - u8 bit; - u32 level; -}; - -enum cpuid_regs { - CR_EAX = 0, - CR_ECX, - CR_EDX, - CR_EBX -}; - -void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) -{ - u32 max_level; - u32 regs[4]; - const struct cpuid_bit *cb; - - static const struct cpuid_bit cpuid_bits[] = { - { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, - { 0, 0, 0, 0 } - }; - - for (cb = cpuid_bits; cb->feature; cb++) { - - /* Verify that the level is valid */ - max_level = cpuid_eax(cb->level & 0xffff0000); - if (max_level < cb->level || - max_level > (cb->level | 0xffff)) - continue; - - cpuid(cb->level, ®s[CR_EAX], ®s[CR_EBX], - ®s[CR_ECX], ®s[CR_EDX]); - - if (regs[cb->reg] & (1 << cb->bit)) - set_bit(cb->feature, c->x86_capability); - } -} diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c deleted file mode 100644 index dcf6bbb1c7c..00000000000 --- a/arch/i386/kernel/cpu/amd.c +++ /dev/null @@ -1,337 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "cpu.h" - -/* - * B step AMD K6 before B 9730xxxx have hardware bugs that can cause - * misexecution of code under Linux. Owners of such processors should - * contact AMD for precise details and a CPU swap. - * - * See http://www.multimania.com/poulot/k6bug.html - * http://www.amd.com/K6/k6docs/revgd.html - * - * The following test is erm.. interesting. AMD neglected to up - * the chip setting when fixing the bug but they also tweaked some - * performance at the same time.. - */ - -extern void vide(void); -__asm__(".align 4\nvide: ret"); - -#ifdef CONFIG_X86_LOCAL_APIC -#define ENABLE_C1E_MASK 0x18000000 -#define CPUID_PROCESSOR_SIGNATURE 1 -#define CPUID_XFAM 0x0ff00000 -#define CPUID_XFAM_K8 0x00000000 -#define CPUID_XFAM_10H 0x00100000 -#define CPUID_XFAM_11H 0x00200000 -#define CPUID_XMOD 0x000f0000 -#define CPUID_XMOD_REV_F 0x00040000 - -/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ -static __cpuinit int amd_apic_timer_broken(void) -{ - u32 lo, hi; - u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); - switch (eax & CPUID_XFAM) { - case CPUID_XFAM_K8: - if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) - break; - case CPUID_XFAM_10H: - case CPUID_XFAM_11H: - rdmsr(MSR_K8_ENABLE_C1E, lo, hi); - if (lo & ENABLE_C1E_MASK) - return 1; - break; - default: - /* err on the side of caution */ - return 1; - } - return 0; -} -#endif - -int force_mwait __cpuinitdata; - -static void __cpuinit init_amd(struct cpuinfo_x86 *c) -{ - u32 l, h; - int mbytes = num_physpages >> (20-PAGE_SHIFT); - int r; - -#ifdef CONFIG_SMP - unsigned long long value; - - /* Disable TLB flush filter by setting HWCR.FFDIS on K8 - * bit 6 of msr C001_0015 - * - * Errata 63 for SH-B3 steppings - * Errata 122 for all steppings (F+ have it disabled by default) - */ - if (c->x86 == 15) { - rdmsrl(MSR_K7_HWCR, value); - value |= 1 << 6; - wrmsrl(MSR_K7_HWCR, value); - } -#endif - - /* - * FIXME: We should handle the K5 here. Set up the write - * range and also turn on MSR 83 bits 4 and 31 (write alloc, - * no bus pipeline) - */ - - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_bit(0*32+31, c->x86_capability); - - r = get_model_name(c); - - switch(c->x86) - { - case 4: - /* - * General Systems BIOSen alias the cpu frequency registers - * of the Elan at 0x000df000. Unfortuantly, one of the Linux - * drivers subsequently pokes it, and changes the CPU speed. - * Workaround : Remove the unneeded alias. - */ -#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ -#define CBAR_ENB (0x80000000) -#define CBAR_KEY (0X000000CB) - if (c->x86_model==9 || c->x86_model == 10) { - if (inl (CBAR) & CBAR_ENB) - outl (0 | CBAR_KEY, CBAR); - } - break; - case 5: - if( c->x86_model < 6 ) - { - /* Based on AMD doc 20734R - June 2000 */ - if ( c->x86_model == 0 ) { - clear_bit(X86_FEATURE_APIC, c->x86_capability); - set_bit(X86_FEATURE_PGE, c->x86_capability); - } - break; - } - - if ( c->x86_model == 6 && c->x86_mask == 1 ) { - const int K6_BUG_LOOP = 1000000; - int n; - void (*f_vide)(void); - unsigned long d, d2; - - printk(KERN_INFO "AMD K6 stepping B detected - "); - - /* - * It looks like AMD fixed the 2.6.2 bug and improved indirect - * calls at the same time. - */ - - n = K6_BUG_LOOP; - f_vide = vide; - rdtscl(d); - while (n--) - f_vide(); - rdtscl(d2); - d = d2-d; - - if (d > 20*K6_BUG_LOOP) - printk("system stability may be impaired when more than 32 MB are used.\n"); - else - printk("probably OK (after B9730xxxx).\n"); - printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); - } - - /* K6 with old style WHCR */ - if (c->x86_model < 8 || - (c->x86_model== 8 && c->x86_mask < 8)) { - /* We can only write allocate on the low 508Mb */ - if(mbytes>508) - mbytes=508; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0x0000FFFF)==0) { - unsigned long flags; - l=(1<<0)|((mbytes/4)<<1); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", - mbytes); - } - break; - } - - if ((c->x86_model == 8 && c->x86_mask >7) || - c->x86_model == 9 || c->x86_model == 13) { - /* The more serious chips .. */ - - if(mbytes>4092) - mbytes=4092; - - rdmsr(MSR_K6_WHCR, l, h); - if ((l&0xFFFF0000)==0) { - unsigned long flags; - l=((mbytes>>2)<<22)|(1<<16); - local_irq_save(flags); - wbinvd(); - wrmsr(MSR_K6_WHCR, l, h); - local_irq_restore(flags); - printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", - mbytes); - } - - /* Set MTRR capability flag if appropriate */ - if (c->x86_model == 13 || c->x86_model == 9 || - (c->x86_model == 8 && c->x86_mask >= 8)) - set_bit(X86_FEATURE_K6_MTRR, c->x86_capability); - break; - } - - if (c->x86_model == 10) { - /* AMD Geode LX is model 10 */ - /* placeholder for any needed mods */ - break; - } - break; - case 6: /* An Athlon/Duron */ - - /* Bit 15 of Athlon specific MSR 15, needs to be 0 - * to enable SSE on Palomino/Morgan/Barton CPU's. - * If the BIOS didn't enable it already, enable it here. - */ - if (c->x86_model >= 6 && c->x86_model <= 10) { - if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); - rdmsr(MSR_K7_HWCR, l, h); - l &= ~0x00008000; - wrmsr(MSR_K7_HWCR, l, h); - set_bit(X86_FEATURE_XMM, c->x86_capability); - } - } - - /* It's been determined by AMD that Athlons since model 8 stepping 1 - * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx - * As per AMD technical note 27212 0.2 - */ - if ((c->x86_model == 8 && c->x86_mask>=1) || (c->x86_model > 8)) { - rdmsr(MSR_K7_CLK_CTL, l, h); - if ((l & 0xfff00000) != 0x20000000) { - printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, - ((l & 0x000fffff)|0x20000000)); - wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); - } - } - break; - } - - switch (c->x86) { - case 15: - /* Use K8 tuning for Fam10h and Fam11h */ - case 0x10: - case 0x11: - set_bit(X86_FEATURE_K8, c->x86_capability); - break; - case 6: - set_bit(X86_FEATURE_K7, c->x86_capability); - break; - } - if (c->x86 >= 6) - set_bit(X86_FEATURE_FXSAVE_LEAK, c->x86_capability); - - display_cacheinfo(c); - - if (cpuid_eax(0x80000000) >= 0x80000008) { - c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; - } - - if (cpuid_eax(0x80000000) >= 0x80000007) { - c->x86_power = cpuid_edx(0x80000007); - if (c->x86_power & (1<<8)) - set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); - } - -#ifdef CONFIG_X86_HT - /* - * On a AMD multi core setup the lower bits of the APIC id - * distingush the cores. - */ - if (c->x86_max_cores > 1) { - int cpu = smp_processor_id(); - unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf; - - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - c->cpu_core_id = c->phys_proc_id & ((1<phys_proc_id >>= bits; - printk(KERN_INFO "CPU %d(%d) -> Core %d\n", - cpu, c->x86_max_cores, c->cpu_core_id); - } -#endif - - if (cpuid_eax(0x80000000) >= 0x80000006) { - if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000)) - num_cache_leaves = 4; - else - num_cache_leaves = 3; - } - -#ifdef CONFIG_X86_LOCAL_APIC - if (amd_apic_timer_broken()) - local_apic_timer_disabled = 1; -#endif - - if (c->x86 == 0x10 && !force_mwait) - clear_bit(X86_FEATURE_MWAIT, c->x86_capability); - - /* K6s reports MCEs but don't actually have all the MSRs */ - if (c->x86 < 6) - clear_bit(X86_FEATURE_MCE, c->x86_capability); -} - -static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) -{ - /* AMD errata T13 (order #21922) */ - if ((c->x86 == 6)) { - if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ - size = 64; - if (c->x86_model == 4 && - (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */ - size = 256; - } - return size; -} - -static struct cpu_dev amd_cpu_dev __cpuinitdata = { - .c_vendor = "AMD", - .c_ident = { "AuthenticAMD" }, - .c_models = { - { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = - { - [3] = "486 DX/2", - [7] = "486 DX/2-WB", - [8] = "486 DX/4", - [9] = "486 DX/4-WB", - [14] = "Am5x86-WT", - [15] = "Am5x86-WB" - } - }, - }, - .c_init = init_amd, - .c_size_cache = amd_size_cache, -}; - -int __init amd_init_cpu(void) -{ - cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev; - return 0; -} diff --git a/arch/i386/kernel/cpu/bugs.c b/arch/i386/kernel/cpu/bugs.c deleted file mode 100644 index 59266f03d1c..00000000000 --- a/arch/i386/kernel/cpu/bugs.c +++ /dev/null @@ -1,192 +0,0 @@ -/* - * arch/i386/cpu/bugs.c - * - * Copyright (C) 1994 Linus Torvalds - * - * Cyrix stuff, June 1998 by: - * - Rafael R. Reilova (moved everything from head.S), - * - * - Channing Corn (tests & fixes), - * - Andrew D. Balsa (code cleanup). - */ -#include -#include -#include -#include -#include -#include -#include -#include - -static int __init no_halt(char *s) -{ - boot_cpu_data.hlt_works_ok = 0; - return 1; -} - -__setup("no-hlt", no_halt); - -static int __init mca_pentium(char *s) -{ - mca_pentium_flag = 1; - return 1; -} - -__setup("mca-pentium", mca_pentium); - -static int __init no_387(char *s) -{ - boot_cpu_data.hard_math = 0; - write_cr0(0xE | read_cr0()); - return 1; -} - -__setup("no387", no_387); - -static double __initdata x = 4195835.0; -static double __initdata y = 3145727.0; - -/* - * This used to check for exceptions.. - * However, it turns out that to support that, - * the XMM trap handlers basically had to - * be buggy. So let's have a correct XMM trap - * handler, and forget about printing out - * some status at boot. - * - * We should really only care about bugs here - * anyway. Not features. - */ -static void __init check_fpu(void) -{ - if (!boot_cpu_data.hard_math) { -#ifndef CONFIG_MATH_EMULATION - printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); - printk(KERN_EMERG "Giving up.\n"); - for (;;) ; -#endif - return; - } - -/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */ - /* Test for the divl bug.. */ - __asm__("fninit\n\t" - "fldl %1\n\t" - "fdivl %2\n\t" - "fmull %2\n\t" - "fldl %1\n\t" - "fsubp %%st,%%st(1)\n\t" - "fistpl %0\n\t" - "fwait\n\t" - "fninit" - : "=m" (*&boot_cpu_data.fdiv_bug) - : "m" (*&x), "m" (*&y)); - if (boot_cpu_data.fdiv_bug) - printk("Hmm, FPU with FDIV bug.\n"); -} - -static void __init check_hlt(void) -{ - if (paravirt_enabled()) - return; - - printk(KERN_INFO "Checking 'hlt' instruction... "); - if (!boot_cpu_data.hlt_works_ok) { - printk("disabled\n"); - return; - } - halt(); - halt(); - halt(); - halt(); - printk("OK.\n"); -} - -/* - * Most 386 processors have a bug where a POPAD can lock the - * machine even from user space. - */ - -static void __init check_popad(void) -{ -#ifndef CONFIG_X86_POPAD_OK - int res, inp = (int) &res; - - printk(KERN_INFO "Checking for popad bug... "); - __asm__ __volatile__( - "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " - : "=&a" (res) - : "d" (inp) - : "ecx", "edi" ); - /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ - if (res != 12345678) printk( "Buggy.\n" ); - else printk( "OK.\n" ); -#endif -} - -/* - * Check whether we are able to run this kernel safely on SMP. - * - * - In order to run on a i386, we need to be compiled for i386 - * (for due to lack of "invlpg" and working WP on a i386) - * - In order to run on anything without a TSC, we need to be - * compiled for a i486. - * - In order to support the local APIC on a buggy Pentium machine, - * we need to be compiled with CONFIG_X86_GOOD_APIC disabled, - * which happens implicitly if compiled for a Pentium or lower - * (unless an advanced selection of CPU features is used) as an - * otherwise config implies a properly working local APIC without - * the need to do extra reads from the APIC. -*/ - -static void __init check_config(void) -{ -/* - * We'd better not be a i386 if we're configured to use some - * i486+ only features! (WP works in supervisor mode and the - * new "invlpg" and "bswap" instructions) - */ -#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP) - if (boot_cpu_data.x86 == 3) - panic("Kernel requires i486+ for 'invlpg' and other features"); -#endif - -/* - * If we configured ourselves for a TSC, we'd better have one! - */ -#ifdef CONFIG_X86_TSC - if (!cpu_has_tsc && !tsc_disable) - panic("Kernel compiled for Pentium+, requires TSC feature!"); -#endif - -/* - * If we were told we had a good local APIC, check for buggy Pentia, - * i.e. all B steppings and the C2 stepping of P54C when using their - * integrated APIC (see 11AP erratum in "Pentium Processor - * Specification Update"). - */ -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC) - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL - && cpu_has_apic - && boot_cpu_data.x86 == 5 - && boot_cpu_data.x86_model == 2 - && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11)) - panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!"); -#endif -} - - -void __init check_bugs(void) -{ - identify_boot_cpu(); -#ifndef CONFIG_SMP - printk("CPU: "); - print_cpu_info(&boot_cpu_data); -#endif - check_config(); - check_fpu(); - check_hlt(); - check_popad(); - init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); - alternative_instructions(); -} diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c deleted file mode 100644 index 473eac883c7..00000000000 --- a/arch/i386/kernel/cpu/centaur.c +++ /dev/null @@ -1,471 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include "cpu.h" - -#ifdef CONFIG_X86_OOSTORE - -static u32 __cpuinit power2(u32 x) -{ - u32 s=1; - while(s<=x) - s<<=1; - return s>>=1; -} - - -/* - * Set up an actual MCR - */ - -static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key) -{ - u32 lo, hi; - - hi = base & ~0xFFF; - lo = ~(size-1); /* Size is a power of 2 so this makes a mask */ - lo &= ~0xFFF; /* Remove the ctrl value bits */ - lo |= key; /* Attribute we wish to set */ - wrmsr(reg+MSR_IDT_MCR0, lo, hi); - mtrr_centaur_report_mcr(reg, lo, hi); /* Tell the mtrr driver */ -} - -/* - * Figure what we can cover with MCR's - * - * Shortcut: We know you can't put 4Gig of RAM on a winchip - */ - -static u32 __cpuinit ramtop(void) /* 16388 */ -{ - int i; - u32 top = 0; - u32 clip = 0xFFFFFFFFUL; - - for (i = 0; i < e820.nr_map; i++) { - unsigned long start, end; - - if (e820.map[i].addr > 0xFFFFFFFFUL) - continue; - /* - * Don't MCR over reserved space. Ignore the ISA hole - * we frob around that catastrophy already - */ - - if (e820.map[i].type == E820_RESERVED) - { - if(e820.map[i].addr >= 0x100000UL && e820.map[i].addr < clip) - clip = e820.map[i].addr; - continue; - } - start = e820.map[i].addr; - end = e820.map[i].addr + e820.map[i].size; - if (start >= end) - continue; - if (end > top) - top = end; - } - /* Everything below 'top' should be RAM except for the ISA hole. - Because of the limited MCR's we want to map NV/ACPI into our - MCR range for gunk in RAM - - Clip might cause us to MCR insufficient RAM but that is an - acceptable failure mode and should only bite obscure boxes with - a VESA hole at 15Mb - - The second case Clip sometimes kicks in is when the EBDA is marked - as reserved. Again we fail safe with reasonable results - */ - - if(top>clip) - top=clip; - - return top; -} - -/* - * Compute a set of MCR's to give maximum coverage - */ - -static int __cpuinit centaur_mcr_compute(int nr, int key) -{ - u32 mem = ramtop(); - u32 root = power2(mem); - u32 base = root; - u32 top = root; - u32 floor = 0; - int ct = 0; - - while(ct high && fspace > low) - { - centaur_mcr_insert(ct, floor, fspace, key); - floor += fspace; - } - else if(high > low) - { - centaur_mcr_insert(ct, top, high, key); - top += high; - } - else if(low > 0) - { - base -= low; - centaur_mcr_insert(ct, base, low, key); - } - else break; - ct++; - } - /* - * We loaded ct values. We now need to set the mask. The caller - * must do this bit. - */ - - return ct; -} - -static void __cpuinit centaur_create_optimal_mcr(void) -{ - int i; - /* - * Allocate up to 6 mcrs to mark as much of ram as possible - * as write combining and weak write ordered. - * - * To experiment with: Linux never uses stack operations for - * mmio spaces so we could globally enable stack operation wc - * - * Load the registers with type 31 - full write combining, all - * writes weakly ordered. - */ - int used = centaur_mcr_compute(6, 31); - - /* - * Wipe unused MCRs - */ - - for(i=used;i<8;i++) - wrmsr(MSR_IDT_MCR0+i, 0, 0); -} - -static void __cpuinit winchip2_create_optimal_mcr(void) -{ - u32 lo, hi; - int i; - - /* - * Allocate up to 6 mcrs to mark as much of ram as possible - * as write combining, weak store ordered. - * - * Load the registers with type 25 - * 8 - weak write ordering - * 16 - weak read ordering - * 1 - write combining - */ - - int used = centaur_mcr_compute(6, 25); - - /* - * Mark the registers we are using. - */ - - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - for(i=0;i>17) & 7; - lo |= key<<6; /* replace with unlock key */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); -} - -static void __cpuinit winchip2_protect_mcr(void) -{ - u32 lo, hi; - - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - lo&=~0x1C0; /* blank bits 8-6 */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); -} -#endif /* CONFIG_X86_OOSTORE */ - -#define ACE_PRESENT (1 << 6) -#define ACE_ENABLED (1 << 7) -#define ACE_FCR (1 << 28) /* MSR_VIA_FCR */ - -#define RNG_PRESENT (1 << 2) -#define RNG_ENABLED (1 << 3) -#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ - -static void __cpuinit init_c3(struct cpuinfo_x86 *c) -{ - u32 lo, hi; - - /* Test for Centaur Extended Feature Flags presence */ - if (cpuid_eax(0xC0000000) >= 0xC0000001) { - u32 tmp = cpuid_edx(0xC0000001); - - /* enable ACE unit, if present and disabled */ - if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) { - rdmsr (MSR_VIA_FCR, lo, hi); - lo |= ACE_FCR; /* enable ACE unit */ - wrmsr (MSR_VIA_FCR, lo, hi); - printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n"); - } - - /* enable RNG unit, if present and disabled */ - if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) { - rdmsr (MSR_VIA_RNG, lo, hi); - lo |= RNG_ENABLE; /* enable RNG unit */ - wrmsr (MSR_VIA_RNG, lo, hi); - printk(KERN_INFO "CPU: Enabled h/w RNG\n"); - } - - /* store Centaur Extended Feature Flags as - * word 5 of the CPU capability bit array - */ - c->x86_capability[5] = cpuid_edx(0xC0000001); - } - - /* Cyrix III family needs CX8 & PGE explicity enabled. */ - if (c->x86_model >=6 && c->x86_model <= 9) { - rdmsr (MSR_VIA_FCR, lo, hi); - lo |= (1<<1 | 1<<7); - wrmsr (MSR_VIA_FCR, lo, hi); - set_bit(X86_FEATURE_CX8, c->x86_capability); - } - - /* Before Nehemiah, the C3's had 3dNOW! */ - if (c->x86_model >=6 && c->x86_model <9) - set_bit(X86_FEATURE_3DNOW, c->x86_capability); - - get_model_name(c); - display_cacheinfo(c); -} - -static void __cpuinit init_centaur(struct cpuinfo_x86 *c) -{ - enum { - ECX8=1<<1, - EIERRINT=1<<2, - DPM=1<<3, - DMCE=1<<4, - DSTPCLK=1<<5, - ELINEAR=1<<6, - DSMC=1<<7, - DTLOCK=1<<8, - EDCTLB=1<<8, - EMMX=1<<9, - DPDC=1<<11, - EBRPRED=1<<12, - DIC=1<<13, - DDC=1<<14, - DNA=1<<15, - ERETSTK=1<<16, - E2MMX=1<<19, - EAMD3D=1<<20, - }; - - char *name; - u32 fcr_set=0; - u32 fcr_clr=0; - u32 lo,hi,newlo; - u32 aa,bb,cc,dd; - - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_bit(0*32+31, c->x86_capability); - - switch (c->x86) { - - case 5: - switch(c->x86_model) { - case 4: - name="C6"; - fcr_set=ECX8|DSMC|EDCTLB|EMMX|ERETSTK; - fcr_clr=DPDC; - printk(KERN_NOTICE "Disabling bugged TSC.\n"); - clear_bit(X86_FEATURE_TSC, c->x86_capability); -#ifdef CONFIG_X86_OOSTORE - centaur_create_optimal_mcr(); - /* Enable - write combining on non-stack, non-string - write combining on string, all types - weak write ordering - - The C6 original lacks weak read order - - Note 0x120 is write only on Winchip 1 */ - - wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); -#endif - break; - case 8: - switch(c->x86_mask) { - default: - name="2"; - break; - case 7 ... 9: - name="2A"; - break; - case 10 ... 15: - name="2B"; - break; - } - fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; - fcr_clr=DPDC; -#ifdef CONFIG_X86_OOSTORE - winchip2_unprotect_mcr(); - winchip2_create_optimal_mcr(); - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - /* Enable - write combining on non-stack, non-string - write combining on string, all types - weak write ordering - */ - lo|=31; - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); - winchip2_protect_mcr(); -#endif - break; - case 9: - name="3"; - fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; - fcr_clr=DPDC; -#ifdef CONFIG_X86_OOSTORE - winchip2_unprotect_mcr(); - winchip2_create_optimal_mcr(); - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - /* Enable - write combining on non-stack, non-string - write combining on string, all types - weak write ordering - */ - lo|=31; - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); - winchip2_protect_mcr(); -#endif - break; - default: - name="??"; - } - - rdmsr(MSR_IDT_FCR1, lo, hi); - newlo=(lo|fcr_set) & (~fcr_clr); - - if (newlo!=lo) { - printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", lo, newlo ); - wrmsr(MSR_IDT_FCR1, newlo, hi ); - } else { - printk(KERN_INFO "Centaur FCR is 0x%X\n",lo); - } - /* Emulate MTRRs using Centaur's MCR. */ - set_bit(X86_FEATURE_CENTAUR_MCR, c->x86_capability); - /* Report CX8 */ - set_bit(X86_FEATURE_CX8, c->x86_capability); - /* Set 3DNow! on Winchip 2 and above. */ - if (c->x86_model >=8) - set_bit(X86_FEATURE_3DNOW, c->x86_capability); - /* See if we can find out some more. */ - if ( cpuid_eax(0x80000000) >= 0x80000005 ) { - /* Yes, we can. */ - cpuid(0x80000005,&aa,&bb,&cc,&dd); - /* Add L1 data and code cache sizes. */ - c->x86_cache_size = (cc>>24)+(dd>>24); - } - sprintf( c->x86_model_id, "WinChip %s", name ); - break; - - case 6: - init_c3(c); - break; - } -} - -static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size) -{ - /* VIA C3 CPUs (670-68F) need further shifting. */ - if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) - size >>= 8; - - /* VIA also screwed up Nehemiah stepping 1, and made - it return '65KB' instead of '64KB' - - Note, it seems this may only be in engineering samples. */ - if ((c->x86==6) && (c->x86_model==9) && (c->x86_mask==1) && (size==65)) - size -=1; - - return size; -} - -static struct cpu_dev centaur_cpu_dev __cpuinitdata = { - .c_vendor = "Centaur", - .c_ident = { "CentaurHauls" }, - .c_init = init_centaur, - .c_size_cache = centaur_size_cache, -}; - -int __init centaur_init_cpu(void) -{ - cpu_devs[X86_VENDOR_CENTAUR] = ¢aur_cpu_dev; - return 0; -} diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c deleted file mode 100644 index d506201d397..00000000000 --- a/arch/i386/kernel/cpu/common.c +++ /dev/null @@ -1,733 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_X86_LOCAL_APIC -#include -#include -#include -#endif - -#include "cpu.h" - -DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { - [GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 }, - [GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 }, - [GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 }, - [GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 }, - /* - * Segments used for calling PnP BIOS have byte granularity. - * They code segments and data segments have fixed 64k limits, - * the transfer segment sizes are set at run time. - */ - [GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ - [GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */ - [GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */ - [GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */ - /* - * The APM segments have byte granularity and their bases - * are set at run time. All have 64k limits. - */ - [GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ - /* 16-bit code */ - [GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 }, - [GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */ - - [GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 }, - [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 }, -} }; -EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); - -static int cachesize_override __cpuinitdata = -1; -static int disable_x86_fxsr __cpuinitdata; -static int disable_x86_serial_nr __cpuinitdata = 1; -static int disable_x86_sep __cpuinitdata; - -struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; - -extern int disable_pse; - -static void __cpuinit default_init(struct cpuinfo_x86 * c) -{ - /* Not much we can do here... */ - /* Check if at least it has cpuid */ - if (c->cpuid_level == -1) { - /* No cpuid. It must be an ancient CPU */ - if (c->x86 == 4) - strcpy(c->x86_model_id, "486"); - else if (c->x86 == 3) - strcpy(c->x86_model_id, "386"); - } -} - -static struct cpu_dev __cpuinitdata default_cpu = { - .c_init = default_init, - .c_vendor = "Unknown", -}; -static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu; - -static int __init cachesize_setup(char *str) -{ - get_option (&str, &cachesize_override); - return 1; -} -__setup("cachesize=", cachesize_setup); - -int __cpuinit get_model_name(struct cpuinfo_x86 *c) -{ - unsigned int *v; - char *p, *q; - - if (cpuid_eax(0x80000000) < 0x80000004) - return 0; - - v = (unsigned int *) c->x86_model_id; - cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); - cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); - cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); - c->x86_model_id[48] = 0; - - /* Intel chips right-justify this string for some dumb reason; - undo that brain damage */ - p = q = &c->x86_model_id[0]; - while ( *p == ' ' ) - p++; - if ( p != q ) { - while ( *p ) - *q++ = *p++; - while ( q <= &c->x86_model_id[48] ) - *q++ = '\0'; /* Zero-pad the rest */ - } - - return 1; -} - - -void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) -{ - unsigned int n, dummy, ecx, edx, l2size; - - n = cpuid_eax(0x80000000); - - if (n >= 0x80000005) { - cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); - c->x86_cache_size=(ecx>>24)+(edx>>24); - } - - if (n < 0x80000006) /* Some chips just has a large L1. */ - return; - - ecx = cpuid_ecx(0x80000006); - l2size = ecx >> 16; - - /* do processor-specific cache resizing */ - if (this_cpu->c_size_cache) - l2size = this_cpu->c_size_cache(c,l2size); - - /* Allow user to override all this if necessary. */ - if (cachesize_override != -1) - l2size = cachesize_override; - - if ( l2size == 0 ) - return; /* Again, no L2 cache is possible */ - - c->x86_cache_size = l2size; - - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); -} - -/* Naming convention should be: [()] */ -/* This table only is used unless init_() below doesn't set it; */ -/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ - -/* Look up CPU names by table lookup. */ -static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) -{ - struct cpu_model_info *info; - - if ( c->x86_model >= 16 ) - return NULL; /* Range check */ - - if (!this_cpu) - return NULL; - - info = this_cpu->c_models; - - while (info && info->family) { - if (info->family == c->x86) - return info->model_names[c->x86_model]; - info++; - } - return NULL; /* Not found */ -} - - -static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) -{ - char *v = c->x86_vendor_id; - int i; - static int printed; - - for (i = 0; i < X86_VENDOR_NUM; i++) { - if (cpu_devs[i]) { - if (!strcmp(v,cpu_devs[i]->c_ident[0]) || - (cpu_devs[i]->c_ident[1] && - !strcmp(v,cpu_devs[i]->c_ident[1]))) { - c->x86_vendor = i; - if (!early) - this_cpu = cpu_devs[i]; - return; - } - } - } - if (!printed) { - printed++; - printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); - printk(KERN_ERR "CPU: Your system may be unstable.\n"); - } - c->x86_vendor = X86_VENDOR_UNKNOWN; - this_cpu = &default_cpu; -} - - -static int __init x86_fxsr_setup(char * s) -{ - /* Tell all the other CPU's to not use it... */ - disable_x86_fxsr = 1; - - /* - * ... and clear the bits early in the boot_cpu_data - * so that the bootup process doesn't try to do this - * either. - */ - clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability); - clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability); - return 1; -} -__setup("nofxsr", x86_fxsr_setup); - - -static int __init x86_sep_setup(char * s) -{ - disable_x86_sep = 1; - return 1; -} -__setup("nosep", x86_sep_setup); - - -/* Standard macro to see if a specific flag is changeable */ -static inline int flag_is_changeable_p(u32 flag) -{ - u32 f1, f2; - - asm("pushfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl %2,%0\n\t" - "pushl %0\n\t" - "popfl\n\t" - "pushfl\n\t" - "popl %0\n\t" - "popfl\n\t" - : "=&r" (f1), "=&r" (f2) - : "ir" (flag)); - - return ((f1^f2) & flag) != 0; -} - - -/* Probe for the CPUID instruction */ -static int __cpuinit have_cpuid_p(void) -{ - return flag_is_changeable_p(X86_EFLAGS_ID); -} - -void __init cpu_detect(struct cpuinfo_x86 *c) -{ - /* Get vendor name */ - cpuid(0x00000000, &c->cpuid_level, - (int *)&c->x86_vendor_id[0], - (int *)&c->x86_vendor_id[8], - (int *)&c->x86_vendor_id[4]); - - c->x86 = 4; - if (c->cpuid_level >= 0x00000001) { - u32 junk, tfms, cap0, misc; - cpuid(0x00000001, &tfms, &misc, &junk, &cap0); - c->x86 = (tfms >> 8) & 15; - c->x86_model = (tfms >> 4) & 15; - if (c->x86 == 0xf) - c->x86 += (tfms >> 20) & 0xff; - if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - c->x86_mask = tfms & 15; - if (cap0 & (1<<19)) - c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; - } -} - -/* Do minimum CPU detection early. - Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. - The others are not touched to avoid unwanted side effects. - - WARNING: this function is only called on the BP. Don't add code here - that is supposed to run on all CPUs. */ -static void __init early_cpu_detect(void) -{ - struct cpuinfo_x86 *c = &boot_cpu_data; - - c->x86_cache_alignment = 32; - - if (!have_cpuid_p()) - return; - - cpu_detect(c); - - get_cpu_vendor(c, 1); -} - -static void __cpuinit generic_identify(struct cpuinfo_x86 * c) -{ - u32 tfms, xlvl; - int ebx; - - if (have_cpuid_p()) { - /* Get vendor name */ - cpuid(0x00000000, &c->cpuid_level, - (int *)&c->x86_vendor_id[0], - (int *)&c->x86_vendor_id[8], - (int *)&c->x86_vendor_id[4]); - - get_cpu_vendor(c, 0); - /* Initialize the standard set of capabilities */ - /* Note that the vendor-specific code below might override */ - - /* Intel-defined flags: level 0x00000001 */ - if ( c->cpuid_level >= 0x00000001 ) { - u32 capability, excap; - cpuid(0x00000001, &tfms, &ebx, &excap, &capability); - c->x86_capability[0] = capability; - c->x86_capability[4] = excap; - c->x86 = (tfms >> 8) & 15; - c->x86_model = (tfms >> 4) & 15; - if (c->x86 == 0xf) - c->x86 += (tfms >> 20) & 0xff; - if (c->x86 >= 0x6) - c->x86_model += ((tfms >> 16) & 0xF) << 4; - c->x86_mask = tfms & 15; -#ifdef CONFIG_X86_HT - c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); -#else - c->apicid = (ebx >> 24) & 0xFF; -#endif - if (c->x86_capability[0] & (1<<19)) - c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; - } else { - /* Have CPUID level 0 only - unheard of */ - c->x86 = 4; - } - - /* AMD-defined flags: level 0x80000001 */ - xlvl = cpuid_eax(0x80000000); - if ( (xlvl & 0xffff0000) == 0x80000000 ) { - if ( xlvl >= 0x80000001 ) { - c->x86_capability[1] = cpuid_edx(0x80000001); - c->x86_capability[6] = cpuid_ecx(0x80000001); - } - if ( xlvl >= 0x80000004 ) - get_model_name(c); /* Default name */ - } - - init_scattered_cpuid_features(c); - } - - early_intel_workaround(c); - -#ifdef CONFIG_X86_HT - c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; -#endif -} - -static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { - /* Disable processor serial number */ - unsigned long lo,hi; - rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi); - lo |= 0x200000; - wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi); - printk(KERN_NOTICE "CPU serial number disabled.\n"); - clear_bit(X86_FEATURE_PN, c->x86_capability); - - /* Disabling the serial number may affect the cpuid level */ - c->cpuid_level = cpuid_eax(0); - } -} - -static int __init x86_serial_nr_setup(char *s) -{ - disable_x86_serial_nr = 0; - return 1; -} -__setup("serialnumber", x86_serial_nr_setup); - - - -/* - * This does the hard work of actually picking apart the CPU stuff... - */ -static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) -{ - int i; - - c->loops_per_jiffy = loops_per_jiffy; - c->x86_cache_size = -1; - c->x86_vendor = X86_VENDOR_UNKNOWN; - c->cpuid_level = -1; /* CPUID not detected */ - c->x86_model = c->x86_mask = 0; /* So far unknown... */ - c->x86_vendor_id[0] = '\0'; /* Unset */ - c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_max_cores = 1; - c->x86_clflush_size = 32; - memset(&c->x86_capability, 0, sizeof c->x86_capability); - - if (!have_cpuid_p()) { - /* First of all, decide if this is a 486 or higher */ - /* It's a 486 if we can modify the AC flag */ - if ( flag_is_changeable_p(X86_EFLAGS_AC) ) - c->x86 = 4; - else - c->x86 = 3; - } - - generic_identify(c); - - printk(KERN_DEBUG "CPU: After generic identify, caps:"); - for (i = 0; i < NCAPINTS; i++) - printk(" %08lx", c->x86_capability[i]); - printk("\n"); - - if (this_cpu->c_identify) { - this_cpu->c_identify(c); - - printk(KERN_DEBUG "CPU: After vendor identify, caps:"); - for (i = 0; i < NCAPINTS; i++) - printk(" %08lx", c->x86_capability[i]); - printk("\n"); - } - - /* - * Vendor-specific initialization. In this section we - * canonicalize the feature flags, meaning if there are - * features a certain CPU supports which CPUID doesn't - * tell us, CPUID claiming incorrect flags, or other bugs, - * we handle them here. - * - * At the end of this section, c->x86_capability better - * indicate the features this CPU genuinely supports! - */ - if (this_cpu->c_init) - this_cpu->c_init(c); - - /* Disable the PN if appropriate */ - squash_the_stupid_serial_number(c); - - /* - * The vendor-specific functions might have changed features. Now - * we do "generic changes." - */ - - /* TSC disabled? */ - if ( tsc_disable ) - clear_bit(X86_FEATURE_TSC, c->x86_capability); - - /* FXSR disabled? */ - if (disable_x86_fxsr) { - clear_bit(X86_FEATURE_FXSR, c->x86_capability); - clear_bit(X86_FEATURE_XMM, c->x86_capability); - } - - /* SEP disabled? */ - if (disable_x86_sep) - clear_bit(X86_FEATURE_SEP, c->x86_capability); - - if (disable_pse) - clear_bit(X86_FEATURE_PSE, c->x86_capability); - - /* If the model name is still unset, do table lookup. */ - if ( !c->x86_model_id[0] ) { - char *p; - p = table_lookup_model(c); - if ( p ) - strcpy(c->x86_model_id, p); - else - /* Last resort... */ - sprintf(c->x86_model_id, "%02x/%02x", - c->x86, c->x86_model); - } - - /* Now the feature flags better reflect actual CPU features! */ - - printk(KERN_DEBUG "CPU: After all inits, caps:"); - for (i = 0; i < NCAPINTS; i++) - printk(" %08lx", c->x86_capability[i]); - printk("\n"); - - /* - * On SMP, boot_cpu_data holds the common feature set between - * all CPUs; so make sure that we indicate which features are - * common between the CPUs. The first time this routine gets - * executed, c == &boot_cpu_data. - */ - if ( c != &boot_cpu_data ) { - /* AND the already accumulated flags with these */ - for ( i = 0 ; i < NCAPINTS ; i++ ) - boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; - } - - /* Init Machine Check Exception if available. */ - mcheck_init(c); -} - -void __init identify_boot_cpu(void) -{ - identify_cpu(&boot_cpu_data); - sysenter_setup(); - enable_sep_cpu(); - mtrr_bp_init(); -} - -void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) -{ - BUG_ON(c == &boot_cpu_data); - identify_cpu(c); - enable_sep_cpu(); - mtrr_ap_init(); -} - -#ifdef CONFIG_X86_HT -void __cpuinit detect_ht(struct cpuinfo_x86 *c) -{ - u32 eax, ebx, ecx, edx; - int index_msb, core_bits; - - cpuid(1, &eax, &ebx, &ecx, &edx); - - if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) - return; - - smp_num_siblings = (ebx & 0xff0000) >> 16; - - if (smp_num_siblings == 1) { - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); - } else if (smp_num_siblings > 1 ) { - - if (smp_num_siblings > NR_CPUS) { - printk(KERN_WARNING "CPU: Unsupported number of the " - "siblings %d", smp_num_siblings); - smp_num_siblings = 1; - return; - } - - index_msb = get_count_order(smp_num_siblings); - c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); - - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - - smp_num_siblings = smp_num_siblings / c->x86_max_cores; - - index_msb = get_count_order(smp_num_siblings) ; - - core_bits = get_count_order(c->x86_max_cores); - - c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & - ((1 << core_bits) - 1); - - if (c->x86_max_cores > 1) - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); - } -} -#endif - -void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) -{ - char *vendor = NULL; - - if (c->x86_vendor < X86_VENDOR_NUM) - vendor = this_cpu->c_vendor; - else if (c->cpuid_level >= 0) - vendor = c->x86_vendor_id; - - if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) - printk("%s ", vendor); - - if (!c->x86_model_id[0]) - printk("%d86", c->x86); - else - printk("%s", c->x86_model_id); - - if (c->x86_mask || c->cpuid_level >= 0) - printk(" stepping %02x\n", c->x86_mask); - else - printk("\n"); -} - -cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; - -/* This is hacky. :) - * We're emulating future behavior. - * In the future, the cpu-specific init functions will be called implicitly - * via the magic of initcalls. - * They will insert themselves into the cpu_devs structure. - * Then, when cpu_init() is called, we can just iterate over that array. - */ - -extern int intel_cpu_init(void); -extern int cyrix_init_cpu(void); -extern int nsc_init_cpu(void); -extern int amd_init_cpu(void); -extern int centaur_init_cpu(void); -extern int transmeta_init_cpu(void); -extern int nexgen_init_cpu(void); -extern int umc_init_cpu(void); - -void __init early_cpu_init(void) -{ - intel_cpu_init(); - cyrix_init_cpu(); - nsc_init_cpu(); - amd_init_cpu(); - centaur_init_cpu(); - transmeta_init_cpu(); - nexgen_init_cpu(); - umc_init_cpu(); - early_cpu_detect(); - -#ifdef CONFIG_DEBUG_PAGEALLOC - /* pse is not compatible with on-the-fly unmapping, - * disable it even if the cpus claim to support it. - */ - clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); - disable_pse = 1; -#endif -} - -/* Make sure %fs is initialized properly in idle threads */ -struct pt_regs * __devinit idle_regs(struct pt_regs *regs) -{ - memset(regs, 0, sizeof(struct pt_regs)); - regs->xfs = __KERNEL_PERCPU; - return regs; -} - -/* Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. */ -void switch_to_new_gdt(void) -{ - struct Xgt_desc_struct gdt_descr; - - gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); - asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); -} - -/* - * cpu_init() initializes state that is per-CPU. Some data is already - * initialized (naturally) in the bootstrap process, such as the GDT - * and IDT. We reload them nevertheless, this function acts as a - * 'CPU state barrier', nothing should get across. - */ -void __cpuinit cpu_init(void) -{ - int cpu = smp_processor_id(); - struct task_struct *curr = current; - struct tss_struct * t = &per_cpu(init_tss, cpu); - struct thread_struct *thread = &curr->thread; - - if (cpu_test_and_set(cpu, cpu_initialized)) { - printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); - for (;;) local_irq_enable(); - } - - printk(KERN_INFO "Initializing CPU#%d\n", cpu); - - if (cpu_has_vme || cpu_has_tsc || cpu_has_de) - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - if (tsc_disable && cpu_has_tsc) { - printk(KERN_NOTICE "Disabling TSC...\n"); - /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ - clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); - set_in_cr4(X86_CR4_TSD); - } - - load_idt(&idt_descr); - switch_to_new_gdt(); - - /* - * Set up and load the per-CPU TSS and LDT - */ - atomic_inc(&init_mm.mm_count); - curr->active_mm = &init_mm; - if (curr->mm) - BUG(); - enter_lazy_tlb(&init_mm, curr); - - load_esp0(t, thread); - set_tss_desc(cpu,t); - load_TR_desc(); - load_LDT(&init_mm.context); - -#ifdef CONFIG_DOUBLEFAULT - /* Set up doublefault TSS pointer in the GDT */ - __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); -#endif - - /* Clear %gs. */ - asm volatile ("mov %0, %%gs" : : "r" (0)); - - /* Clear all 6 debug registers: */ - set_debugreg(0, 0); - set_debugreg(0, 1); - set_debugreg(0, 2); - set_debugreg(0, 3); - set_debugreg(0, 6); - set_debugreg(0, 7); - - /* - * Force FPU initialization: - */ - current_thread_info()->status = 0; - clear_used_math(); - mxcsr_feature_mask_init(); -} - -#ifdef CONFIG_HOTPLUG_CPU -void __cpuinit cpu_uninit(void) -{ - int cpu = raw_smp_processor_id(); - cpu_clear(cpu, cpu_initialized); - - /* lazy TLB state */ - per_cpu(cpu_tlbstate, cpu).state = 0; - per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; -} -#endif diff --git a/arch/i386/kernel/cpu/cpu.h b/arch/i386/kernel/cpu/cpu.h deleted file mode 100644 index 2f6432cef6f..00000000000 --- a/arch/i386/kernel/cpu/cpu.h +++ /dev/null @@ -1,28 +0,0 @@ - -struct cpu_model_info { - int vendor; - int family; - char *model_names[16]; -}; - -/* attempt to consolidate cpu attributes */ -struct cpu_dev { - char * c_vendor; - - /* some have two possibilities for cpuid string */ - char * c_ident[2]; - - struct cpu_model_info c_models[4]; - - void (*c_init)(struct cpuinfo_x86 * c); - void (*c_identify)(struct cpuinfo_x86 * c); - unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); -}; - -extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; - -extern int get_model_name(struct cpuinfo_x86 *c); -extern void display_cacheinfo(struct cpuinfo_x86 *c); - -extern void early_intel_workaround(struct cpuinfo_x86 *c); - diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c deleted file mode 100644 index 122d2d75aa9..00000000000 --- a/arch/i386/kernel/cpu/cyrix.c +++ /dev/null @@ -1,463 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cpu.h" - -/* - * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU - */ -static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) -{ - unsigned char ccr2, ccr3; - unsigned long flags; - - /* we test for DEVID by checking whether CCR3 is writable */ - local_irq_save(flags); - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, ccr3 ^ 0x80); - getCx86(0xc0); /* dummy to change bus */ - - if (getCx86(CX86_CCR3) == ccr3) { /* no DEVID regs. */ - ccr2 = getCx86(CX86_CCR2); - setCx86(CX86_CCR2, ccr2 ^ 0x04); - getCx86(0xc0); /* dummy */ - - if (getCx86(CX86_CCR2) == ccr2) /* old Cx486SLC/DLC */ - *dir0 = 0xfd; - else { /* Cx486S A step */ - setCx86(CX86_CCR2, ccr2); - *dir0 = 0xfe; - } - } - else { - setCx86(CX86_CCR3, ccr3); /* restore CCR3 */ - - /* read DIR0 and DIR1 CPU registers */ - *dir0 = getCx86(CX86_DIR0); - *dir1 = getCx86(CX86_DIR1); - } - local_irq_restore(flags); -} - -/* - * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in - * order to identify the Cyrix CPU model after we're out of setup.c - * - * Actually since bugs.h doesn't even reference this perhaps someone should - * fix the documentation ??? - */ -static unsigned char Cx86_dir0_msb __cpuinitdata = 0; - -static char Cx86_model[][9] __cpuinitdata = { - "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", - "M II ", "Unknown" -}; -static char Cx486_name[][5] __cpuinitdata = { - "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", - "SRx2", "DRx2" -}; -static char Cx486S_name[][4] __cpuinitdata = { - "S", "S2", "Se", "S2e" -}; -static char Cx486D_name[][4] __cpuinitdata = { - "DX", "DX2", "?", "?", "?", "DX4" -}; -static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock"; -static char cyrix_model_mult1[] __cpuinitdata = "12??43"; -static char cyrix_model_mult2[] __cpuinitdata = "12233445"; - -/* - * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old - * BIOSes for compatibility with DOS games. This makes the udelay loop - * work correctly, and improves performance. - * - * FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP - */ - -extern void calibrate_delay(void) __init; - -static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c) -{ - unsigned long flags; - - if (Cx86_dir0_msb == 3) { - unsigned char ccr3, ccr5; - - local_irq_save(flags); - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - ccr5 = getCx86(CX86_CCR5); - if (ccr5 & 2) - setCx86(CX86_CCR5, ccr5 & 0xfd); /* reset SLOP */ - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ - local_irq_restore(flags); - - if (ccr5 & 2) { /* possible wrong calibration done */ - printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n"); - calibrate_delay(); - c->loops_per_jiffy = loops_per_jiffy; - } - } -} - - -static void __cpuinit set_cx86_reorder(void) -{ - u8 ccr3; - - printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n"); - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN  */ - - /* Load/Store Serialize to mem access disable (=reorder it)  */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); - /* set load/store serialize from 1GB to 4GB */ - ccr3 |= 0xe0; - setCx86(CX86_CCR3, ccr3); -} - -static void __cpuinit set_cx86_memwb(void) -{ - u32 cr0; - - printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); - - /* CCR2 bit 2: unlock NW bit */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); - /* set 'Not Write-through' */ - cr0 = 0x20000000; - write_cr0(read_cr0() | cr0); - /* CCR2 bit 2: lock NW bit and set WT1 */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); -} - -static void __cpuinit set_cx86_inc(void) -{ - unsigned char ccr3; - - printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n"); - - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN  */ - /* PCR1 -- Performance Control */ - /* Incrementor on, whatever that is */ - setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); - /* PCR0 -- Performance Control */ - /* Incrementor Margin 10 */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ -} - -/* - * Configure later MediaGX and/or Geode processor. - */ - -static void __cpuinit geode_configure(void) -{ - unsigned long flags; - u8 ccr3; - local_irq_save(flags); - - /* Suspend on halt power saving and enable #SUSP pin */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); - - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - - - /* FPU fast, DTE cache, Mem bypass */ - setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ - - set_cx86_memwb(); - set_cx86_reorder(); - set_cx86_inc(); - - local_irq_restore(flags); -} - - -static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) -{ - unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; - char *buf = c->x86_model_id; - const char *p = NULL; - - /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; - 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ - clear_bit(0*32+31, c->x86_capability); - - /* Cyrix used bit 24 in extended (AMD) CPUID for Cyrix MMX extensions */ - if ( test_bit(1*32+24, c->x86_capability) ) { - clear_bit(1*32+24, c->x86_capability); - set_bit(X86_FEATURE_CXMMX, c->x86_capability); - } - - do_cyrix_devid(&dir0, &dir1); - - check_cx686_slop(c); - - Cx86_dir0_msb = dir0_msn = dir0 >> 4; /* identifies CPU "family" */ - dir0_lsn = dir0 & 0xf; /* model or clock multiplier */ - - /* common case step number/rev -- exceptions handled below */ - c->x86_model = (dir1 >> 4) + 1; - c->x86_mask = dir1 & 0xf; - - /* Now cook; the original recipe is by Channing Corn, from Cyrix. - * We do the same thing for each generation: we work out - * the model, multiplier and stepping. Black magic included, - * to make the silicon step/rev numbers match the printed ones. - */ - - switch (dir0_msn) { - unsigned char tmp; - - case 0: /* Cx486SLC/DLC/SRx/DRx */ - p = Cx486_name[dir0_lsn & 7]; - break; - - case 1: /* Cx486S/DX/DX2/DX4 */ - p = (dir0_lsn & 8) ? Cx486D_name[dir0_lsn & 5] - : Cx486S_name[dir0_lsn & 3]; - break; - - case 2: /* 5x86 */ - Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5]; - p = Cx86_cb+2; - break; - - case 3: /* 6x86/6x86L */ - Cx86_cb[1] = ' '; - Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5]; - if (dir1 > 0x21) { /* 686L */ - Cx86_cb[0] = 'L'; - p = Cx86_cb; - (c->x86_model)++; - } else /* 686 */ - p = Cx86_cb+1; - /* Emulate MTRRs using Cyrix's ARRs. */ - set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability); - /* 6x86's contain this bug */ - c->coma_bug = 1; - break; - - case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */ -#ifdef CONFIG_PCI - { - u32 vendor, device; - /* It isn't really a PCI quirk directly, but the cure is the - same. The MediaGX has deep magic SMM stuff that handles the - SB emulation. It thows away the fifo on disable_dma() which - is wrong and ruins the audio. - - Bug2: VSA1 has a wrap bug so that using maximum sized DMA - causes bad things. According to NatSemi VSA2 has another - bug to do with 'hlt'. I've not seen any boards using VSA2 - and X doesn't seem to support it either so who cares 8). - VSA1 we work around however. - */ - - printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); - isa_dma_bridge_buggy = 2; - - /* We do this before the PCI layer is running. However we - are safe here as we know the bridge must be a Cyrix - companion and must be present */ - vendor = read_pci_config_16(0, 0, 0x12, PCI_VENDOR_ID); - device = read_pci_config_16(0, 0, 0x12, PCI_DEVICE_ID); - - /* - * The 5510/5520 companion chips have a funky PIT. - */ - if (vendor == PCI_VENDOR_ID_CYRIX && - (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) - mark_tsc_unstable("cyrix 5510/5520 detected"); - } -#endif - c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ - - /* GXm supports extended cpuid levels 'ala' AMD */ - if (c->cpuid_level == 2) { - /* Enable cxMMX extensions (GX1 Datasheet 54) */ - setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); - - /* - * GXm : 0x30 ... 0x5f GXm datasheet 51 - * GXlv: 0x6x GXlv datasheet 54 - * ? : 0x7x - * GX1 : 0x8x GX1 datasheet 56 - */ - if((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <=dir1 && dir1 <= 0x8f)) - geode_configure(); - get_model_name(c); /* get CPU marketing name */ - return; - } - else { /* MediaGX */ - Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; - p = Cx86_cb+2; - c->x86_model = (dir1 & 0x20) ? 1 : 2; - } - break; - - case 5: /* 6x86MX/M II */ - if (dir1 > 7) - { - dir0_msn++; /* M II */ - /* Enable MMX extensions (App note 108) */ - setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); - } - else - { - c->coma_bug = 1; /* 6x86MX, it has the bug. */ - } - tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0; - Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7]; - p = Cx86_cb+tmp; - if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20)) - (c->x86_model)++; - /* Emulate MTRRs using Cyrix's ARRs. */ - set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability); - break; - - case 0xf: /* Cyrix 486 without DEVID registers */ - switch (dir0_lsn) { - case 0xd: /* either a 486SLC or DLC w/o DEVID */ - dir0_msn = 0; - p = Cx486_name[(c->hard_math) ? 1 : 0]; - break; - - case 0xe: /* a 486S A step */ - dir0_msn = 0; - p = Cx486S_name[0]; - break; - } - break; - - default: /* unknown (shouldn't happen, we know everyone ;-) */ - dir0_msn = 7; - break; - } - strcpy(buf, Cx86_model[dir0_msn & 7]); - if (p) strcat(buf, p); - return; -} - -/* - * Handle National Semiconductor branded processors - */ -static void __cpuinit init_nsc(struct cpuinfo_x86 *c) -{ - /* There may be GX1 processors in the wild that are branded - * NSC and not Cyrix. - * - * This function only handles the GX processor, and kicks every - * thing else to the Cyrix init function above - that should - * cover any processors that might have been branded differently - * after NSC acquired Cyrix. - * - * If this breaks your GX1 horribly, please e-mail - * info-linux@ldcmail.amd.com to tell us. - */ - - /* Handle the GX (Formally known as the GX2) */ - - if (c->x86 == 5 && c->x86_model == 5) - display_cacheinfo(c); - else - init_cyrix(c); -} - -/* - * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected - * by the fact that they preserve the flags across the division of 5/2. - * PII and PPro exhibit this behavior too, but they have cpuid available. - */ - -/* - * Perform the Cyrix 5/2 test. A Cyrix won't change - * the flags, while other 486 chips will. - */ -static inline int test_cyrix_52div(void) -{ - unsigned int test; - - __asm__ __volatile__( - "sahf\n\t" /* clear flags (%eax = 0x0005) */ - "div %b2\n\t" /* divide 5 by 2 */ - "lahf" /* store flags into %ah */ - : "=a" (test) - : "0" (5), "q" (2) - : "cc"); - - /* AH is 0x02 on Cyrix after the divide.. */ - return (unsigned char) (test >> 8) == 0x02; -} - -static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c) -{ - /* Detect Cyrix with disabled CPUID */ - if ( c->x86 == 4 && test_cyrix_52div() ) { - unsigned char dir0, dir1; - - strcpy(c->x86_vendor_id, "CyrixInstead"); - c->x86_vendor = X86_VENDOR_CYRIX; - - /* Actually enable cpuid on the older cyrix */ - - /* Retrieve CPU revisions */ - - do_cyrix_devid(&dir0, &dir1); - - dir0>>=4; - - /* Check it is an affected model */ - - if (dir0 == 5 || dir0 == 3) - { - unsigned char ccr3; - unsigned long flags; - printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); - local_irq_save(flags); - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */ - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ - local_irq_restore(flags); - } - } -} - -static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { - .c_vendor = "Cyrix", - .c_ident = { "CyrixInstead" }, - .c_init = init_cyrix, - .c_identify = cyrix_identify, -}; - -int __init cyrix_init_cpu(void) -{ - cpu_devs[X86_VENDOR_CYRIX] = &cyrix_cpu_dev; - return 0; -} - -static struct cpu_dev nsc_cpu_dev __cpuinitdata = { - .c_vendor = "NSC", - .c_ident = { "Geode by NSC" }, - .c_init = init_nsc, -}; - -int __init nsc_init_cpu(void) -{ - cpu_devs[X86_VENDOR_NSC] = &nsc_cpu_dev; - return 0; -} - diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c deleted file mode 100644 index dc4e08147b1..00000000000 --- a/arch/i386/kernel/cpu/intel.c +++ /dev/null @@ -1,333 +0,0 @@ -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "cpu.h" - -#ifdef CONFIG_X86_LOCAL_APIC -#include -#include -#include -#endif - -extern int trap_init_f00f_bug(void); - -#ifdef CONFIG_X86_INTEL_USERCOPY -/* - * Alignment at which movsl is preferred for bulk memory copies. - */ -struct movsl_mask movsl_mask __read_mostly; -#endif - -void __cpuinit early_intel_workaround(struct cpuinfo_x86 *c) -{ - if (c->x86_vendor != X86_VENDOR_INTEL) - return; - /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ - if (c->x86 == 15 && c->x86_cache_alignment == 64) - c->x86_cache_alignment = 128; -} - -/* - * Early probe support logic for ppro memory erratum #50 - * - * This is called before we do cpu ident work - */ - -int __cpuinit ppro_with_ram_bug(void) -{ - /* Uses data from early_cpu_detect now */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model == 1 && - boot_cpu_data.x86_mask < 8) { - printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); - return 1; - } - return 0; -} - - -/* - * P4 Xeon errata 037 workaround. - * Hardware prefetcher may cause stale data to be loaded into the cache. - */ -static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) -{ - unsigned long lo, hi; - - if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { - rdmsr (MSR_IA32_MISC_ENABLE, lo, hi); - if ((lo & (1<<9)) == 0) { - printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); - printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); - lo |= (1<<9); /* Disable hw prefetching */ - wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); - } - } -} - - -/* - * find out the number of processor cores on the die - */ -static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) -{ - unsigned int eax, ebx, ecx, edx; - - if (c->cpuid_level < 4) - return 1; - - /* Intel has a non-standard dependency on %ecx for this CPUID level. */ - cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); - if (eax & 0x1f) - return ((eax >> 26) + 1); - else - return 1; -} - -static void __cpuinit init_intel(struct cpuinfo_x86 *c) -{ - unsigned int l2 = 0; - char *p = NULL; - -#ifdef CONFIG_X86_F00F_BUG - /* - * All current models of Pentium and Pentium with MMX technology CPUs - * have the F0 0F bug, which lets nonprivileged users lock up the system. - * Note that the workaround only should be initialized once... - */ - c->f00f_bug = 0; - if (!paravirt_enabled() && c->x86 == 5) { - static int f00f_workaround_enabled = 0; - - c->f00f_bug = 1; - if ( !f00f_workaround_enabled ) { - trap_init_f00f_bug(); - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); - f00f_workaround_enabled = 1; - } - } -#endif - - select_idle_routine(c); - l2 = init_intel_cacheinfo(c); - if (c->cpuid_level > 9 ) { - unsigned eax = cpuid_eax(10); - /* Check for version and the number of counters */ - if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) - set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); - } - - /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ - if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) - clear_bit(X86_FEATURE_SEP, c->x86_capability); - - /* Names for the Pentium II/Celeron processors - detectable only by also checking the cache size. - Dixon is NOT a Celeron. */ - if (c->x86 == 6) { - switch (c->x86_model) { - case 5: - if (c->x86_mask == 0) { - if (l2 == 0) - p = "Celeron (Covington)"; - else if (l2 == 256) - p = "Mobile Pentium II (Dixon)"; - } - break; - - case 6: - if (l2 == 128) - p = "Celeron (Mendocino)"; - else if (c->x86_mask == 0 || c->x86_mask == 5) - p = "Celeron-A"; - break; - - case 8: - if (l2 == 128) - p = "Celeron (Coppermine)"; - break; - } - } - - if ( p ) - strcpy(c->x86_model_id, p); - - c->x86_max_cores = num_cpu_cores(c); - - detect_ht(c); - - /* Work around errata */ - Intel_errata_workarounds(c); - -#ifdef CONFIG_X86_INTEL_USERCOPY - /* - * Set up the preferred alignment for movsl bulk memory moves - */ - switch (c->x86) { - case 4: /* 486: untested */ - break; - case 5: /* Old Pentia: untested */ - break; - case 6: /* PII/PIII only like movsl with 8-byte alignment */ - movsl_mask.mask = 7; - break; - case 15: /* P4 is OK down to 8-byte alignment */ - movsl_mask.mask = 7; - break; - } -#endif - - if (c->x86 == 15) { - set_bit(X86_FEATURE_P4, c->x86_capability); - set_bit(X86_FEATURE_SYNC_RDTSC, c->x86_capability); - } - if (c->x86 == 6) - set_bit(X86_FEATURE_P3, c->x86_capability); - if ((c->x86 == 0xf && c->x86_model >= 0x03) || - (c->x86 == 0x6 && c->x86_model >= 0x0e)) - set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); - - if (cpu_has_ds) { - unsigned int l1; - rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); - if (!(l1 & (1<<11))) - set_bit(X86_FEATURE_BTS, c->x86_capability); - if (!(l1 & (1<<12))) - set_bit(X86_FEATURE_PEBS, c->x86_capability); - } -} - -static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) -{ - /* Intel PIII Tualatin. This comes in two flavours. - * One has 256kb of cache, the other 512. We have no way - * to determine which, so we use a boottime override - * for the 512kb model, and assume 256 otherwise. - */ - if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) - size = 256; - return size; -} - -static struct cpu_dev intel_cpu_dev __cpuinitdata = { - .c_vendor = "Intel", - .c_ident = { "GenuineIntel" }, - .c_models = { - { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = - { - [0] = "486 DX-25/33", - [1] = "486 DX-50", - [2] = "486 SX", - [3] = "486 DX/2", - [4] = "486 SL", - [5] = "486 SX/2", - [7] = "486 DX/2-WB", - [8] = "486 DX/4", - [9] = "486 DX/4-WB" - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names = - { - [0] = "Pentium 60/66 A-step", - [1] = "Pentium 60/66", - [2] = "Pentium 75 - 200", - [3] = "OverDrive PODP5V83", - [4] = "Pentium MMX", - [7] = "Mobile Pentium 75 - 200", - [8] = "Mobile Pentium MMX" - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names = - { - [0] = "Pentium Pro A-step", - [1] = "Pentium Pro", - [3] = "Pentium II (Klamath)", - [4] = "Pentium II (Deschutes)", - [5] = "Pentium II (Deschutes)", - [6] = "Mobile Pentium II", - [7] = "Pentium III (Katmai)", - [8] = "Pentium III (Coppermine)", - [10] = "Pentium III (Cascades)", - [11] = "Pentium III (Tualatin)", - } - }, - { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names = - { - [0] = "Pentium 4 (Unknown)", - [1] = "Pentium 4 (Willamette)", - [2] = "Pentium 4 (Northwood)", - [4] = "Pentium 4 (Foster)", - [5] = "Pentium 4 (Foster)", - } - }, - }, - .c_init = init_intel, - .c_size_cache = intel_size_cache, -}; - -__init int intel_cpu_init(void) -{ - cpu_devs[X86_VENDOR_INTEL] = &intel_cpu_dev; - return 0; -} - -#ifndef CONFIG_X86_CMPXCHG -unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) -{ - u8 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u8 *)ptr; - if (prev == old) - *(u8 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u8); - -unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) -{ - u16 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u16 *)ptr; - if (prev == old) - *(u16 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u16); - -unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) -{ - u32 prev; - unsigned long flags; - - /* Poor man's cmpxchg for 386. Unsuitable for SMP */ - local_irq_save(flags); - prev = *(u32 *)ptr; - if (prev == old) - *(u32 *)ptr = new; - local_irq_restore(flags); - return prev; -} -EXPORT_SYMBOL(cmpxchg_386_u32); -#endif - -// arch_initcall(intel_cpu_init); - diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c deleted file mode 100644 index db6c25aa577..00000000000 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ /dev/null @@ -1,806 +0,0 @@ -/* - * Routines to indentify caches on Intel CPU. - * - * Changes: - * Venkatesh Pallipadi : Adding cache identification through cpuid(4) - * Ashok Raj : Work with CPU hotplug infrastructure. - * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -#define LVL_1_INST 1 -#define LVL_1_DATA 2 -#define LVL_2 3 -#define LVL_3 4 -#define LVL_TRACE 5 - -struct _cache_table -{ - unsigned char descriptor; - char cache_type; - short size; -}; - -/* all the cache descriptor types we care about (no TLB or trace cache entries) */ -static struct _cache_table cache_table[] __cpuinitdata = -{ - { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ - { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ - { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ - { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ - { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ - { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ - { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */ - { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */ - { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */ - { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ - { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ - { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ - { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */ - { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */ - { 0x46, LVL_3, 4096 }, /* 4-way set assoc, 64 byte line size */ - { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */ - { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ - { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */ - { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ - { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ - { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ - { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */ - { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ - { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ - { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */ - { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */ - { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */ - { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ - { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ - { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ - { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */ - { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */ - { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ - { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */ - { 0x00, 0, 0} -}; - - -enum _cache_type -{ - CACHE_TYPE_NULL = 0, - CACHE_TYPE_DATA = 1, - CACHE_TYPE_INST = 2, - CACHE_TYPE_UNIFIED = 3 -}; - -union _cpuid4_leaf_eax { - struct { - enum _cache_type type:5; - unsigned int level:3; - unsigned int is_self_initializing:1; - unsigned int is_fully_associative:1; - unsigned int reserved:4; - unsigned int num_threads_sharing:12; - unsigned int num_cores_on_die:6; - } split; - u32 full; -}; - -union _cpuid4_leaf_ebx { - struct { - unsigned int coherency_line_size:12; - unsigned int physical_line_partition:10; - unsigned int ways_of_associativity:10; - } split; - u32 full; -}; - -union _cpuid4_leaf_ecx { - struct { - unsigned int number_of_sets:32; - } split; - u32 full; -}; - -struct _cpuid4_info { - union _cpuid4_leaf_eax eax; - union _cpuid4_leaf_ebx ebx; - union _cpuid4_leaf_ecx ecx; - unsigned long size; - cpumask_t shared_cpu_map; -}; - -unsigned short num_cache_leaves; - -/* AMD doesn't have CPUID4. Emulate it here to report the same - information to the user. This makes some assumptions about the machine: - L2 not shared, no SMT etc. that is currently true on AMD CPUs. - - In theory the TLBs could be reported as fake type (they are in "dummy"). - Maybe later */ -union l1_cache { - struct { - unsigned line_size : 8; - unsigned lines_per_tag : 8; - unsigned assoc : 8; - unsigned size_in_kb : 8; - }; - unsigned val; -}; - -union l2_cache { - struct { - unsigned line_size : 8; - unsigned lines_per_tag : 4; - unsigned assoc : 4; - unsigned size_in_kb : 16; - }; - unsigned val; -}; - -union l3_cache { - struct { - unsigned line_size : 8; - unsigned lines_per_tag : 4; - unsigned assoc : 4; - unsigned res : 2; - unsigned size_encoded : 14; - }; - unsigned val; -}; - -static const unsigned short assocs[] = { - [1] = 1, [2] = 2, [4] = 4, [6] = 8, - [8] = 16, [0xa] = 32, [0xb] = 48, - [0xc] = 64, - [0xf] = 0xffff // ?? -}; - -static const unsigned char levels[] = { 1, 1, 2, 3 }; -static const unsigned char types[] = { 1, 2, 3, 3 }; - -static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, - union _cpuid4_leaf_ebx *ebx, - union _cpuid4_leaf_ecx *ecx) -{ - unsigned dummy; - unsigned line_size, lines_per_tag, assoc, size_in_kb; - union l1_cache l1i, l1d; - union l2_cache l2; - union l3_cache l3; - union l1_cache *l1 = &l1d; - - eax->full = 0; - ebx->full = 0; - ecx->full = 0; - - cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val); - cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val); - - switch (leaf) { - case 1: - l1 = &l1i; - case 0: - if (!l1->val) - return; - assoc = l1->assoc; - line_size = l1->line_size; - lines_per_tag = l1->lines_per_tag; - size_in_kb = l1->size_in_kb; - break; - case 2: - if (!l2.val) - return; - assoc = l2.assoc; - line_size = l2.line_size; - lines_per_tag = l2.lines_per_tag; - /* cpu_data has errata corrections for K7 applied */ - size_in_kb = current_cpu_data.x86_cache_size; - break; - case 3: - if (!l3.val) - return; - assoc = l3.assoc; - line_size = l3.line_size; - lines_per_tag = l3.lines_per_tag; - size_in_kb = l3.size_encoded * 512; - break; - default: - return; - } - - eax->split.is_self_initializing = 1; - eax->split.type = types[leaf]; - eax->split.level = levels[leaf]; - if (leaf == 3) - eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1; - else - eax->split.num_threads_sharing = 0; - eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; - - - if (assoc == 0xf) - eax->split.is_fully_associative = 1; - ebx->split.coherency_line_size = line_size - 1; - ebx->split.ways_of_associativity = assocs[assoc] - 1; - ebx->split.physical_line_partition = lines_per_tag - 1; - ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / - (ebx->split.ways_of_associativity + 1) - 1; -} - -static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) -{ - union _cpuid4_leaf_eax eax; - union _cpuid4_leaf_ebx ebx; - union _cpuid4_leaf_ecx ecx; - unsigned edx; - - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - amd_cpuid4(index, &eax, &ebx, &ecx); - else - cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); - if (eax.split.type == CACHE_TYPE_NULL) - return -EIO; /* better error ? */ - - this_leaf->eax = eax; - this_leaf->ebx = ebx; - this_leaf->ecx = ecx; - this_leaf->size = (ecx.split.number_of_sets + 1) * - (ebx.split.coherency_line_size + 1) * - (ebx.split.physical_line_partition + 1) * - (ebx.split.ways_of_associativity + 1); - return 0; -} - -static int __cpuinit find_num_cache_leaves(void) -{ - unsigned int eax, ebx, ecx, edx; - union _cpuid4_leaf_eax cache_eax; - int i = -1; - - do { - ++i; - /* Do cpuid(4) loop to find out num_cache_leaves */ - cpuid_count(4, i, &eax, &ebx, &ecx, &edx); - cache_eax.full = eax; - } while (cache_eax.split.type != CACHE_TYPE_NULL); - return i; -} - -unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) -{ - unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ - unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ - unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ - unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; -#ifdef CONFIG_X86_HT - unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data); -#endif - - if (c->cpuid_level > 3) { - static int is_initialized; - - if (is_initialized == 0) { - /* Init num_cache_leaves from boot CPU */ - num_cache_leaves = find_num_cache_leaves(); - is_initialized++; - } - - /* - * Whenever possible use cpuid(4), deterministic cache - * parameters cpuid leaf to find the cache details - */ - for (i = 0; i < num_cache_leaves; i++) { - struct _cpuid4_info this_leaf; - - int retval; - - retval = cpuid4_cache_lookup(i, &this_leaf); - if (retval >= 0) { - switch(this_leaf.eax.split.level) { - case 1: - if (this_leaf.eax.split.type == - CACHE_TYPE_DATA) - new_l1d = this_leaf.size/1024; - else if (this_leaf.eax.split.type == - CACHE_TYPE_INST) - new_l1i = this_leaf.size/1024; - break; - case 2: - new_l2 = this_leaf.size/1024; - num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; - index_msb = get_count_order(num_threads_sharing); - l2_id = c->apicid >> index_msb; - break; - case 3: - new_l3 = this_leaf.size/1024; - num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; - index_msb = get_count_order(num_threads_sharing); - l3_id = c->apicid >> index_msb; - break; - default: - break; - } - } - } - } - /* - * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for - * trace cache - */ - if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { - /* supports eax=2 call */ - int i, j, n; - int regs[4]; - unsigned char *dp = (unsigned char *)regs; - int only_trace = 0; - - if (num_cache_leaves != 0 && c->x86 == 15) - only_trace = 1; - - /* Number of times to iterate */ - n = cpuid_eax(2) & 0xFF; - - for ( i = 0 ; i < n ; i++ ) { - cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); - - /* If bit 31 is set, this is an unknown format */ - for ( j = 0 ; j < 3 ; j++ ) { - if ( regs[j] < 0 ) regs[j] = 0; - } - - /* Byte 0 is level count, not a descriptor */ - for ( j = 1 ; j < 16 ; j++ ) { - unsigned char des = dp[j]; - unsigned char k = 0; - - /* look up this descriptor in the table */ - while (cache_table[k].descriptor != 0) - { - if (cache_table[k].descriptor == des) { - if (only_trace && cache_table[k].cache_type != LVL_TRACE) - break; - switch (cache_table[k].cache_type) { - case LVL_1_INST: - l1i += cache_table[k].size; - break; - case LVL_1_DATA: - l1d += cache_table[k].size; - break; - case LVL_2: - l2 += cache_table[k].size; - break; - case LVL_3: - l3 += cache_table[k].size; - break; - case LVL_TRACE: - trace += cache_table[k].size; - break; - } - - break; - } - - k++; - } - } - } - } - - if (new_l1d) - l1d = new_l1d; - - if (new_l1i) - l1i = new_l1i; - - if (new_l2) { - l2 = new_l2; -#ifdef CONFIG_X86_HT - cpu_llc_id[cpu] = l2_id; -#endif - } - - if (new_l3) { - l3 = new_l3; -#ifdef CONFIG_X86_HT - cpu_llc_id[cpu] = l3_id; -#endif - } - - if (trace) - printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); - else if ( l1i ) - printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); - - if (l1d) - printk(", L1 D cache: %dK\n", l1d); - else - printk("\n"); - - if (l2) - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); - - if (l3) - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); - - c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); - - return l2; -} - -/* pointer to _cpuid4_info array (for each cache leaf) */ -static struct _cpuid4_info *cpuid4_info[NR_CPUS]; -#define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y])) - -#ifdef CONFIG_SMP -static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) -{ - struct _cpuid4_info *this_leaf, *sibling_leaf; - unsigned long num_threads_sharing; - int index_msb, i; - struct cpuinfo_x86 *c = cpu_data; - - this_leaf = CPUID4_INFO_IDX(cpu, index); - num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; - - if (num_threads_sharing == 1) - cpu_set(cpu, this_leaf->shared_cpu_map); - else { - index_msb = get_count_order(num_threads_sharing); - - for_each_online_cpu(i) { - if (c[i].apicid >> index_msb == - c[cpu].apicid >> index_msb) { - cpu_set(i, this_leaf->shared_cpu_map); - if (i != cpu && cpuid4_info[i]) { - sibling_leaf = CPUID4_INFO_IDX(i, index); - cpu_set(cpu, sibling_leaf->shared_cpu_map); - } - } - } - } -} -static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) -{ - struct _cpuid4_info *this_leaf, *sibling_leaf; - int sibling; - - this_leaf = CPUID4_INFO_IDX(cpu, index); - for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { - sibling_leaf = CPUID4_INFO_IDX(sibling, index); - cpu_clear(cpu, sibling_leaf->shared_cpu_map); - } -} -#else -static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {} -static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {} -#endif - -static void free_cache_attributes(unsigned int cpu) -{ - kfree(cpuid4_info[cpu]); - cpuid4_info[cpu] = NULL; -} - -static int __cpuinit detect_cache_attributes(unsigned int cpu) -{ - struct _cpuid4_info *this_leaf; - unsigned long j; - int retval; - cpumask_t oldmask; - - if (num_cache_leaves == 0) - return -ENOENT; - - cpuid4_info[cpu] = kzalloc( - sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); - if (cpuid4_info[cpu] == NULL) - return -ENOMEM; - - oldmask = current->cpus_allowed; - retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); - if (retval) - goto out; - - /* Do cpuid and store the results */ - retval = 0; - for (j = 0; j < num_cache_leaves; j++) { - this_leaf = CPUID4_INFO_IDX(cpu, j); - retval = cpuid4_cache_lookup(j, this_leaf); - if (unlikely(retval < 0)) - break; - cache_shared_cpu_map_setup(cpu, j); - } - set_cpus_allowed(current, oldmask); - -out: - if (retval) - free_cache_attributes(cpu); - return retval; -} - -#ifdef CONFIG_SYSFS - -#include -#include - -extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */ - -/* pointer to kobject for cpuX/cache */ -static struct kobject * cache_kobject[NR_CPUS]; - -struct _index_kobject { - struct kobject kobj; - unsigned int cpu; - unsigned short index; -}; - -/* pointer to array of kobjects for cpuX/cache/indexY */ -static struct _index_kobject *index_kobject[NR_CPUS]; -#define INDEX_KOBJECT_PTR(x,y) (&((index_kobject[x])[y])) - -#define show_one_plus(file_name, object, val) \ -static ssize_t show_##file_name \ - (struct _cpuid4_info *this_leaf, char *buf) \ -{ \ - return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \ -} - -show_one_plus(level, eax.split.level, 0); -show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1); -show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1); -show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1); -show_one_plus(number_of_sets, ecx.split.number_of_sets, 1); - -static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) -{ - return sprintf (buf, "%luK\n", this_leaf->size / 1024); -} - -static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf) -{ - char mask_str[NR_CPUS]; - cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map); - return sprintf(buf, "%s\n", mask_str); -} - -static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { - switch(this_leaf->eax.split.type) { - case CACHE_TYPE_DATA: - return sprintf(buf, "Data\n"); - break; - case CACHE_TYPE_INST: - return sprintf(buf, "Instruction\n"); - break; - case CACHE_TYPE_UNIFIED: - return sprintf(buf, "Unified\n"); - break; - default: - return sprintf(buf, "Unknown\n"); - break; - } -} - -struct _cache_attr { - struct attribute attr; - ssize_t (*show)(struct _cpuid4_info *, char *); - ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); -}; - -#define define_one_ro(_name) \ -static struct _cache_attr _name = \ - __ATTR(_name, 0444, show_##_name, NULL) - -define_one_ro(level); -define_one_ro(type); -define_one_ro(coherency_line_size); -define_one_ro(physical_line_partition); -define_one_ro(ways_of_associativity); -define_one_ro(number_of_sets); -define_one_ro(size); -define_one_ro(shared_cpu_map); - -static struct attribute * default_attrs[] = { - &type.attr, - &level.attr, - &coherency_line_size.attr, - &physical_line_partition.attr, - &ways_of_associativity.attr, - &number_of_sets.attr, - &size.attr, - &shared_cpu_map.attr, - NULL -}; - -#define to_object(k) container_of(k, struct _index_kobject, kobj) -#define to_attr(a) container_of(a, struct _cache_attr, attr) - -static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) -{ - struct _cache_attr *fattr = to_attr(attr); - struct _index_kobject *this_leaf = to_object(kobj); - ssize_t ret; - - ret = fattr->show ? - fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), - buf) : - 0; - return ret; -} - -static ssize_t store(struct kobject * kobj, struct attribute * attr, - const char * buf, size_t count) -{ - return 0; -} - -static struct sysfs_ops sysfs_ops = { - .show = show, - .store = store, -}; - -static struct kobj_type ktype_cache = { - .sysfs_ops = &sysfs_ops, - .default_attrs = default_attrs, -}; - -static struct kobj_type ktype_percpu_entry = { - .sysfs_ops = &sysfs_ops, -}; - -static void cpuid4_cache_sysfs_exit(unsigned int cpu) -{ - kfree(cache_kobject[cpu]); - kfree(index_kobject[cpu]); - cache_kobject[cpu] = NULL; - index_kobject[cpu] = NULL; - free_cache_attributes(cpu); -} - -static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) -{ - - if (num_cache_leaves == 0) - return -ENOENT; - - detect_cache_attributes(cpu); - if (cpuid4_info[cpu] == NULL) - return -ENOENT; - - /* Allocate all required memory */ - cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL); - if (unlikely(cache_kobject[cpu] == NULL)) - goto err_out; - - index_kobject[cpu] = kzalloc( - sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL); - if (unlikely(index_kobject[cpu] == NULL)) - goto err_out; - - return 0; - -err_out: - cpuid4_cache_sysfs_exit(cpu); - return -ENOMEM; -} - -/* Add/Remove cache interface for CPU device */ -static int __cpuinit cache_add_dev(struct sys_device * sys_dev) -{ - unsigned int cpu = sys_dev->id; - unsigned long i, j; - struct _index_kobject *this_object; - int retval = 0; - - retval = cpuid4_cache_sysfs_init(cpu); - if (unlikely(retval < 0)) - return retval; - - cache_kobject[cpu]->parent = &sys_dev->kobj; - kobject_set_name(cache_kobject[cpu], "%s", "cache"); - cache_kobject[cpu]->ktype = &ktype_percpu_entry; - retval = kobject_register(cache_kobject[cpu]); - - for (i = 0; i < num_cache_leaves; i++) { - this_object = INDEX_KOBJECT_PTR(cpu,i); - this_object->cpu = cpu; - this_object->index = i; - this_object->kobj.parent = cache_kobject[cpu]; - kobject_set_name(&(this_object->kobj), "index%1lu", i); - this_object->kobj.ktype = &ktype_cache; - retval = kobject_register(&(this_object->kobj)); - if (unlikely(retval)) { - for (j = 0; j < i; j++) { - kobject_unregister( - &(INDEX_KOBJECT_PTR(cpu,j)->kobj)); - } - kobject_unregister(cache_kobject[cpu]); - cpuid4_cache_sysfs_exit(cpu); - break; - } - } - return retval; -} - -static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) -{ - unsigned int cpu = sys_dev->id; - unsigned long i; - - if (cpuid4_info[cpu] == NULL) - return; - for (i = 0; i < num_cache_leaves; i++) { - cache_remove_shared_cpu_map(cpu, i); - kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); - } - kobject_unregister(cache_kobject[cpu]); - cpuid4_cache_sysfs_exit(cpu); - return; -} - -static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; - - sys_dev = get_cpu_sysdev(cpu); - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - cache_add_dev(sys_dev); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - cache_remove_dev(sys_dev); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = -{ - .notifier_call = cacheinfo_cpu_callback, -}; - -static int __cpuinit cache_sysfs_init(void) -{ - int i; - - if (num_cache_leaves == 0) - return 0; - - register_hotcpu_notifier(&cacheinfo_cpu_notifier); - - for_each_online_cpu(i) { - cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE, - (void *)(long)i); - } - - return 0; -} - -device_initcall(cache_sysfs_init); - -#endif diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c deleted file mode 100644 index 961fbe1a748..00000000000 --- a/arch/i386/kernel/cpu/nexgen.c +++ /dev/null @@ -1,60 +0,0 @@ -#include -#include -#include -#include - -#include "cpu.h" - -/* - * Detect a NexGen CPU running without BIOS hypercode new enough - * to have CPUID. (Thanks to Herbert Oppmann) - */ - -static int __cpuinit deep_magic_nexgen_probe(void) -{ - int ret; - - __asm__ __volatile__ ( - " movw $0x5555, %%ax\n" - " xorw %%dx,%%dx\n" - " movw $2, %%cx\n" - " divw %%cx\n" - " movl $0, %%eax\n" - " jnz 1f\n" - " movl $1, %%eax\n" - "1:\n" - : "=a" (ret) : : "cx", "dx" ); - return ret; -} - -static void __cpuinit init_nexgen(struct cpuinfo_x86 * c) -{ - c->x86_cache_size = 256; /* A few had 1 MB... */ -} - -static void __cpuinit nexgen_identify(struct cpuinfo_x86 * c) -{ - /* Detect NexGen with old hypercode */ - if ( deep_magic_nexgen_probe() ) { - strcpy(c->x86_vendor_id, "NexGenDriven"); - } -} - -static struct cpu_dev nexgen_cpu_dev __cpuinitdata = { - .c_vendor = "Nexgen", - .c_ident = { "NexGenDriven" }, - .c_models = { - { .vendor = X86_VENDOR_NEXGEN, - .family = 5, - .model_names = { [1] = "Nx586" } - }, - }, - .c_init = init_nexgen, - .c_identify = nexgen_identify, -}; - -int __init nexgen_init_cpu(void) -{ - cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev; - return 0; -} diff --git a/arch/i386/kernel/cpu/perfctr-watchdog.c b/arch/i386/kernel/cpu/perfctr-watchdog.c deleted file mode 100644 index 93fecd4b03d..00000000000 --- a/arch/i386/kernel/cpu/perfctr-watchdog.c +++ /dev/null @@ -1,713 +0,0 @@ -/* local apic based NMI watchdog for various CPUs. - This file also handles reservation of performance counters for coordination - with other users (like oprofile). - - Note that these events normally don't tick when the CPU idles. This means - the frequency varies with CPU load. - - Original code for K7/P6 written by Keith Owens */ - -#include -#include -#include -#include -#include -#include -#include -#include - -struct nmi_watchdog_ctlblk { - unsigned int cccr_msr; - unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ - unsigned int evntsel_msr; /* the MSR to select the events to handle */ -}; - -/* Interface defining a CPU specific perfctr watchdog */ -struct wd_ops { - int (*reserve)(void); - void (*unreserve)(void); - int (*setup)(unsigned nmi_hz); - void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz); - void (*stop)(void); - unsigned perfctr; - unsigned evntsel; - u64 checkbit; -}; - -static struct wd_ops *wd_ops; - -/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's - * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) - */ -#define NMI_MAX_COUNTER_BITS 66 - -/* perfctr_nmi_owner tracks the ownership of the perfctr registers: - * evtsel_nmi_owner tracks the ownership of the event selection - * - different performance counters/ event selection may be reserved for - * different subsystems this reservation system just tries to coordinate - * things a little - */ -static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); -static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); - -static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); - -/* converts an msr to an appropriate reservation bit */ -static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) -{ - /* returns the bit offset of the performance counter register */ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - return (msr - MSR_K7_PERFCTR0); - case X86_VENDOR_INTEL: - if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) - return (msr - MSR_ARCH_PERFMON_PERFCTR0); - - switch (boot_cpu_data.x86) { - case 6: - return (msr - MSR_P6_PERFCTR0); - case 15: - return (msr - MSR_P4_BPU_PERFCTR0); - } - } - return 0; -} - -/* converts an msr to an appropriate reservation bit */ -/* returns the bit offset of the event selection register */ -static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) -{ - /* returns the bit offset of the event selection register */ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - return (msr - MSR_K7_EVNTSEL0); - case X86_VENDOR_INTEL: - if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) - return (msr - MSR_ARCH_PERFMON_EVENTSEL0); - - switch (boot_cpu_data.x86) { - case 6: - return (msr - MSR_P6_EVNTSEL0); - case 15: - return (msr - MSR_P4_BSU_ESCR0); - } - } - return 0; - -} - -/* checks for a bit availability (hack for oprofile) */ -int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) -{ - BUG_ON(counter > NMI_MAX_COUNTER_BITS); - - return (!test_bit(counter, perfctr_nmi_owner)); -} - -/* checks the an msr for availability */ -int avail_to_resrv_perfctr_nmi(unsigned int msr) -{ - unsigned int counter; - - counter = nmi_perfctr_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); - - return (!test_bit(counter, perfctr_nmi_owner)); -} - -int reserve_perfctr_nmi(unsigned int msr) -{ - unsigned int counter; - - counter = nmi_perfctr_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); - - if (!test_and_set_bit(counter, perfctr_nmi_owner)) - return 1; - return 0; -} - -void release_perfctr_nmi(unsigned int msr) -{ - unsigned int counter; - - counter = nmi_perfctr_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); - - clear_bit(counter, perfctr_nmi_owner); -} - -int reserve_evntsel_nmi(unsigned int msr) -{ - unsigned int counter; - - counter = nmi_evntsel_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); - - if (!test_and_set_bit(counter, evntsel_nmi_owner)) - return 1; - return 0; -} - -void release_evntsel_nmi(unsigned int msr) -{ - unsigned int counter; - - counter = nmi_evntsel_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); - - clear_bit(counter, evntsel_nmi_owner); -} - -EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); -EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); -EXPORT_SYMBOL(reserve_perfctr_nmi); -EXPORT_SYMBOL(release_perfctr_nmi); -EXPORT_SYMBOL(reserve_evntsel_nmi); -EXPORT_SYMBOL(release_evntsel_nmi); - -void disable_lapic_nmi_watchdog(void) -{ - BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); - - if (atomic_read(&nmi_active) <= 0) - return; - - on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); - wd_ops->unreserve(); - - BUG_ON(atomic_read(&nmi_active) != 0); -} - -void enable_lapic_nmi_watchdog(void) -{ - BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); - - /* are we already enabled */ - if (atomic_read(&nmi_active) != 0) - return; - - /* are we lapic aware */ - if (!wd_ops) - return; - if (!wd_ops->reserve()) { - printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n"); - return; - } - - on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); - touch_nmi_watchdog(); -} - -/* - * Activate the NMI watchdog via the local APIC. - */ - -static unsigned int adjust_for_32bit_ctr(unsigned int hz) -{ - u64 counter_val; - unsigned int retval = hz; - - /* - * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter - * are writable, with higher bits sign extending from bit 31. - * So, we can only program the counter with 31 bit values and - * 32nd bit should be 1, for 33.. to be 1. - * Find the appropriate nmi_hz - */ - counter_val = (u64)cpu_khz * 1000; - do_div(counter_val, retval); - if (counter_val > 0x7fffffffULL) { - u64 count = (u64)cpu_khz * 1000; - do_div(count, 0x7fffffffUL); - retval = count + 1; - } - return retval; -} - -static void -write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz) -{ - u64 count = (u64)cpu_khz * 1000; - - do_div(count, nmi_hz); - if(descr) - Dprintk("setting %s to -0x%08Lx\n", descr, count); - wrmsrl(perfctr_msr, 0 - count); -} - -static void write_watchdog_counter32(unsigned int perfctr_msr, - const char *descr, unsigned nmi_hz) -{ - u64 count = (u64)cpu_khz * 1000; - - do_div(count, nmi_hz); - if(descr) - Dprintk("setting %s to -0x%08Lx\n", descr, count); - wrmsr(perfctr_msr, (u32)(-count), 0); -} - -/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface - nicely stable so there is not much variety */ - -#define K7_EVNTSEL_ENABLE (1 << 22) -#define K7_EVNTSEL_INT (1 << 20) -#define K7_EVNTSEL_OS (1 << 17) -#define K7_EVNTSEL_USR (1 << 16) -#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 -#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING - -static int setup_k7_watchdog(unsigned nmi_hz) -{ - unsigned int perfctr_msr, evntsel_msr; - unsigned int evntsel; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - perfctr_msr = wd_ops->perfctr; - evntsel_msr = wd_ops->evntsel; - - wrmsrl(perfctr_msr, 0UL); - - evntsel = K7_EVNTSEL_INT - | K7_EVNTSEL_OS - | K7_EVNTSEL_USR - | K7_NMI_EVENT; - - /* setup the timer */ - wrmsr(evntsel_msr, evntsel, 0); - write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= K7_EVNTSEL_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); - - wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; //unused - return 1; -} - -static void single_msr_stop_watchdog(void) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - wrmsr(wd->evntsel_msr, 0, 0); -} - -static int single_msr_reserve(void) -{ - if (!reserve_perfctr_nmi(wd_ops->perfctr)) - return 0; - - if (!reserve_evntsel_nmi(wd_ops->evntsel)) { - release_perfctr_nmi(wd_ops->perfctr); - return 0; - } - return 1; -} - -static void single_msr_unreserve(void) -{ - release_evntsel_nmi(wd_ops->evntsel); - release_perfctr_nmi(wd_ops->perfctr); -} - -static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) -{ - /* start the cycle over again */ - write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); -} - -static struct wd_ops k7_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_k7_watchdog, - .rearm = single_msr_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_K7_PERFCTR0, - .evntsel = MSR_K7_EVNTSEL0, - .checkbit = 1ULL<<47, -}; - -/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */ - -#define P6_EVNTSEL0_ENABLE (1 << 22) -#define P6_EVNTSEL_INT (1 << 20) -#define P6_EVNTSEL_OS (1 << 17) -#define P6_EVNTSEL_USR (1 << 16) -#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 -#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED - -static int setup_p6_watchdog(unsigned nmi_hz) -{ - unsigned int perfctr_msr, evntsel_msr; - unsigned int evntsel; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - perfctr_msr = wd_ops->perfctr; - evntsel_msr = wd_ops->evntsel; - - /* KVM doesn't implement this MSR */ - if (wrmsr_safe(perfctr_msr, 0, 0) < 0) - return 0; - - evntsel = P6_EVNTSEL_INT - | P6_EVNTSEL_OS - | P6_EVNTSEL_USR - | P6_NMI_EVENT; - - /* setup the timer */ - wrmsr(evntsel_msr, evntsel, 0); - nmi_hz = adjust_for_32bit_ctr(nmi_hz); - write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= P6_EVNTSEL0_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); - - wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; //unused - return 1; -} - -static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) -{ - /* P6 based Pentium M need to re-unmask - * the apic vector but it doesn't hurt - * other P6 variant. - * ArchPerfom/Core Duo also needs this */ - apic_write(APIC_LVTPC, APIC_DM_NMI); - /* P6/ARCH_PERFMON has 32 bit counter write */ - write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); -} - -static struct wd_ops p6_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_p6_watchdog, - .rearm = p6_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_P6_PERFCTR0, - .evntsel = MSR_P6_EVNTSEL0, - .checkbit = 1ULL<<39, -}; - -/* Intel P4 performance counters. By far the most complicated of all. */ - -#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) -#define P4_ESCR_EVENT_SELECT(N) ((N)<<25) -#define P4_ESCR_OS (1<<3) -#define P4_ESCR_USR (1<<2) -#define P4_CCCR_OVF_PMI0 (1<<26) -#define P4_CCCR_OVF_PMI1 (1<<27) -#define P4_CCCR_THRESHOLD(N) ((N)<<20) -#define P4_CCCR_COMPLEMENT (1<<19) -#define P4_CCCR_COMPARE (1<<18) -#define P4_CCCR_REQUIRED (3<<16) -#define P4_CCCR_ESCR_SELECT(N) ((N)<<13) -#define P4_CCCR_ENABLE (1<<12) -#define P4_CCCR_OVF (1<<31) - -/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter - CRU_ESCR0 (with any non-null event selector) through a complemented - max threshold. [IA32-Vol3, Section 14.9.9] */ - -static int setup_p4_watchdog(unsigned nmi_hz) -{ - unsigned int perfctr_msr, evntsel_msr, cccr_msr; - unsigned int evntsel, cccr_val; - unsigned int misc_enable, dummy; - unsigned int ht_num; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); - if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) - return 0; - -#ifdef CONFIG_SMP - /* detect which hyperthread we are on */ - if (smp_num_siblings == 2) { - unsigned int ebx, apicid; - - ebx = cpuid_ebx(1); - apicid = (ebx >> 24) & 0xff; - ht_num = apicid & 1; - } else -#endif - ht_num = 0; - - /* performance counters are shared resources - * assign each hyperthread its own set - * (re-use the ESCR0 register, seems safe - * and keeps the cccr_val the same) - */ - if (!ht_num) { - /* logical cpu 0 */ - perfctr_msr = MSR_P4_IQ_PERFCTR0; - evntsel_msr = MSR_P4_CRU_ESCR0; - cccr_msr = MSR_P4_IQ_CCCR0; - cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); - } else { - /* logical cpu 1 */ - perfctr_msr = MSR_P4_IQ_PERFCTR1; - evntsel_msr = MSR_P4_CRU_ESCR0; - cccr_msr = MSR_P4_IQ_CCCR1; - cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); - } - - evntsel = P4_ESCR_EVENT_SELECT(0x3F) - | P4_ESCR_OS - | P4_ESCR_USR; - - cccr_val |= P4_CCCR_THRESHOLD(15) - | P4_CCCR_COMPLEMENT - | P4_CCCR_COMPARE - | P4_CCCR_REQUIRED; - - wrmsr(evntsel_msr, evntsel, 0); - wrmsr(cccr_msr, cccr_val, 0); - write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - cccr_val |= P4_CCCR_ENABLE; - wrmsr(cccr_msr, cccr_val, 0); - wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = cccr_msr; - return 1; -} - -static void stop_p4_watchdog(void) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - wrmsr(wd->cccr_msr, 0, 0); - wrmsr(wd->evntsel_msr, 0, 0); -} - -static int p4_reserve(void) -{ - if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0)) - return 0; -#ifdef CONFIG_SMP - if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1)) - goto fail1; -#endif - if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0)) - goto fail2; - /* RED-PEN why is ESCR1 not reserved here? */ - return 1; - fail2: -#ifdef CONFIG_SMP - if (smp_num_siblings > 1) - release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); - fail1: -#endif - release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); - return 0; -} - -static void p4_unreserve(void) -{ -#ifdef CONFIG_SMP - if (smp_num_siblings > 1) - release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); -#endif - release_evntsel_nmi(MSR_P4_CRU_ESCR0); - release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); -} - -static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) -{ - unsigned dummy; - /* - * P4 quirks: - * - An overflown perfctr will assert its interrupt - * until the OVF flag in its CCCR is cleared. - * - LVTPC is masked on interrupt and must be - * unmasked by the LVTPC handler. - */ - rdmsrl(wd->cccr_msr, dummy); - dummy &= ~P4_CCCR_OVF; - wrmsrl(wd->cccr_msr, dummy); - apic_write(APIC_LVTPC, APIC_DM_NMI); - /* start the cycle over again */ - write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); -} - -static struct wd_ops p4_wd_ops = { - .reserve = p4_reserve, - .unreserve = p4_unreserve, - .setup = setup_p4_watchdog, - .rearm = p4_rearm, - .stop = stop_p4_watchdog, - /* RED-PEN this is wrong for the other sibling */ - .perfctr = MSR_P4_BPU_PERFCTR0, - .evntsel = MSR_P4_BSU_ESCR0, - .checkbit = 1ULL<<39, -}; - -/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully - all future Intel CPUs. */ - -#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL -#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK - -static int setup_intel_arch_watchdog(unsigned nmi_hz) -{ - unsigned int ebx; - union cpuid10_eax eax; - unsigned int unused; - unsigned int perfctr_msr, evntsel_msr; - unsigned int evntsel; - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - - /* - * Check whether the Architectural PerfMon supports - * Unhalted Core Cycles Event or not. - * NOTE: Corresponding bit = 0 in ebx indicates event present. - */ - cpuid(10, &(eax.full), &ebx, &unused, &unused); - if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || - (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) - return 0; - - perfctr_msr = wd_ops->perfctr; - evntsel_msr = wd_ops->evntsel; - - wrmsrl(perfctr_msr, 0UL); - - evntsel = ARCH_PERFMON_EVENTSEL_INT - | ARCH_PERFMON_EVENTSEL_OS - | ARCH_PERFMON_EVENTSEL_USR - | ARCH_PERFMON_NMI_EVENT_SEL - | ARCH_PERFMON_NMI_EVENT_UMASK; - - /* setup the timer */ - wrmsr(evntsel_msr, evntsel, 0); - nmi_hz = adjust_for_32bit_ctr(nmi_hz); - write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); - - wd->perfctr_msr = perfctr_msr; - wd->evntsel_msr = evntsel_msr; - wd->cccr_msr = 0; //unused - wd_ops->checkbit = 1ULL << (eax.split.bit_width - 1); - return 1; -} - -static struct wd_ops intel_arch_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_intel_arch_watchdog, - .rearm = p6_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_ARCH_PERFMON_PERFCTR1, - .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, -}; - -static struct wd_ops coreduo_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_intel_arch_watchdog, - .rearm = p6_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .evntsel = MSR_ARCH_PERFMON_EVENTSEL0, -}; - -static void probe_nmi_watchdog(void) -{ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && - boot_cpu_data.x86 != 16) - return; - wd_ops = &k7_wd_ops; - break; - case X86_VENDOR_INTEL: - /* Work around Core Duo (Yonah) errata AE49 where perfctr1 - doesn't have a working enable bit. */ - if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { - wd_ops = &coreduo_wd_ops; - break; - } - if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { - wd_ops = &intel_arch_wd_ops; - break; - } - switch (boot_cpu_data.x86) { - case 6: - if (boot_cpu_data.x86_model > 0xd) - return; - - wd_ops = &p6_wd_ops; - break; - case 15: - if (boot_cpu_data.x86_model > 0x4) - return; - - wd_ops = &p4_wd_ops; - break; - default: - return; - } - break; - } -} - -/* Interface to nmi.c */ - -int lapic_watchdog_init(unsigned nmi_hz) -{ - if (!wd_ops) { - probe_nmi_watchdog(); - if (!wd_ops) - return -1; - - if (!wd_ops->reserve()) { - printk(KERN_ERR - "NMI watchdog: cannot reserve perfctrs\n"); - return -1; - } - } - - if (!(wd_ops->setup(nmi_hz))) { - printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n", - raw_smp_processor_id()); - return -1; - } - - return 0; -} - -void lapic_watchdog_stop(void) -{ - if (wd_ops) - wd_ops->stop(); -} - -unsigned lapic_adjust_nmi_hz(unsigned hz) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - if (wd->perfctr_msr == MSR_P6_PERFCTR0 || - wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1) - hz = adjust_for_32bit_ctr(hz); - return hz; -} - -int lapic_wd_event(unsigned nmi_hz) -{ - struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); - u64 ctr; - rdmsrl(wd->perfctr_msr, ctr); - if (ctr & wd_ops->checkbit) { /* perfctr still running? */ - return 0; - } - wd_ops->rearm(wd, nmi_hz); - return 1; -} - -int lapic_watchdog_ok(void) -{ - return wd_ops != NULL; -} diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c deleted file mode 100644 index 1e31b6caffb..00000000000 --- a/arch/i386/kernel/cpu/proc.c +++ /dev/null @@ -1,192 +0,0 @@ -#include -#include -#include -#include -#include -#include - -/* - * Get CPU information for use by the procfs. - */ -static int show_cpuinfo(struct seq_file *m, void *v) -{ - /* - * These flag bits must match the definitions in . - * NULL means this bit is undefined or reserved; either way it doesn't - * have meaning as far as Linux is concerned. Note that it's important - * to realize there is a difference between this table and CPUID -- if - * applications want to get the raw CPUID data, they should access - * /dev/cpu//cpuid instead. - */ - static const char * const x86_cap_flags[] = { - /* Intel-defined */ - "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", - "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", - "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", - "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", - - /* AMD-defined */ - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, - NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", - "3dnowext", "3dnow", - - /* Transmeta-defined */ - "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Other (Linux-defined) */ - "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", - NULL, NULL, NULL, NULL, - "constant_tsc", "up", NULL, "arch_perfmon", - "pebs", "bts", NULL, "sync_rdtsc", - "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Intel-defined (#2) */ - "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", - "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* VIA/Cyrix/Centaur-defined */ - NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", - "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy", - "altmovcr8", "abm", "sse4a", - "misalignsse", "3dnowprefetch", - "osvw", "ibs", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Auxiliary (Linux-defined) */ - "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - }; - static const char * const x86_power_flags[] = { - "ts", /* temperature sensor */ - "fid", /* frequency id control */ - "vid", /* voltage id control */ - "ttp", /* thermal trip */ - "tm", - "stc", - "100mhzsteps", - "hwpstate", - "", /* constant_tsc - moved to flags */ - /* nothing */ - }; - struct cpuinfo_x86 *c = v; - int i, n = c - cpu_data; - int fpu_exception; - -#ifdef CONFIG_SMP - if (!cpu_online(n)) - return 0; -#endif - seq_printf(m, "processor\t: %d\n" - "vendor_id\t: %s\n" - "cpu family\t: %d\n" - "model\t\t: %d\n" - "model name\t: %s\n", - n, - c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", - c->x86, - c->x86_model, - c->x86_model_id[0] ? c->x86_model_id : "unknown"); - - if (c->x86_mask || c->cpuid_level >= 0) - seq_printf(m, "stepping\t: %d\n", c->x86_mask); - else - seq_printf(m, "stepping\t: unknown\n"); - - if ( cpu_has(c, X86_FEATURE_TSC) ) { - unsigned int freq = cpufreq_quick_get(n); - if (!freq) - freq = cpu_khz; - seq_printf(m, "cpu MHz\t\t: %u.%03u\n", - freq / 1000, (freq % 1000)); - } - - /* Cache size */ - if (c->x86_cache_size >= 0) - seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); -#ifdef CONFIG_X86_HT - if (c->x86_max_cores * smp_num_siblings > 1) { - seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n])); - seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); - seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); - } -#endif - - /* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */ - fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu); - seq_printf(m, "fdiv_bug\t: %s\n" - "hlt_bug\t\t: %s\n" - "f00f_bug\t: %s\n" - "coma_bug\t: %s\n" - "fpu\t\t: %s\n" - "fpu_exception\t: %s\n" - "cpuid level\t: %d\n" - "wp\t\t: %s\n" - "flags\t\t:", - c->fdiv_bug ? "yes" : "no", - c->hlt_works_ok ? "no" : "yes", - c->f00f_bug ? "yes" : "no", - c->coma_bug ? "yes" : "no", - c->hard_math ? "yes" : "no", - fpu_exception ? "yes" : "no", - c->cpuid_level, - c->wp_works_ok ? "yes" : "no"); - - for ( i = 0 ; i < 32*NCAPINTS ; i++ ) - if ( test_bit(i, c->x86_capability) && - x86_cap_flags[i] != NULL ) - seq_printf(m, " %s", x86_cap_flags[i]); - - for (i = 0; i < 32; i++) - if (c->x86_power & (1 << i)) { - if (i < ARRAY_SIZE(x86_power_flags) && - x86_power_flags[i]) - seq_printf(m, "%s%s", - x86_power_flags[i][0]?" ":"", - x86_power_flags[i]); - else - seq_printf(m, " [%d]", i); - } - - seq_printf(m, "\nbogomips\t: %lu.%02lu\n", - c->loops_per_jiffy/(500000/HZ), - (c->loops_per_jiffy/(5000/HZ)) % 100); - seq_printf(m, "clflush size\t: %u\n\n", c->x86_clflush_size); - - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - return *pos < NR_CPUS ? cpu_data + *pos : NULL; -} -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return c_start(m, pos); -} -static void c_stop(struct seq_file *m, void *v) -{ -} -struct seq_operations cpuinfo_op = { - .start = c_start, - .next = c_next, - .stop = c_stop, - .show = show_cpuinfo, -}; diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c deleted file mode 100644 index 200fb3f9ebf..00000000000 --- a/arch/i386/kernel/cpu/transmeta.c +++ /dev/null @@ -1,116 +0,0 @@ -#include -#include -#include -#include -#include -#include "cpu.h" - -static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) -{ - unsigned int cap_mask, uk, max, dummy; - unsigned int cms_rev1, cms_rev2; - unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; - char cpu_info[65]; - - get_model_name(c); /* Same as AMD/Cyrix */ - display_cacheinfo(c); - - /* Print CMS and CPU revision */ - max = cpuid_eax(0x80860000); - cpu_rev = 0; - if ( max >= 0x80860001 ) { - cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); - if (cpu_rev != 0x02000000) { - printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n", - (cpu_rev >> 24) & 0xff, - (cpu_rev >> 16) & 0xff, - (cpu_rev >> 8) & 0xff, - cpu_rev & 0xff, - cpu_freq); - } - } - if ( max >= 0x80860002 ) { - cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy); - if (cpu_rev == 0x02000000) { - printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n", - new_cpu_rev, cpu_freq); - } - printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n", - (cms_rev1 >> 24) & 0xff, - (cms_rev1 >> 16) & 0xff, - (cms_rev1 >> 8) & 0xff, - cms_rev1 & 0xff, - cms_rev2); - } - if ( max >= 0x80860006 ) { - cpuid(0x80860003, - (void *)&cpu_info[0], - (void *)&cpu_info[4], - (void *)&cpu_info[8], - (void *)&cpu_info[12]); - cpuid(0x80860004, - (void *)&cpu_info[16], - (void *)&cpu_info[20], - (void *)&cpu_info[24], - (void *)&cpu_info[28]); - cpuid(0x80860005, - (void *)&cpu_info[32], - (void *)&cpu_info[36], - (void *)&cpu_info[40], - (void *)&cpu_info[44]); - cpuid(0x80860006, - (void *)&cpu_info[48], - (void *)&cpu_info[52], - (void *)&cpu_info[56], - (void *)&cpu_info[60]); - cpu_info[64] = '\0'; - printk(KERN_INFO "CPU: %s\n", cpu_info); - } - - /* Unhide possibly hidden capability flags */ - rdmsr(0x80860004, cap_mask, uk); - wrmsr(0x80860004, ~0, uk); - c->x86_capability[0] = cpuid_edx(0x00000001); - wrmsr(0x80860004, cap_mask, uk); - - /* All Transmeta CPUs have a constant TSC */ - set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); - - /* If we can run i686 user-space code, call us an i686 */ -#define USER686 ((1 << X86_FEATURE_TSC)|\ - (1 << X86_FEATURE_CX8)|\ - (1 << X86_FEATURE_CMOV)) - if (c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686) - c->x86 = 6; - -#ifdef CONFIG_SYSCTL - /* randomize_va_space slows us down enormously; - it probably triggers retranslation of x86->native bytecode */ - randomize_va_space = 0; -#endif -} - -static void __cpuinit transmeta_identify(struct cpuinfo_x86 * c) -{ - u32 xlvl; - - /* Transmeta-defined flags: level 0x80860001 */ - xlvl = cpuid_eax(0x80860000); - if ( (xlvl & 0xffff0000) == 0x80860000 ) { - if ( xlvl >= 0x80860001 ) - c->x86_capability[2] = cpuid_edx(0x80860001); - } -} - -static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { - .c_vendor = "Transmeta", - .c_ident = { "GenuineTMx86", "TransmetaCPU" }, - .c_init = init_transmeta, - .c_identify = transmeta_identify, -}; - -int __init transmeta_init_cpu(void) -{ - cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev; - return 0; -} diff --git a/arch/i386/kernel/cpu/umc.c b/arch/i386/kernel/cpu/umc.c deleted file mode 100644 index a7a4e75bdcd..00000000000 --- a/arch/i386/kernel/cpu/umc.c +++ /dev/null @@ -1,26 +0,0 @@ -#include -#include -#include -#include "cpu.h" - -/* UMC chips appear to be only either 386 or 486, so no special init takes place. - */ - -static struct cpu_dev umc_cpu_dev __cpuinitdata = { - .c_vendor = "UMC", - .c_ident = { "UMC UMC UMC" }, - .c_models = { - { .vendor = X86_VENDOR_UMC, .family = 4, .model_names = - { - [1] = "U5D", - [2] = "U5S", - } - }, - }, -}; - -int __init umc_init_cpu(void) -{ - cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev; - return 0; -} diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile new file mode 100644 index 00000000000..6687f6d5ad2 --- /dev/null +++ b/arch/x86/kernel/cpu/Makefile @@ -0,0 +1,20 @@ +# +# Makefile for x86-compatible CPU details and quirks +# + +obj-y := common.o proc.o bugs.o + +obj-y += amd.o +obj-y += cyrix.o +obj-y += centaur.o +obj-y += transmeta.o +obj-y += intel.o intel_cacheinfo.o addon_cpuid_features.o +obj-y += nexgen.o +obj-y += umc.o + +obj-$(CONFIG_X86_MCE) += ../../../x86/kernel/cpu/mcheck/ + +obj-$(CONFIG_MTRR) += ../../../x86/kernel/cpu/mtrr/ +obj-$(CONFIG_CPU_FREQ) += ../../../x86/kernel/cpu/cpufreq/ + +obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c new file mode 100644 index 00000000000..3e91d3ee26e --- /dev/null +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -0,0 +1,50 @@ + +/* + * Routines to indentify additional cpu features that are scattered in + * cpuid space. + */ + +#include + +#include + +struct cpuid_bit { + u16 feature; + u8 reg; + u8 bit; + u32 level; +}; + +enum cpuid_regs { + CR_EAX = 0, + CR_ECX, + CR_EDX, + CR_EBX +}; + +void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) +{ + u32 max_level; + u32 regs[4]; + const struct cpuid_bit *cb; + + static const struct cpuid_bit cpuid_bits[] = { + { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, + { 0, 0, 0, 0 } + }; + + for (cb = cpuid_bits; cb->feature; cb++) { + + /* Verify that the level is valid */ + max_level = cpuid_eax(cb->level & 0xffff0000); + if (max_level < cb->level || + max_level > (cb->level | 0xffff)) + continue; + + cpuid(cb->level, ®s[CR_EAX], ®s[CR_EBX], + ®s[CR_ECX], ®s[CR_EDX]); + + if (regs[cb->reg] & (1 << cb->bit)) + set_bit(cb->feature, c->x86_capability); + } +} diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c new file mode 100644 index 00000000000..dcf6bbb1c7c --- /dev/null +++ b/arch/x86/kernel/cpu/amd.c @@ -0,0 +1,337 @@ +#include +#include +#include +#include +#include +#include + +#include "cpu.h" + +/* + * B step AMD K6 before B 9730xxxx have hardware bugs that can cause + * misexecution of code under Linux. Owners of such processors should + * contact AMD for precise details and a CPU swap. + * + * See http://www.multimania.com/poulot/k6bug.html + * http://www.amd.com/K6/k6docs/revgd.html + * + * The following test is erm.. interesting. AMD neglected to up + * the chip setting when fixing the bug but they also tweaked some + * performance at the same time.. + */ + +extern void vide(void); +__asm__(".align 4\nvide: ret"); + +#ifdef CONFIG_X86_LOCAL_APIC +#define ENABLE_C1E_MASK 0x18000000 +#define CPUID_PROCESSOR_SIGNATURE 1 +#define CPUID_XFAM 0x0ff00000 +#define CPUID_XFAM_K8 0x00000000 +#define CPUID_XFAM_10H 0x00100000 +#define CPUID_XFAM_11H 0x00200000 +#define CPUID_XMOD 0x000f0000 +#define CPUID_XMOD_REV_F 0x00040000 + +/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */ +static __cpuinit int amd_apic_timer_broken(void) +{ + u32 lo, hi; + u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); + switch (eax & CPUID_XFAM) { + case CPUID_XFAM_K8: + if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) + break; + case CPUID_XFAM_10H: + case CPUID_XFAM_11H: + rdmsr(MSR_K8_ENABLE_C1E, lo, hi); + if (lo & ENABLE_C1E_MASK) + return 1; + break; + default: + /* err on the side of caution */ + return 1; + } + return 0; +} +#endif + +int force_mwait __cpuinitdata; + +static void __cpuinit init_amd(struct cpuinfo_x86 *c) +{ + u32 l, h; + int mbytes = num_physpages >> (20-PAGE_SHIFT); + int r; + +#ifdef CONFIG_SMP + unsigned long long value; + + /* Disable TLB flush filter by setting HWCR.FFDIS on K8 + * bit 6 of msr C001_0015 + * + * Errata 63 for SH-B3 steppings + * Errata 122 for all steppings (F+ have it disabled by default) + */ + if (c->x86 == 15) { + rdmsrl(MSR_K7_HWCR, value); + value |= 1 << 6; + wrmsrl(MSR_K7_HWCR, value); + } +#endif + + /* + * FIXME: We should handle the K5 here. Set up the write + * range and also turn on MSR 83 bits 4 and 31 (write alloc, + * no bus pipeline) + */ + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, c->x86_capability); + + r = get_model_name(c); + + switch(c->x86) + { + case 4: + /* + * General Systems BIOSen alias the cpu frequency registers + * of the Elan at 0x000df000. Unfortuantly, one of the Linux + * drivers subsequently pokes it, and changes the CPU speed. + * Workaround : Remove the unneeded alias. + */ +#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */ +#define CBAR_ENB (0x80000000) +#define CBAR_KEY (0X000000CB) + if (c->x86_model==9 || c->x86_model == 10) { + if (inl (CBAR) & CBAR_ENB) + outl (0 | CBAR_KEY, CBAR); + } + break; + case 5: + if( c->x86_model < 6 ) + { + /* Based on AMD doc 20734R - June 2000 */ + if ( c->x86_model == 0 ) { + clear_bit(X86_FEATURE_APIC, c->x86_capability); + set_bit(X86_FEATURE_PGE, c->x86_capability); + } + break; + } + + if ( c->x86_model == 6 && c->x86_mask == 1 ) { + const int K6_BUG_LOOP = 1000000; + int n; + void (*f_vide)(void); + unsigned long d, d2; + + printk(KERN_INFO "AMD K6 stepping B detected - "); + + /* + * It looks like AMD fixed the 2.6.2 bug and improved indirect + * calls at the same time. + */ + + n = K6_BUG_LOOP; + f_vide = vide; + rdtscl(d); + while (n--) + f_vide(); + rdtscl(d2); + d = d2-d; + + if (d > 20*K6_BUG_LOOP) + printk("system stability may be impaired when more than 32 MB are used.\n"); + else + printk("probably OK (after B9730xxxx).\n"); + printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); + } + + /* K6 with old style WHCR */ + if (c->x86_model < 8 || + (c->x86_model== 8 && c->x86_mask < 8)) { + /* We can only write allocate on the low 508Mb */ + if(mbytes>508) + mbytes=508; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0x0000FFFF)==0) { + unsigned long flags; + l=(1<<0)|((mbytes/4)<<1); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", + mbytes); + } + break; + } + + if ((c->x86_model == 8 && c->x86_mask >7) || + c->x86_model == 9 || c->x86_model == 13) { + /* The more serious chips .. */ + + if(mbytes>4092) + mbytes=4092; + + rdmsr(MSR_K6_WHCR, l, h); + if ((l&0xFFFF0000)==0) { + unsigned long flags; + l=((mbytes>>2)<<22)|(1<<16); + local_irq_save(flags); + wbinvd(); + wrmsr(MSR_K6_WHCR, l, h); + local_irq_restore(flags); + printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", + mbytes); + } + + /* Set MTRR capability flag if appropriate */ + if (c->x86_model == 13 || c->x86_model == 9 || + (c->x86_model == 8 && c->x86_mask >= 8)) + set_bit(X86_FEATURE_K6_MTRR, c->x86_capability); + break; + } + + if (c->x86_model == 10) { + /* AMD Geode LX is model 10 */ + /* placeholder for any needed mods */ + break; + } + break; + case 6: /* An Athlon/Duron */ + + /* Bit 15 of Athlon specific MSR 15, needs to be 0 + * to enable SSE on Palomino/Morgan/Barton CPU's. + * If the BIOS didn't enable it already, enable it here. + */ + if (c->x86_model >= 6 && c->x86_model <= 10) { + if (!cpu_has(c, X86_FEATURE_XMM)) { + printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); + rdmsr(MSR_K7_HWCR, l, h); + l &= ~0x00008000; + wrmsr(MSR_K7_HWCR, l, h); + set_bit(X86_FEATURE_XMM, c->x86_capability); + } + } + + /* It's been determined by AMD that Athlons since model 8 stepping 1 + * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx + * As per AMD technical note 27212 0.2 + */ + if ((c->x86_model == 8 && c->x86_mask>=1) || (c->x86_model > 8)) { + rdmsr(MSR_K7_CLK_CTL, l, h); + if ((l & 0xfff00000) != 0x20000000) { + printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, + ((l & 0x000fffff)|0x20000000)); + wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); + } + } + break; + } + + switch (c->x86) { + case 15: + /* Use K8 tuning for Fam10h and Fam11h */ + case 0x10: + case 0x11: + set_bit(X86_FEATURE_K8, c->x86_capability); + break; + case 6: + set_bit(X86_FEATURE_K7, c->x86_capability); + break; + } + if (c->x86 >= 6) + set_bit(X86_FEATURE_FXSAVE_LEAK, c->x86_capability); + + display_cacheinfo(c); + + if (cpuid_eax(0x80000000) >= 0x80000008) { + c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; + } + + if (cpuid_eax(0x80000000) >= 0x80000007) { + c->x86_power = cpuid_edx(0x80000007); + if (c->x86_power & (1<<8)) + set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); + } + +#ifdef CONFIG_X86_HT + /* + * On a AMD multi core setup the lower bits of the APIC id + * distingush the cores. + */ + if (c->x86_max_cores > 1) { + int cpu = smp_processor_id(); + unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf; + + if (bits == 0) { + while ((1 << bits) < c->x86_max_cores) + bits++; + } + c->cpu_core_id = c->phys_proc_id & ((1<phys_proc_id >>= bits; + printk(KERN_INFO "CPU %d(%d) -> Core %d\n", + cpu, c->x86_max_cores, c->cpu_core_id); + } +#endif + + if (cpuid_eax(0x80000000) >= 0x80000006) { + if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000)) + num_cache_leaves = 4; + else + num_cache_leaves = 3; + } + +#ifdef CONFIG_X86_LOCAL_APIC + if (amd_apic_timer_broken()) + local_apic_timer_disabled = 1; +#endif + + if (c->x86 == 0x10 && !force_mwait) + clear_bit(X86_FEATURE_MWAIT, c->x86_capability); + + /* K6s reports MCEs but don't actually have all the MSRs */ + if (c->x86 < 6) + clear_bit(X86_FEATURE_MCE, c->x86_capability); +} + +static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) +{ + /* AMD errata T13 (order #21922) */ + if ((c->x86 == 6)) { + if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ + size = 64; + if (c->x86_model == 4 && + (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */ + size = 256; + } + return size; +} + +static struct cpu_dev amd_cpu_dev __cpuinitdata = { + .c_vendor = "AMD", + .c_ident = { "AuthenticAMD" }, + .c_models = { + { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = + { + [3] = "486 DX/2", + [7] = "486 DX/2-WB", + [8] = "486 DX/4", + [9] = "486 DX/4-WB", + [14] = "Am5x86-WT", + [15] = "Am5x86-WB" + } + }, + }, + .c_init = init_amd, + .c_size_cache = amd_size_cache, +}; + +int __init amd_init_cpu(void) +{ + cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev; + return 0; +} diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c new file mode 100644 index 00000000000..59266f03d1c --- /dev/null +++ b/arch/x86/kernel/cpu/bugs.c @@ -0,0 +1,192 @@ +/* + * arch/i386/cpu/bugs.c + * + * Copyright (C) 1994 Linus Torvalds + * + * Cyrix stuff, June 1998 by: + * - Rafael R. Reilova (moved everything from head.S), + * + * - Channing Corn (tests & fixes), + * - Andrew D. Balsa (code cleanup). + */ +#include +#include +#include +#include +#include +#include +#include +#include + +static int __init no_halt(char *s) +{ + boot_cpu_data.hlt_works_ok = 0; + return 1; +} + +__setup("no-hlt", no_halt); + +static int __init mca_pentium(char *s) +{ + mca_pentium_flag = 1; + return 1; +} + +__setup("mca-pentium", mca_pentium); + +static int __init no_387(char *s) +{ + boot_cpu_data.hard_math = 0; + write_cr0(0xE | read_cr0()); + return 1; +} + +__setup("no387", no_387); + +static double __initdata x = 4195835.0; +static double __initdata y = 3145727.0; + +/* + * This used to check for exceptions.. + * However, it turns out that to support that, + * the XMM trap handlers basically had to + * be buggy. So let's have a correct XMM trap + * handler, and forget about printing out + * some status at boot. + * + * We should really only care about bugs here + * anyway. Not features. + */ +static void __init check_fpu(void) +{ + if (!boot_cpu_data.hard_math) { +#ifndef CONFIG_MATH_EMULATION + printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); + printk(KERN_EMERG "Giving up.\n"); + for (;;) ; +#endif + return; + } + +/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */ + /* Test for the divl bug.. */ + __asm__("fninit\n\t" + "fldl %1\n\t" + "fdivl %2\n\t" + "fmull %2\n\t" + "fldl %1\n\t" + "fsubp %%st,%%st(1)\n\t" + "fistpl %0\n\t" + "fwait\n\t" + "fninit" + : "=m" (*&boot_cpu_data.fdiv_bug) + : "m" (*&x), "m" (*&y)); + if (boot_cpu_data.fdiv_bug) + printk("Hmm, FPU with FDIV bug.\n"); +} + +static void __init check_hlt(void) +{ + if (paravirt_enabled()) + return; + + printk(KERN_INFO "Checking 'hlt' instruction... "); + if (!boot_cpu_data.hlt_works_ok) { + printk("disabled\n"); + return; + } + halt(); + halt(); + halt(); + halt(); + printk("OK.\n"); +} + +/* + * Most 386 processors have a bug where a POPAD can lock the + * machine even from user space. + */ + +static void __init check_popad(void) +{ +#ifndef CONFIG_X86_POPAD_OK + int res, inp = (int) &res; + + printk(KERN_INFO "Checking for popad bug... "); + __asm__ __volatile__( + "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " + : "=&a" (res) + : "d" (inp) + : "ecx", "edi" ); + /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ + if (res != 12345678) printk( "Buggy.\n" ); + else printk( "OK.\n" ); +#endif +} + +/* + * Check whether we are able to run this kernel safely on SMP. + * + * - In order to run on a i386, we need to be compiled for i386 + * (for due to lack of "invlpg" and working WP on a i386) + * - In order to run on anything without a TSC, we need to be + * compiled for a i486. + * - In order to support the local APIC on a buggy Pentium machine, + * we need to be compiled with CONFIG_X86_GOOD_APIC disabled, + * which happens implicitly if compiled for a Pentium or lower + * (unless an advanced selection of CPU features is used) as an + * otherwise config implies a properly working local APIC without + * the need to do extra reads from the APIC. +*/ + +static void __init check_config(void) +{ +/* + * We'd better not be a i386 if we're configured to use some + * i486+ only features! (WP works in supervisor mode and the + * new "invlpg" and "bswap" instructions) + */ +#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP) + if (boot_cpu_data.x86 == 3) + panic("Kernel requires i486+ for 'invlpg' and other features"); +#endif + +/* + * If we configured ourselves for a TSC, we'd better have one! + */ +#ifdef CONFIG_X86_TSC + if (!cpu_has_tsc && !tsc_disable) + panic("Kernel compiled for Pentium+, requires TSC feature!"); +#endif + +/* + * If we were told we had a good local APIC, check for buggy Pentia, + * i.e. all B steppings and the C2 stepping of P54C when using their + * integrated APIC (see 11AP erratum in "Pentium Processor + * Specification Update"). + */ +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC) + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL + && cpu_has_apic + && boot_cpu_data.x86 == 5 + && boot_cpu_data.x86_model == 2 + && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11)) + panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!"); +#endif +} + + +void __init check_bugs(void) +{ + identify_boot_cpu(); +#ifndef CONFIG_SMP + printk("CPU: "); + print_cpu_info(&boot_cpu_data); +#endif + check_config(); + check_fpu(); + check_hlt(); + check_popad(); + init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); + alternative_instructions(); +} diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c new file mode 100644 index 00000000000..473eac883c7 --- /dev/null +++ b/arch/x86/kernel/cpu/centaur.c @@ -0,0 +1,471 @@ +#include +#include +#include +#include +#include +#include +#include +#include "cpu.h" + +#ifdef CONFIG_X86_OOSTORE + +static u32 __cpuinit power2(u32 x) +{ + u32 s=1; + while(s<=x) + s<<=1; + return s>>=1; +} + + +/* + * Set up an actual MCR + */ + +static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key) +{ + u32 lo, hi; + + hi = base & ~0xFFF; + lo = ~(size-1); /* Size is a power of 2 so this makes a mask */ + lo &= ~0xFFF; /* Remove the ctrl value bits */ + lo |= key; /* Attribute we wish to set */ + wrmsr(reg+MSR_IDT_MCR0, lo, hi); + mtrr_centaur_report_mcr(reg, lo, hi); /* Tell the mtrr driver */ +} + +/* + * Figure what we can cover with MCR's + * + * Shortcut: We know you can't put 4Gig of RAM on a winchip + */ + +static u32 __cpuinit ramtop(void) /* 16388 */ +{ + int i; + u32 top = 0; + u32 clip = 0xFFFFFFFFUL; + + for (i = 0; i < e820.nr_map; i++) { + unsigned long start, end; + + if (e820.map[i].addr > 0xFFFFFFFFUL) + continue; + /* + * Don't MCR over reserved space. Ignore the ISA hole + * we frob around that catastrophy already + */ + + if (e820.map[i].type == E820_RESERVED) + { + if(e820.map[i].addr >= 0x100000UL && e820.map[i].addr < clip) + clip = e820.map[i].addr; + continue; + } + start = e820.map[i].addr; + end = e820.map[i].addr + e820.map[i].size; + if (start >= end) + continue; + if (end > top) + top = end; + } + /* Everything below 'top' should be RAM except for the ISA hole. + Because of the limited MCR's we want to map NV/ACPI into our + MCR range for gunk in RAM + + Clip might cause us to MCR insufficient RAM but that is an + acceptable failure mode and should only bite obscure boxes with + a VESA hole at 15Mb + + The second case Clip sometimes kicks in is when the EBDA is marked + as reserved. Again we fail safe with reasonable results + */ + + if(top>clip) + top=clip; + + return top; +} + +/* + * Compute a set of MCR's to give maximum coverage + */ + +static int __cpuinit centaur_mcr_compute(int nr, int key) +{ + u32 mem = ramtop(); + u32 root = power2(mem); + u32 base = root; + u32 top = root; + u32 floor = 0; + int ct = 0; + + while(ct high && fspace > low) + { + centaur_mcr_insert(ct, floor, fspace, key); + floor += fspace; + } + else if(high > low) + { + centaur_mcr_insert(ct, top, high, key); + top += high; + } + else if(low > 0) + { + base -= low; + centaur_mcr_insert(ct, base, low, key); + } + else break; + ct++; + } + /* + * We loaded ct values. We now need to set the mask. The caller + * must do this bit. + */ + + return ct; +} + +static void __cpuinit centaur_create_optimal_mcr(void) +{ + int i; + /* + * Allocate up to 6 mcrs to mark as much of ram as possible + * as write combining and weak write ordered. + * + * To experiment with: Linux never uses stack operations for + * mmio spaces so we could globally enable stack operation wc + * + * Load the registers with type 31 - full write combining, all + * writes weakly ordered. + */ + int used = centaur_mcr_compute(6, 31); + + /* + * Wipe unused MCRs + */ + + for(i=used;i<8;i++) + wrmsr(MSR_IDT_MCR0+i, 0, 0); +} + +static void __cpuinit winchip2_create_optimal_mcr(void) +{ + u32 lo, hi; + int i; + + /* + * Allocate up to 6 mcrs to mark as much of ram as possible + * as write combining, weak store ordered. + * + * Load the registers with type 25 + * 8 - weak write ordering + * 16 - weak read ordering + * 1 - write combining + */ + + int used = centaur_mcr_compute(6, 25); + + /* + * Mark the registers we are using. + */ + + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + for(i=0;i>17) & 7; + lo |= key<<6; /* replace with unlock key */ + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); +} + +static void __cpuinit winchip2_protect_mcr(void) +{ + u32 lo, hi; + + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + lo&=~0x1C0; /* blank bits 8-6 */ + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); +} +#endif /* CONFIG_X86_OOSTORE */ + +#define ACE_PRESENT (1 << 6) +#define ACE_ENABLED (1 << 7) +#define ACE_FCR (1 << 28) /* MSR_VIA_FCR */ + +#define RNG_PRESENT (1 << 2) +#define RNG_ENABLED (1 << 3) +#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ + +static void __cpuinit init_c3(struct cpuinfo_x86 *c) +{ + u32 lo, hi; + + /* Test for Centaur Extended Feature Flags presence */ + if (cpuid_eax(0xC0000000) >= 0xC0000001) { + u32 tmp = cpuid_edx(0xC0000001); + + /* enable ACE unit, if present and disabled */ + if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) { + rdmsr (MSR_VIA_FCR, lo, hi); + lo |= ACE_FCR; /* enable ACE unit */ + wrmsr (MSR_VIA_FCR, lo, hi); + printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n"); + } + + /* enable RNG unit, if present and disabled */ + if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) { + rdmsr (MSR_VIA_RNG, lo, hi); + lo |= RNG_ENABLE; /* enable RNG unit */ + wrmsr (MSR_VIA_RNG, lo, hi); + printk(KERN_INFO "CPU: Enabled h/w RNG\n"); + } + + /* store Centaur Extended Feature Flags as + * word 5 of the CPU capability bit array + */ + c->x86_capability[5] = cpuid_edx(0xC0000001); + } + + /* Cyrix III family needs CX8 & PGE explicity enabled. */ + if (c->x86_model >=6 && c->x86_model <= 9) { + rdmsr (MSR_VIA_FCR, lo, hi); + lo |= (1<<1 | 1<<7); + wrmsr (MSR_VIA_FCR, lo, hi); + set_bit(X86_FEATURE_CX8, c->x86_capability); + } + + /* Before Nehemiah, the C3's had 3dNOW! */ + if (c->x86_model >=6 && c->x86_model <9) + set_bit(X86_FEATURE_3DNOW, c->x86_capability); + + get_model_name(c); + display_cacheinfo(c); +} + +static void __cpuinit init_centaur(struct cpuinfo_x86 *c) +{ + enum { + ECX8=1<<1, + EIERRINT=1<<2, + DPM=1<<3, + DMCE=1<<4, + DSTPCLK=1<<5, + ELINEAR=1<<6, + DSMC=1<<7, + DTLOCK=1<<8, + EDCTLB=1<<8, + EMMX=1<<9, + DPDC=1<<11, + EBRPRED=1<<12, + DIC=1<<13, + DDC=1<<14, + DNA=1<<15, + ERETSTK=1<<16, + E2MMX=1<<19, + EAMD3D=1<<20, + }; + + char *name; + u32 fcr_set=0; + u32 fcr_clr=0; + u32 lo,hi,newlo; + u32 aa,bb,cc,dd; + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, c->x86_capability); + + switch (c->x86) { + + case 5: + switch(c->x86_model) { + case 4: + name="C6"; + fcr_set=ECX8|DSMC|EDCTLB|EMMX|ERETSTK; + fcr_clr=DPDC; + printk(KERN_NOTICE "Disabling bugged TSC.\n"); + clear_bit(X86_FEATURE_TSC, c->x86_capability); +#ifdef CONFIG_X86_OOSTORE + centaur_create_optimal_mcr(); + /* Enable + write combining on non-stack, non-string + write combining on string, all types + weak write ordering + + The C6 original lacks weak read order + + Note 0x120 is write only on Winchip 1 */ + + wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); +#endif + break; + case 8: + switch(c->x86_mask) { + default: + name="2"; + break; + case 7 ... 9: + name="2A"; + break; + case 10 ... 15: + name="2B"; + break; + } + fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; + fcr_clr=DPDC; +#ifdef CONFIG_X86_OOSTORE + winchip2_unprotect_mcr(); + winchip2_create_optimal_mcr(); + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + /* Enable + write combining on non-stack, non-string + write combining on string, all types + weak write ordering + */ + lo|=31; + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); + winchip2_protect_mcr(); +#endif + break; + case 9: + name="3"; + fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D; + fcr_clr=DPDC; +#ifdef CONFIG_X86_OOSTORE + winchip2_unprotect_mcr(); + winchip2_create_optimal_mcr(); + rdmsr(MSR_IDT_MCR_CTRL, lo, hi); + /* Enable + write combining on non-stack, non-string + write combining on string, all types + weak write ordering + */ + lo|=31; + wrmsr(MSR_IDT_MCR_CTRL, lo, hi); + winchip2_protect_mcr(); +#endif + break; + default: + name="??"; + } + + rdmsr(MSR_IDT_FCR1, lo, hi); + newlo=(lo|fcr_set) & (~fcr_clr); + + if (newlo!=lo) { + printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", lo, newlo ); + wrmsr(MSR_IDT_FCR1, newlo, hi ); + } else { + printk(KERN_INFO "Centaur FCR is 0x%X\n",lo); + } + /* Emulate MTRRs using Centaur's MCR. */ + set_bit(X86_FEATURE_CENTAUR_MCR, c->x86_capability); + /* Report CX8 */ + set_bit(X86_FEATURE_CX8, c->x86_capability); + /* Set 3DNow! on Winchip 2 and above. */ + if (c->x86_model >=8) + set_bit(X86_FEATURE_3DNOW, c->x86_capability); + /* See if we can find out some more. */ + if ( cpuid_eax(0x80000000) >= 0x80000005 ) { + /* Yes, we can. */ + cpuid(0x80000005,&aa,&bb,&cc,&dd); + /* Add L1 data and code cache sizes. */ + c->x86_cache_size = (cc>>24)+(dd>>24); + } + sprintf( c->x86_model_id, "WinChip %s", name ); + break; + + case 6: + init_c3(c); + break; + } +} + +static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size) +{ + /* VIA C3 CPUs (670-68F) need further shifting. */ + if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) + size >>= 8; + + /* VIA also screwed up Nehemiah stepping 1, and made + it return '65KB' instead of '64KB' + - Note, it seems this may only be in engineering samples. */ + if ((c->x86==6) && (c->x86_model==9) && (c->x86_mask==1) && (size==65)) + size -=1; + + return size; +} + +static struct cpu_dev centaur_cpu_dev __cpuinitdata = { + .c_vendor = "Centaur", + .c_ident = { "CentaurHauls" }, + .c_init = init_centaur, + .c_size_cache = centaur_size_cache, +}; + +int __init centaur_init_cpu(void) +{ + cpu_devs[X86_VENDOR_CENTAUR] = ¢aur_cpu_dev; + return 0; +} diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c new file mode 100644 index 00000000000..d506201d397 --- /dev/null +++ b/arch/x86/kernel/cpu/common.c @@ -0,0 +1,733 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_X86_LOCAL_APIC +#include +#include +#include +#endif + +#include "cpu.h" + +DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { + [GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 }, + [GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 }, + [GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 }, + [GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 }, + /* + * Segments used for calling PnP BIOS have byte granularity. + * They code segments and data segments have fixed 64k limits, + * the transfer segment sizes are set at run time. + */ + [GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ + [GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */ + [GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */ + [GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */ + [GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */ + /* + * The APM segments have byte granularity and their bases + * are set at run time. All have 64k limits. + */ + [GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ + /* 16-bit code */ + [GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 }, + [GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */ + + [GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 }, + [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 }, +} }; +EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); + +static int cachesize_override __cpuinitdata = -1; +static int disable_x86_fxsr __cpuinitdata; +static int disable_x86_serial_nr __cpuinitdata = 1; +static int disable_x86_sep __cpuinitdata; + +struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; + +extern int disable_pse; + +static void __cpuinit default_init(struct cpuinfo_x86 * c) +{ + /* Not much we can do here... */ + /* Check if at least it has cpuid */ + if (c->cpuid_level == -1) { + /* No cpuid. It must be an ancient CPU */ + if (c->x86 == 4) + strcpy(c->x86_model_id, "486"); + else if (c->x86 == 3) + strcpy(c->x86_model_id, "386"); + } +} + +static struct cpu_dev __cpuinitdata default_cpu = { + .c_init = default_init, + .c_vendor = "Unknown", +}; +static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu; + +static int __init cachesize_setup(char *str) +{ + get_option (&str, &cachesize_override); + return 1; +} +__setup("cachesize=", cachesize_setup); + +int __cpuinit get_model_name(struct cpuinfo_x86 *c) +{ + unsigned int *v; + char *p, *q; + + if (cpuid_eax(0x80000000) < 0x80000004) + return 0; + + v = (unsigned int *) c->x86_model_id; + cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); + cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); + cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); + c->x86_model_id[48] = 0; + + /* Intel chips right-justify this string for some dumb reason; + undo that brain damage */ + p = q = &c->x86_model_id[0]; + while ( *p == ' ' ) + p++; + if ( p != q ) { + while ( *p ) + *q++ = *p++; + while ( q <= &c->x86_model_id[48] ) + *q++ = '\0'; /* Zero-pad the rest */ + } + + return 1; +} + + +void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) +{ + unsigned int n, dummy, ecx, edx, l2size; + + n = cpuid_eax(0x80000000); + + if (n >= 0x80000005) { + cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); + printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", + edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + c->x86_cache_size=(ecx>>24)+(edx>>24); + } + + if (n < 0x80000006) /* Some chips just has a large L1. */ + return; + + ecx = cpuid_ecx(0x80000006); + l2size = ecx >> 16; + + /* do processor-specific cache resizing */ + if (this_cpu->c_size_cache) + l2size = this_cpu->c_size_cache(c,l2size); + + /* Allow user to override all this if necessary. */ + if (cachesize_override != -1) + l2size = cachesize_override; + + if ( l2size == 0 ) + return; /* Again, no L2 cache is possible */ + + c->x86_cache_size = l2size; + + printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", + l2size, ecx & 0xFF); +} + +/* Naming convention should be: [()] */ +/* This table only is used unless init_() below doesn't set it; */ +/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ + +/* Look up CPU names by table lookup. */ +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) +{ + struct cpu_model_info *info; + + if ( c->x86_model >= 16 ) + return NULL; /* Range check */ + + if (!this_cpu) + return NULL; + + info = this_cpu->c_models; + + while (info && info->family) { + if (info->family == c->x86) + return info->model_names[c->x86_model]; + info++; + } + return NULL; /* Not found */ +} + + +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) +{ + char *v = c->x86_vendor_id; + int i; + static int printed; + + for (i = 0; i < X86_VENDOR_NUM; i++) { + if (cpu_devs[i]) { + if (!strcmp(v,cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v,cpu_devs[i]->c_ident[1]))) { + c->x86_vendor = i; + if (!early) + this_cpu = cpu_devs[i]; + return; + } + } + } + if (!printed) { + printed++; + printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); + printk(KERN_ERR "CPU: Your system may be unstable.\n"); + } + c->x86_vendor = X86_VENDOR_UNKNOWN; + this_cpu = &default_cpu; +} + + +static int __init x86_fxsr_setup(char * s) +{ + /* Tell all the other CPU's to not use it... */ + disable_x86_fxsr = 1; + + /* + * ... and clear the bits early in the boot_cpu_data + * so that the bootup process doesn't try to do this + * either. + */ + clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability); + clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability); + return 1; +} +__setup("nofxsr", x86_fxsr_setup); + + +static int __init x86_sep_setup(char * s) +{ + disable_x86_sep = 1; + return 1; +} +__setup("nosep", x86_sep_setup); + + +/* Standard macro to see if a specific flag is changeable */ +static inline int flag_is_changeable_p(u32 flag) +{ + u32 f1, f2; + + asm("pushfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl %2,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0\n\t" + "popfl\n\t" + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + + return ((f1^f2) & flag) != 0; +} + + +/* Probe for the CPUID instruction */ +static int __cpuinit have_cpuid_p(void) +{ + return flag_is_changeable_p(X86_EFLAGS_ID); +} + +void __init cpu_detect(struct cpuinfo_x86 *c) +{ + /* Get vendor name */ + cpuid(0x00000000, &c->cpuid_level, + (int *)&c->x86_vendor_id[0], + (int *)&c->x86_vendor_id[8], + (int *)&c->x86_vendor_id[4]); + + c->x86 = 4; + if (c->cpuid_level >= 0x00000001) { + u32 junk, tfms, cap0, misc; + cpuid(0x00000001, &tfms, &misc, &junk, &cap0); + c->x86 = (tfms >> 8) & 15; + c->x86_model = (tfms >> 4) & 15; + if (c->x86 == 0xf) + c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) + c->x86_model += ((tfms >> 16) & 0xF) << 4; + c->x86_mask = tfms & 15; + if (cap0 & (1<<19)) + c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; + } +} + +/* Do minimum CPU detection early. + Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. + The others are not touched to avoid unwanted side effects. + + WARNING: this function is only called on the BP. Don't add code here + that is supposed to run on all CPUs. */ +static void __init early_cpu_detect(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + c->x86_cache_alignment = 32; + + if (!have_cpuid_p()) + return; + + cpu_detect(c); + + get_cpu_vendor(c, 1); +} + +static void __cpuinit generic_identify(struct cpuinfo_x86 * c) +{ + u32 tfms, xlvl; + int ebx; + + if (have_cpuid_p()) { + /* Get vendor name */ + cpuid(0x00000000, &c->cpuid_level, + (int *)&c->x86_vendor_id[0], + (int *)&c->x86_vendor_id[8], + (int *)&c->x86_vendor_id[4]); + + get_cpu_vendor(c, 0); + /* Initialize the standard set of capabilities */ + /* Note that the vendor-specific code below might override */ + + /* Intel-defined flags: level 0x00000001 */ + if ( c->cpuid_level >= 0x00000001 ) { + u32 capability, excap; + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); + c->x86_capability[0] = capability; + c->x86_capability[4] = excap; + c->x86 = (tfms >> 8) & 15; + c->x86_model = (tfms >> 4) & 15; + if (c->x86 == 0xf) + c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) + c->x86_model += ((tfms >> 16) & 0xF) << 4; + c->x86_mask = tfms & 15; +#ifdef CONFIG_X86_HT + c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); +#else + c->apicid = (ebx >> 24) & 0xFF; +#endif + if (c->x86_capability[0] & (1<<19)) + c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; + } else { + /* Have CPUID level 0 only - unheard of */ + c->x86 = 4; + } + + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + if ( (xlvl & 0xffff0000) == 0x80000000 ) { + if ( xlvl >= 0x80000001 ) { + c->x86_capability[1] = cpuid_edx(0x80000001); + c->x86_capability[6] = cpuid_ecx(0x80000001); + } + if ( xlvl >= 0x80000004 ) + get_model_name(c); /* Default name */ + } + + init_scattered_cpuid_features(c); + } + + early_intel_workaround(c); + +#ifdef CONFIG_X86_HT + c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; +#endif +} + +static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { + /* Disable processor serial number */ + unsigned long lo,hi; + rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi); + lo |= 0x200000; + wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi); + printk(KERN_NOTICE "CPU serial number disabled.\n"); + clear_bit(X86_FEATURE_PN, c->x86_capability); + + /* Disabling the serial number may affect the cpuid level */ + c->cpuid_level = cpuid_eax(0); + } +} + +static int __init x86_serial_nr_setup(char *s) +{ + disable_x86_serial_nr = 0; + return 1; +} +__setup("serialnumber", x86_serial_nr_setup); + + + +/* + * This does the hard work of actually picking apart the CPU stuff... + */ +static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) +{ + int i; + + c->loops_per_jiffy = loops_per_jiffy; + c->x86_cache_size = -1; + c->x86_vendor = X86_VENDOR_UNKNOWN; + c->cpuid_level = -1; /* CPUID not detected */ + c->x86_model = c->x86_mask = 0; /* So far unknown... */ + c->x86_vendor_id[0] = '\0'; /* Unset */ + c->x86_model_id[0] = '\0'; /* Unset */ + c->x86_max_cores = 1; + c->x86_clflush_size = 32; + memset(&c->x86_capability, 0, sizeof c->x86_capability); + + if (!have_cpuid_p()) { + /* First of all, decide if this is a 486 or higher */ + /* It's a 486 if we can modify the AC flag */ + if ( flag_is_changeable_p(X86_EFLAGS_AC) ) + c->x86 = 4; + else + c->x86 = 3; + } + + generic_identify(c); + + printk(KERN_DEBUG "CPU: After generic identify, caps:"); + for (i = 0; i < NCAPINTS; i++) + printk(" %08lx", c->x86_capability[i]); + printk("\n"); + + if (this_cpu->c_identify) { + this_cpu->c_identify(c); + + printk(KERN_DEBUG "CPU: After vendor identify, caps:"); + for (i = 0; i < NCAPINTS; i++) + printk(" %08lx", c->x86_capability[i]); + printk("\n"); + } + + /* + * Vendor-specific initialization. In this section we + * canonicalize the feature flags, meaning if there are + * features a certain CPU supports which CPUID doesn't + * tell us, CPUID claiming incorrect flags, or other bugs, + * we handle them here. + * + * At the end of this section, c->x86_capability better + * indicate the features this CPU genuinely supports! + */ + if (this_cpu->c_init) + this_cpu->c_init(c); + + /* Disable the PN if appropriate */ + squash_the_stupid_serial_number(c); + + /* + * The vendor-specific functions might have changed features. Now + * we do "generic changes." + */ + + /* TSC disabled? */ + if ( tsc_disable ) + clear_bit(X86_FEATURE_TSC, c->x86_capability); + + /* FXSR disabled? */ + if (disable_x86_fxsr) { + clear_bit(X86_FEATURE_FXSR, c->x86_capability); + clear_bit(X86_FEATURE_XMM, c->x86_capability); + } + + /* SEP disabled? */ + if (disable_x86_sep) + clear_bit(X86_FEATURE_SEP, c->x86_capability); + + if (disable_pse) + clear_bit(X86_FEATURE_PSE, c->x86_capability); + + /* If the model name is still unset, do table lookup. */ + if ( !c->x86_model_id[0] ) { + char *p; + p = table_lookup_model(c); + if ( p ) + strcpy(c->x86_model_id, p); + else + /* Last resort... */ + sprintf(c->x86_model_id, "%02x/%02x", + c->x86, c->x86_model); + } + + /* Now the feature flags better reflect actual CPU features! */ + + printk(KERN_DEBUG "CPU: After all inits, caps:"); + for (i = 0; i < NCAPINTS; i++) + printk(" %08lx", c->x86_capability[i]); + printk("\n"); + + /* + * On SMP, boot_cpu_data holds the common feature set between + * all CPUs; so make sure that we indicate which features are + * common between the CPUs. The first time this routine gets + * executed, c == &boot_cpu_data. + */ + if ( c != &boot_cpu_data ) { + /* AND the already accumulated flags with these */ + for ( i = 0 ; i < NCAPINTS ; i++ ) + boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; + } + + /* Init Machine Check Exception if available. */ + mcheck_init(c); +} + +void __init identify_boot_cpu(void) +{ + identify_cpu(&boot_cpu_data); + sysenter_setup(); + enable_sep_cpu(); + mtrr_bp_init(); +} + +void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) +{ + BUG_ON(c == &boot_cpu_data); + identify_cpu(c); + enable_sep_cpu(); + mtrr_ap_init(); +} + +#ifdef CONFIG_X86_HT +void __cpuinit detect_ht(struct cpuinfo_x86 *c) +{ + u32 eax, ebx, ecx, edx; + int index_msb, core_bits; + + cpuid(1, &eax, &ebx, &ecx, &edx); + + if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) + return; + + smp_num_siblings = (ebx & 0xff0000) >> 16; + + if (smp_num_siblings == 1) { + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + } else if (smp_num_siblings > 1 ) { + + if (smp_num_siblings > NR_CPUS) { + printk(KERN_WARNING "CPU: Unsupported number of the " + "siblings %d", smp_num_siblings); + smp_num_siblings = 1; + return; + } + + index_msb = get_count_order(smp_num_siblings); + c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); + + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + + smp_num_siblings = smp_num_siblings / c->x86_max_cores; + + index_msb = get_count_order(smp_num_siblings) ; + + core_bits = get_count_order(c->x86_max_cores); + + c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & + ((1 << core_bits) - 1); + + if (c->x86_max_cores > 1) + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + } +} +#endif + +void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) +{ + char *vendor = NULL; + + if (c->x86_vendor < X86_VENDOR_NUM) + vendor = this_cpu->c_vendor; + else if (c->cpuid_level >= 0) + vendor = c->x86_vendor_id; + + if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) + printk("%s ", vendor); + + if (!c->x86_model_id[0]) + printk("%d86", c->x86); + else + printk("%s", c->x86_model_id); + + if (c->x86_mask || c->cpuid_level >= 0) + printk(" stepping %02x\n", c->x86_mask); + else + printk("\n"); +} + +cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; + +/* This is hacky. :) + * We're emulating future behavior. + * In the future, the cpu-specific init functions will be called implicitly + * via the magic of initcalls. + * They will insert themselves into the cpu_devs structure. + * Then, when cpu_init() is called, we can just iterate over that array. + */ + +extern int intel_cpu_init(void); +extern int cyrix_init_cpu(void); +extern int nsc_init_cpu(void); +extern int amd_init_cpu(void); +extern int centaur_init_cpu(void); +extern int transmeta_init_cpu(void); +extern int nexgen_init_cpu(void); +extern int umc_init_cpu(void); + +void __init early_cpu_init(void) +{ + intel_cpu_init(); + cyrix_init_cpu(); + nsc_init_cpu(); + amd_init_cpu(); + centaur_init_cpu(); + transmeta_init_cpu(); + nexgen_init_cpu(); + umc_init_cpu(); + early_cpu_detect(); + +#ifdef CONFIG_DEBUG_PAGEALLOC + /* pse is not compatible with on-the-fly unmapping, + * disable it even if the cpus claim to support it. + */ + clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); + disable_pse = 1; +#endif +} + +/* Make sure %fs is initialized properly in idle threads */ +struct pt_regs * __devinit idle_regs(struct pt_regs *regs) +{ + memset(regs, 0, sizeof(struct pt_regs)); + regs->xfs = __KERNEL_PERCPU; + return regs; +} + +/* Current gdt points %fs at the "master" per-cpu area: after this, + * it's on the real one. */ +void switch_to_new_gdt(void) +{ + struct Xgt_desc_struct gdt_descr; + + gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); + asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); +} + +/* + * cpu_init() initializes state that is per-CPU. Some data is already + * initialized (naturally) in the bootstrap process, such as the GDT + * and IDT. We reload them nevertheless, this function acts as a + * 'CPU state barrier', nothing should get across. + */ +void __cpuinit cpu_init(void) +{ + int cpu = smp_processor_id(); + struct task_struct *curr = current; + struct tss_struct * t = &per_cpu(init_tss, cpu); + struct thread_struct *thread = &curr->thread; + + if (cpu_test_and_set(cpu, cpu_initialized)) { + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); + for (;;) local_irq_enable(); + } + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); + + if (cpu_has_vme || cpu_has_tsc || cpu_has_de) + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + if (tsc_disable && cpu_has_tsc) { + printk(KERN_NOTICE "Disabling TSC...\n"); + /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ + clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); + set_in_cr4(X86_CR4_TSD); + } + + load_idt(&idt_descr); + switch_to_new_gdt(); + + /* + * Set up and load the per-CPU TSS and LDT + */ + atomic_inc(&init_mm.mm_count); + curr->active_mm = &init_mm; + if (curr->mm) + BUG(); + enter_lazy_tlb(&init_mm, curr); + + load_esp0(t, thread); + set_tss_desc(cpu,t); + load_TR_desc(); + load_LDT(&init_mm.context); + +#ifdef CONFIG_DOUBLEFAULT + /* Set up doublefault TSS pointer in the GDT */ + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); +#endif + + /* Clear %gs. */ + asm volatile ("mov %0, %%gs" : : "r" (0)); + + /* Clear all 6 debug registers: */ + set_debugreg(0, 0); + set_debugreg(0, 1); + set_debugreg(0, 2); + set_debugreg(0, 3); + set_debugreg(0, 6); + set_debugreg(0, 7); + + /* + * Force FPU initialization: + */ + current_thread_info()->status = 0; + clear_used_math(); + mxcsr_feature_mask_init(); +} + +#ifdef CONFIG_HOTPLUG_CPU +void __cpuinit cpu_uninit(void) +{ + int cpu = raw_smp_processor_id(); + cpu_clear(cpu, cpu_initialized); + + /* lazy TLB state */ + per_cpu(cpu_tlbstate, cpu).state = 0; + per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; +} +#endif diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h new file mode 100644 index 00000000000..2f6432cef6f --- /dev/null +++ b/arch/x86/kernel/cpu/cpu.h @@ -0,0 +1,28 @@ + +struct cpu_model_info { + int vendor; + int family; + char *model_names[16]; +}; + +/* attempt to consolidate cpu attributes */ +struct cpu_dev { + char * c_vendor; + + /* some have two possibilities for cpuid string */ + char * c_ident[2]; + + struct cpu_model_info c_models[4]; + + void (*c_init)(struct cpuinfo_x86 * c); + void (*c_identify)(struct cpuinfo_x86 * c); + unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); +}; + +extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; + +extern int get_model_name(struct cpuinfo_x86 *c); +extern void display_cacheinfo(struct cpuinfo_x86 *c); + +extern void early_intel_workaround(struct cpuinfo_x86 *c); + diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c new file mode 100644 index 00000000000..122d2d75aa9 --- /dev/null +++ b/arch/x86/kernel/cpu/cyrix.c @@ -0,0 +1,463 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cpu.h" + +/* + * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU + */ +static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) +{ + unsigned char ccr2, ccr3; + unsigned long flags; + + /* we test for DEVID by checking whether CCR3 is writable */ + local_irq_save(flags); + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, ccr3 ^ 0x80); + getCx86(0xc0); /* dummy to change bus */ + + if (getCx86(CX86_CCR3) == ccr3) { /* no DEVID regs. */ + ccr2 = getCx86(CX86_CCR2); + setCx86(CX86_CCR2, ccr2 ^ 0x04); + getCx86(0xc0); /* dummy */ + + if (getCx86(CX86_CCR2) == ccr2) /* old Cx486SLC/DLC */ + *dir0 = 0xfd; + else { /* Cx486S A step */ + setCx86(CX86_CCR2, ccr2); + *dir0 = 0xfe; + } + } + else { + setCx86(CX86_CCR3, ccr3); /* restore CCR3 */ + + /* read DIR0 and DIR1 CPU registers */ + *dir0 = getCx86(CX86_DIR0); + *dir1 = getCx86(CX86_DIR1); + } + local_irq_restore(flags); +} + +/* + * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in + * order to identify the Cyrix CPU model after we're out of setup.c + * + * Actually since bugs.h doesn't even reference this perhaps someone should + * fix the documentation ??? + */ +static unsigned char Cx86_dir0_msb __cpuinitdata = 0; + +static char Cx86_model[][9] __cpuinitdata = { + "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", + "M II ", "Unknown" +}; +static char Cx486_name[][5] __cpuinitdata = { + "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", + "SRx2", "DRx2" +}; +static char Cx486S_name[][4] __cpuinitdata = { + "S", "S2", "Se", "S2e" +}; +static char Cx486D_name[][4] __cpuinitdata = { + "DX", "DX2", "?", "?", "?", "DX4" +}; +static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock"; +static char cyrix_model_mult1[] __cpuinitdata = "12??43"; +static char cyrix_model_mult2[] __cpuinitdata = "12233445"; + +/* + * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old + * BIOSes for compatibility with DOS games. This makes the udelay loop + * work correctly, and improves performance. + * + * FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP + */ + +extern void calibrate_delay(void) __init; + +static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c) +{ + unsigned long flags; + + if (Cx86_dir0_msb == 3) { + unsigned char ccr3, ccr5; + + local_irq_save(flags); + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + ccr5 = getCx86(CX86_CCR5); + if (ccr5 & 2) + setCx86(CX86_CCR5, ccr5 & 0xfd); /* reset SLOP */ + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + local_irq_restore(flags); + + if (ccr5 & 2) { /* possible wrong calibration done */ + printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n"); + calibrate_delay(); + c->loops_per_jiffy = loops_per_jiffy; + } + } +} + + +static void __cpuinit set_cx86_reorder(void) +{ + u8 ccr3; + + printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n"); + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN  */ + + /* Load/Store Serialize to mem access disable (=reorder it)  */ + setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); + /* set load/store serialize from 1GB to 4GB */ + ccr3 |= 0xe0; + setCx86(CX86_CCR3, ccr3); +} + +static void __cpuinit set_cx86_memwb(void) +{ + u32 cr0; + + printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); + + /* CCR2 bit 2: unlock NW bit */ + setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); + /* set 'Not Write-through' */ + cr0 = 0x20000000; + write_cr0(read_cr0() | cr0); + /* CCR2 bit 2: lock NW bit and set WT1 */ + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); +} + +static void __cpuinit set_cx86_inc(void) +{ + unsigned char ccr3; + + printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n"); + + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN  */ + /* PCR1 -- Performance Control */ + /* Incrementor on, whatever that is */ + setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); + /* PCR0 -- Performance Control */ + /* Incrementor Margin 10 */ + setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ +} + +/* + * Configure later MediaGX and/or Geode processor. + */ + +static void __cpuinit geode_configure(void) +{ + unsigned long flags; + u8 ccr3; + local_irq_save(flags); + + /* Suspend on halt power saving and enable #SUSP pin */ + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); + + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + + + /* FPU fast, DTE cache, Mem bypass */ + setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + + set_cx86_memwb(); + set_cx86_reorder(); + set_cx86_inc(); + + local_irq_restore(flags); +} + + +static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) +{ + unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; + char *buf = c->x86_model_id; + const char *p = NULL; + + /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, c->x86_capability); + + /* Cyrix used bit 24 in extended (AMD) CPUID for Cyrix MMX extensions */ + if ( test_bit(1*32+24, c->x86_capability) ) { + clear_bit(1*32+24, c->x86_capability); + set_bit(X86_FEATURE_CXMMX, c->x86_capability); + } + + do_cyrix_devid(&dir0, &dir1); + + check_cx686_slop(c); + + Cx86_dir0_msb = dir0_msn = dir0 >> 4; /* identifies CPU "family" */ + dir0_lsn = dir0 & 0xf; /* model or clock multiplier */ + + /* common case step number/rev -- exceptions handled below */ + c->x86_model = (dir1 >> 4) + 1; + c->x86_mask = dir1 & 0xf; + + /* Now cook; the original recipe is by Channing Corn, from Cyrix. + * We do the same thing for each generation: we work out + * the model, multiplier and stepping. Black magic included, + * to make the silicon step/rev numbers match the printed ones. + */ + + switch (dir0_msn) { + unsigned char tmp; + + case 0: /* Cx486SLC/DLC/SRx/DRx */ + p = Cx486_name[dir0_lsn & 7]; + break; + + case 1: /* Cx486S/DX/DX2/DX4 */ + p = (dir0_lsn & 8) ? Cx486D_name[dir0_lsn & 5] + : Cx486S_name[dir0_lsn & 3]; + break; + + case 2: /* 5x86 */ + Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5]; + p = Cx86_cb+2; + break; + + case 3: /* 6x86/6x86L */ + Cx86_cb[1] = ' '; + Cx86_cb[2] = cyrix_model_mult1[dir0_lsn & 5]; + if (dir1 > 0x21) { /* 686L */ + Cx86_cb[0] = 'L'; + p = Cx86_cb; + (c->x86_model)++; + } else /* 686 */ + p = Cx86_cb+1; + /* Emulate MTRRs using Cyrix's ARRs. */ + set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability); + /* 6x86's contain this bug */ + c->coma_bug = 1; + break; + + case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */ +#ifdef CONFIG_PCI + { + u32 vendor, device; + /* It isn't really a PCI quirk directly, but the cure is the + same. The MediaGX has deep magic SMM stuff that handles the + SB emulation. It thows away the fifo on disable_dma() which + is wrong and ruins the audio. + + Bug2: VSA1 has a wrap bug so that using maximum sized DMA + causes bad things. According to NatSemi VSA2 has another + bug to do with 'hlt'. I've not seen any boards using VSA2 + and X doesn't seem to support it either so who cares 8). + VSA1 we work around however. + */ + + printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); + isa_dma_bridge_buggy = 2; + + /* We do this before the PCI layer is running. However we + are safe here as we know the bridge must be a Cyrix + companion and must be present */ + vendor = read_pci_config_16(0, 0, 0x12, PCI_VENDOR_ID); + device = read_pci_config_16(0, 0, 0x12, PCI_DEVICE_ID); + + /* + * The 5510/5520 companion chips have a funky PIT. + */ + if (vendor == PCI_VENDOR_ID_CYRIX && + (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) + mark_tsc_unstable("cyrix 5510/5520 detected"); + } +#endif + c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ + + /* GXm supports extended cpuid levels 'ala' AMD */ + if (c->cpuid_level == 2) { + /* Enable cxMMX extensions (GX1 Datasheet 54) */ + setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); + + /* + * GXm : 0x30 ... 0x5f GXm datasheet 51 + * GXlv: 0x6x GXlv datasheet 54 + * ? : 0x7x + * GX1 : 0x8x GX1 datasheet 56 + */ + if((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <=dir1 && dir1 <= 0x8f)) + geode_configure(); + get_model_name(c); /* get CPU marketing name */ + return; + } + else { /* MediaGX */ + Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; + p = Cx86_cb+2; + c->x86_model = (dir1 & 0x20) ? 1 : 2; + } + break; + + case 5: /* 6x86MX/M II */ + if (dir1 > 7) + { + dir0_msn++; /* M II */ + /* Enable MMX extensions (App note 108) */ + setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); + } + else + { + c->coma_bug = 1; /* 6x86MX, it has the bug. */ + } + tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0; + Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7]; + p = Cx86_cb+tmp; + if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20)) + (c->x86_model)++; + /* Emulate MTRRs using Cyrix's ARRs. */ + set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability); + break; + + case 0xf: /* Cyrix 486 without DEVID registers */ + switch (dir0_lsn) { + case 0xd: /* either a 486SLC or DLC w/o DEVID */ + dir0_msn = 0; + p = Cx486_name[(c->hard_math) ? 1 : 0]; + break; + + case 0xe: /* a 486S A step */ + dir0_msn = 0; + p = Cx486S_name[0]; + break; + } + break; + + default: /* unknown (shouldn't happen, we know everyone ;-) */ + dir0_msn = 7; + break; + } + strcpy(buf, Cx86_model[dir0_msn & 7]); + if (p) strcat(buf, p); + return; +} + +/* + * Handle National Semiconductor branded processors + */ +static void __cpuinit init_nsc(struct cpuinfo_x86 *c) +{ + /* There may be GX1 processors in the wild that are branded + * NSC and not Cyrix. + * + * This function only handles the GX processor, and kicks every + * thing else to the Cyrix init function above - that should + * cover any processors that might have been branded differently + * after NSC acquired Cyrix. + * + * If this breaks your GX1 horribly, please e-mail + * info-linux@ldcmail.amd.com to tell us. + */ + + /* Handle the GX (Formally known as the GX2) */ + + if (c->x86 == 5 && c->x86_model == 5) + display_cacheinfo(c); + else + init_cyrix(c); +} + +/* + * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected + * by the fact that they preserve the flags across the division of 5/2. + * PII and PPro exhibit this behavior too, but they have cpuid available. + */ + +/* + * Perform the Cyrix 5/2 test. A Cyrix won't change + * the flags, while other 486 chips will. + */ +static inline int test_cyrix_52div(void) +{ + unsigned int test; + + __asm__ __volatile__( + "sahf\n\t" /* clear flags (%eax = 0x0005) */ + "div %b2\n\t" /* divide 5 by 2 */ + "lahf" /* store flags into %ah */ + : "=a" (test) + : "0" (5), "q" (2) + : "cc"); + + /* AH is 0x02 on Cyrix after the divide.. */ + return (unsigned char) (test >> 8) == 0x02; +} + +static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c) +{ + /* Detect Cyrix with disabled CPUID */ + if ( c->x86 == 4 && test_cyrix_52div() ) { + unsigned char dir0, dir1; + + strcpy(c->x86_vendor_id, "CyrixInstead"); + c->x86_vendor = X86_VENDOR_CYRIX; + + /* Actually enable cpuid on the older cyrix */ + + /* Retrieve CPU revisions */ + + do_cyrix_devid(&dir0, &dir1); + + dir0>>=4; + + /* Check it is an affected model */ + + if (dir0 == 5 || dir0 == 3) + { + unsigned char ccr3; + unsigned long flags; + printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); + local_irq_save(flags); + ccr3 = getCx86(CX86_CCR3); + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ + setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */ + setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ + local_irq_restore(flags); + } + } +} + +static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { + .c_vendor = "Cyrix", + .c_ident = { "CyrixInstead" }, + .c_init = init_cyrix, + .c_identify = cyrix_identify, +}; + +int __init cyrix_init_cpu(void) +{ + cpu_devs[X86_VENDOR_CYRIX] = &cyrix_cpu_dev; + return 0; +} + +static struct cpu_dev nsc_cpu_dev __cpuinitdata = { + .c_vendor = "NSC", + .c_ident = { "Geode by NSC" }, + .c_init = init_nsc, +}; + +int __init nsc_init_cpu(void) +{ + cpu_devs[X86_VENDOR_NSC] = &nsc_cpu_dev; + return 0; +} + diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c new file mode 100644 index 00000000000..dc4e08147b1 --- /dev/null +++ b/arch/x86/kernel/cpu/intel.c @@ -0,0 +1,333 @@ +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "cpu.h" + +#ifdef CONFIG_X86_LOCAL_APIC +#include +#include +#include +#endif + +extern int trap_init_f00f_bug(void); + +#ifdef CONFIG_X86_INTEL_USERCOPY +/* + * Alignment at which movsl is preferred for bulk memory copies. + */ +struct movsl_mask movsl_mask __read_mostly; +#endif + +void __cpuinit early_intel_workaround(struct cpuinfo_x86 *c) +{ + if (c->x86_vendor != X86_VENDOR_INTEL) + return; + /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ + if (c->x86 == 15 && c->x86_cache_alignment == 64) + c->x86_cache_alignment = 128; +} + +/* + * Early probe support logic for ppro memory erratum #50 + * + * This is called before we do cpu ident work + */ + +int __cpuinit ppro_with_ram_bug(void) +{ + /* Uses data from early_cpu_detect now */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model == 1 && + boot_cpu_data.x86_mask < 8) { + printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); + return 1; + } + return 0; +} + + +/* + * P4 Xeon errata 037 workaround. + * Hardware prefetcher may cause stale data to be loaded into the cache. + */ +static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) +{ + unsigned long lo, hi; + + if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { + rdmsr (MSR_IA32_MISC_ENABLE, lo, hi); + if ((lo & (1<<9)) == 0) { + printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); + printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); + lo |= (1<<9); /* Disable hw prefetching */ + wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); + } + } +} + + +/* + * find out the number of processor cores on the die + */ +static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) +{ + unsigned int eax, ebx, ecx, edx; + + if (c->cpuid_level < 4) + return 1; + + /* Intel has a non-standard dependency on %ecx for this CPUID level. */ + cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); + if (eax & 0x1f) + return ((eax >> 26) + 1); + else + return 1; +} + +static void __cpuinit init_intel(struct cpuinfo_x86 *c) +{ + unsigned int l2 = 0; + char *p = NULL; + +#ifdef CONFIG_X86_F00F_BUG + /* + * All current models of Pentium and Pentium with MMX technology CPUs + * have the F0 0F bug, which lets nonprivileged users lock up the system. + * Note that the workaround only should be initialized once... + */ + c->f00f_bug = 0; + if (!paravirt_enabled() && c->x86 == 5) { + static int f00f_workaround_enabled = 0; + + c->f00f_bug = 1; + if ( !f00f_workaround_enabled ) { + trap_init_f00f_bug(); + printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + f00f_workaround_enabled = 1; + } + } +#endif + + select_idle_routine(c); + l2 = init_intel_cacheinfo(c); + if (c->cpuid_level > 9 ) { + unsigned eax = cpuid_eax(10); + /* Check for version and the number of counters */ + if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) + set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); + } + + /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ + if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) + clear_bit(X86_FEATURE_SEP, c->x86_capability); + + /* Names for the Pentium II/Celeron processors + detectable only by also checking the cache size. + Dixon is NOT a Celeron. */ + if (c->x86 == 6) { + switch (c->x86_model) { + case 5: + if (c->x86_mask == 0) { + if (l2 == 0) + p = "Celeron (Covington)"; + else if (l2 == 256) + p = "Mobile Pentium II (Dixon)"; + } + break; + + case 6: + if (l2 == 128) + p = "Celeron (Mendocino)"; + else if (c->x86_mask == 0 || c->x86_mask == 5) + p = "Celeron-A"; + break; + + case 8: + if (l2 == 128) + p = "Celeron (Coppermine)"; + break; + } + } + + if ( p ) + strcpy(c->x86_model_id, p); + + c->x86_max_cores = num_cpu_cores(c); + + detect_ht(c); + + /* Work around errata */ + Intel_errata_workarounds(c); + +#ifdef CONFIG_X86_INTEL_USERCOPY + /* + * Set up the preferred alignment for movsl bulk memory moves + */ + switch (c->x86) { + case 4: /* 486: untested */ + break; + case 5: /* Old Pentia: untested */ + break; + case 6: /* PII/PIII only like movsl with 8-byte alignment */ + movsl_mask.mask = 7; + break; + case 15: /* P4 is OK down to 8-byte alignment */ + movsl_mask.mask = 7; + break; + } +#endif + + if (c->x86 == 15) { + set_bit(X86_FEATURE_P4, c->x86_capability); + set_bit(X86_FEATURE_SYNC_RDTSC, c->x86_capability); + } + if (c->x86 == 6) + set_bit(X86_FEATURE_P3, c->x86_capability); + if ((c->x86 == 0xf && c->x86_model >= 0x03) || + (c->x86 == 0x6 && c->x86_model >= 0x0e)) + set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); + + if (cpu_has_ds) { + unsigned int l1; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_bit(X86_FEATURE_BTS, c->x86_capability); + if (!(l1 & (1<<12))) + set_bit(X86_FEATURE_PEBS, c->x86_capability); + } +} + +static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) +{ + /* Intel PIII Tualatin. This comes in two flavours. + * One has 256kb of cache, the other 512. We have no way + * to determine which, so we use a boottime override + * for the 512kb model, and assume 256 otherwise. + */ + if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) + size = 256; + return size; +} + +static struct cpu_dev intel_cpu_dev __cpuinitdata = { + .c_vendor = "Intel", + .c_ident = { "GenuineIntel" }, + .c_models = { + { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = + { + [0] = "486 DX-25/33", + [1] = "486 DX-50", + [2] = "486 SX", + [3] = "486 DX/2", + [4] = "486 SL", + [5] = "486 SX/2", + [7] = "486 DX/2-WB", + [8] = "486 DX/4", + [9] = "486 DX/4-WB" + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 5, .model_names = + { + [0] = "Pentium 60/66 A-step", + [1] = "Pentium 60/66", + [2] = "Pentium 75 - 200", + [3] = "OverDrive PODP5V83", + [4] = "Pentium MMX", + [7] = "Mobile Pentium 75 - 200", + [8] = "Mobile Pentium MMX" + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names = + { + [0] = "Pentium Pro A-step", + [1] = "Pentium Pro", + [3] = "Pentium II (Klamath)", + [4] = "Pentium II (Deschutes)", + [5] = "Pentium II (Deschutes)", + [6] = "Mobile Pentium II", + [7] = "Pentium III (Katmai)", + [8] = "Pentium III (Coppermine)", + [10] = "Pentium III (Cascades)", + [11] = "Pentium III (Tualatin)", + } + }, + { .vendor = X86_VENDOR_INTEL, .family = 15, .model_names = + { + [0] = "Pentium 4 (Unknown)", + [1] = "Pentium 4 (Willamette)", + [2] = "Pentium 4 (Northwood)", + [4] = "Pentium 4 (Foster)", + [5] = "Pentium 4 (Foster)", + } + }, + }, + .c_init = init_intel, + .c_size_cache = intel_size_cache, +}; + +__init int intel_cpu_init(void) +{ + cpu_devs[X86_VENDOR_INTEL] = &intel_cpu_dev; + return 0; +} + +#ifndef CONFIG_X86_CMPXCHG +unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) +{ + u8 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u8 *)ptr; + if (prev == old) + *(u8 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u8); + +unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) +{ + u16 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u16 *)ptr; + if (prev == old) + *(u16 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u16); + +unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) +{ + u32 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u32 *)ptr; + if (prev == old) + *(u32 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u32); +#endif + +// arch_initcall(intel_cpu_init); + diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c new file mode 100644 index 00000000000..db6c25aa577 --- /dev/null +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -0,0 +1,806 @@ +/* + * Routines to indentify caches on Intel CPU. + * + * Changes: + * Venkatesh Pallipadi : Adding cache identification through cpuid(4) + * Ashok Raj : Work with CPU hotplug infrastructure. + * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#define LVL_1_INST 1 +#define LVL_1_DATA 2 +#define LVL_2 3 +#define LVL_3 4 +#define LVL_TRACE 5 + +struct _cache_table +{ + unsigned char descriptor; + char cache_type; + short size; +}; + +/* all the cache descriptor types we care about (no TLB or trace cache entries) */ +static struct _cache_table cache_table[] __cpuinitdata = +{ + { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ + { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ + { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ + { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ + { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ + { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ + { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */ + { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */ + { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */ + { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ + { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ + { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ + { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */ + { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */ + { 0x46, LVL_3, 4096 }, /* 4-way set assoc, 64 byte line size */ + { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */ + { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ + { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */ + { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ + { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ + { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ + { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */ + { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ + { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ + { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */ + { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */ + { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */ + { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ + { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ + { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ + { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */ + { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */ + { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ + { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */ + { 0x00, 0, 0} +}; + + +enum _cache_type +{ + CACHE_TYPE_NULL = 0, + CACHE_TYPE_DATA = 1, + CACHE_TYPE_INST = 2, + CACHE_TYPE_UNIFIED = 3 +}; + +union _cpuid4_leaf_eax { + struct { + enum _cache_type type:5; + unsigned int level:3; + unsigned int is_self_initializing:1; + unsigned int is_fully_associative:1; + unsigned int reserved:4; + unsigned int num_threads_sharing:12; + unsigned int num_cores_on_die:6; + } split; + u32 full; +}; + +union _cpuid4_leaf_ebx { + struct { + unsigned int coherency_line_size:12; + unsigned int physical_line_partition:10; + unsigned int ways_of_associativity:10; + } split; + u32 full; +}; + +union _cpuid4_leaf_ecx { + struct { + unsigned int number_of_sets:32; + } split; + u32 full; +}; + +struct _cpuid4_info { + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + unsigned long size; + cpumask_t shared_cpu_map; +}; + +unsigned short num_cache_leaves; + +/* AMD doesn't have CPUID4. Emulate it here to report the same + information to the user. This makes some assumptions about the machine: + L2 not shared, no SMT etc. that is currently true on AMD CPUs. + + In theory the TLBs could be reported as fake type (they are in "dummy"). + Maybe later */ +union l1_cache { + struct { + unsigned line_size : 8; + unsigned lines_per_tag : 8; + unsigned assoc : 8; + unsigned size_in_kb : 8; + }; + unsigned val; +}; + +union l2_cache { + struct { + unsigned line_size : 8; + unsigned lines_per_tag : 4; + unsigned assoc : 4; + unsigned size_in_kb : 16; + }; + unsigned val; +}; + +union l3_cache { + struct { + unsigned line_size : 8; + unsigned lines_per_tag : 4; + unsigned assoc : 4; + unsigned res : 2; + unsigned size_encoded : 14; + }; + unsigned val; +}; + +static const unsigned short assocs[] = { + [1] = 1, [2] = 2, [4] = 4, [6] = 8, + [8] = 16, [0xa] = 32, [0xb] = 48, + [0xc] = 64, + [0xf] = 0xffff // ?? +}; + +static const unsigned char levels[] = { 1, 1, 2, 3 }; +static const unsigned char types[] = { 1, 2, 3, 3 }; + +static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, + union _cpuid4_leaf_ebx *ebx, + union _cpuid4_leaf_ecx *ecx) +{ + unsigned dummy; + unsigned line_size, lines_per_tag, assoc, size_in_kb; + union l1_cache l1i, l1d; + union l2_cache l2; + union l3_cache l3; + union l1_cache *l1 = &l1d; + + eax->full = 0; + ebx->full = 0; + ecx->full = 0; + + cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val); + cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val); + + switch (leaf) { + case 1: + l1 = &l1i; + case 0: + if (!l1->val) + return; + assoc = l1->assoc; + line_size = l1->line_size; + lines_per_tag = l1->lines_per_tag; + size_in_kb = l1->size_in_kb; + break; + case 2: + if (!l2.val) + return; + assoc = l2.assoc; + line_size = l2.line_size; + lines_per_tag = l2.lines_per_tag; + /* cpu_data has errata corrections for K7 applied */ + size_in_kb = current_cpu_data.x86_cache_size; + break; + case 3: + if (!l3.val) + return; + assoc = l3.assoc; + line_size = l3.line_size; + lines_per_tag = l3.lines_per_tag; + size_in_kb = l3.size_encoded * 512; + break; + default: + return; + } + + eax->split.is_self_initializing = 1; + eax->split.type = types[leaf]; + eax->split.level = levels[leaf]; + if (leaf == 3) + eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1; + else + eax->split.num_threads_sharing = 0; + eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; + + + if (assoc == 0xf) + eax->split.is_fully_associative = 1; + ebx->split.coherency_line_size = line_size - 1; + ebx->split.ways_of_associativity = assocs[assoc] - 1; + ebx->split.physical_line_partition = lines_per_tag - 1; + ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / + (ebx->split.ways_of_associativity + 1) - 1; +} + +static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +{ + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + unsigned edx; + + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + amd_cpuid4(index, &eax, &ebx, &ecx); + else + cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); + if (eax.split.type == CACHE_TYPE_NULL) + return -EIO; /* better error ? */ + + this_leaf->eax = eax; + this_leaf->ebx = ebx; + this_leaf->ecx = ecx; + this_leaf->size = (ecx.split.number_of_sets + 1) * + (ebx.split.coherency_line_size + 1) * + (ebx.split.physical_line_partition + 1) * + (ebx.split.ways_of_associativity + 1); + return 0; +} + +static int __cpuinit find_num_cache_leaves(void) +{ + unsigned int eax, ebx, ecx, edx; + union _cpuid4_leaf_eax cache_eax; + int i = -1; + + do { + ++i; + /* Do cpuid(4) loop to find out num_cache_leaves */ + cpuid_count(4, i, &eax, &ebx, &ecx, &edx); + cache_eax.full = eax; + } while (cache_eax.split.type != CACHE_TYPE_NULL); + return i; +} + +unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) +{ + unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ + unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ + unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ + unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; +#ifdef CONFIG_X86_HT + unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data); +#endif + + if (c->cpuid_level > 3) { + static int is_initialized; + + if (is_initialized == 0) { + /* Init num_cache_leaves from boot CPU */ + num_cache_leaves = find_num_cache_leaves(); + is_initialized++; + } + + /* + * Whenever possible use cpuid(4), deterministic cache + * parameters cpuid leaf to find the cache details + */ + for (i = 0; i < num_cache_leaves; i++) { + struct _cpuid4_info this_leaf; + + int retval; + + retval = cpuid4_cache_lookup(i, &this_leaf); + if (retval >= 0) { + switch(this_leaf.eax.split.level) { + case 1: + if (this_leaf.eax.split.type == + CACHE_TYPE_DATA) + new_l1d = this_leaf.size/1024; + else if (this_leaf.eax.split.type == + CACHE_TYPE_INST) + new_l1i = this_leaf.size/1024; + break; + case 2: + new_l2 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l2_id = c->apicid >> index_msb; + break; + case 3: + new_l3 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l3_id = c->apicid >> index_msb; + break; + default: + break; + } + } + } + } + /* + * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for + * trace cache + */ + if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { + /* supports eax=2 call */ + int i, j, n; + int regs[4]; + unsigned char *dp = (unsigned char *)regs; + int only_trace = 0; + + if (num_cache_leaves != 0 && c->x86 == 15) + only_trace = 1; + + /* Number of times to iterate */ + n = cpuid_eax(2) & 0xFF; + + for ( i = 0 ; i < n ; i++ ) { + cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); + + /* If bit 31 is set, this is an unknown format */ + for ( j = 0 ; j < 3 ; j++ ) { + if ( regs[j] < 0 ) regs[j] = 0; + } + + /* Byte 0 is level count, not a descriptor */ + for ( j = 1 ; j < 16 ; j++ ) { + unsigned char des = dp[j]; + unsigned char k = 0; + + /* look up this descriptor in the table */ + while (cache_table[k].descriptor != 0) + { + if (cache_table[k].descriptor == des) { + if (only_trace && cache_table[k].cache_type != LVL_TRACE) + break; + switch (cache_table[k].cache_type) { + case LVL_1_INST: + l1i += cache_table[k].size; + break; + case LVL_1_DATA: + l1d += cache_table[k].size; + break; + case LVL_2: + l2 += cache_table[k].size; + break; + case LVL_3: + l3 += cache_table[k].size; + break; + case LVL_TRACE: + trace += cache_table[k].size; + break; + } + + break; + } + + k++; + } + } + } + } + + if (new_l1d) + l1d = new_l1d; + + if (new_l1i) + l1i = new_l1i; + + if (new_l2) { + l2 = new_l2; +#ifdef CONFIG_X86_HT + cpu_llc_id[cpu] = l2_id; +#endif + } + + if (new_l3) { + l3 = new_l3; +#ifdef CONFIG_X86_HT + cpu_llc_id[cpu] = l3_id; +#endif + } + + if (trace) + printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); + else if ( l1i ) + printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); + + if (l1d) + printk(", L1 D cache: %dK\n", l1d); + else + printk("\n"); + + if (l2) + printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); + + if (l3) + printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); + + c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); + + return l2; +} + +/* pointer to _cpuid4_info array (for each cache leaf) */ +static struct _cpuid4_info *cpuid4_info[NR_CPUS]; +#define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y])) + +#ifdef CONFIG_SMP +static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) +{ + struct _cpuid4_info *this_leaf, *sibling_leaf; + unsigned long num_threads_sharing; + int index_msb, i; + struct cpuinfo_x86 *c = cpu_data; + + this_leaf = CPUID4_INFO_IDX(cpu, index); + num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; + + if (num_threads_sharing == 1) + cpu_set(cpu, this_leaf->shared_cpu_map); + else { + index_msb = get_count_order(num_threads_sharing); + + for_each_online_cpu(i) { + if (c[i].apicid >> index_msb == + c[cpu].apicid >> index_msb) { + cpu_set(i, this_leaf->shared_cpu_map); + if (i != cpu && cpuid4_info[i]) { + sibling_leaf = CPUID4_INFO_IDX(i, index); + cpu_set(cpu, sibling_leaf->shared_cpu_map); + } + } + } + } +} +static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) +{ + struct _cpuid4_info *this_leaf, *sibling_leaf; + int sibling; + + this_leaf = CPUID4_INFO_IDX(cpu, index); + for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { + sibling_leaf = CPUID4_INFO_IDX(sibling, index); + cpu_clear(cpu, sibling_leaf->shared_cpu_map); + } +} +#else +static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {} +static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {} +#endif + +static void free_cache_attributes(unsigned int cpu) +{ + kfree(cpuid4_info[cpu]); + cpuid4_info[cpu] = NULL; +} + +static int __cpuinit detect_cache_attributes(unsigned int cpu) +{ + struct _cpuid4_info *this_leaf; + unsigned long j; + int retval; + cpumask_t oldmask; + + if (num_cache_leaves == 0) + return -ENOENT; + + cpuid4_info[cpu] = kzalloc( + sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); + if (cpuid4_info[cpu] == NULL) + return -ENOMEM; + + oldmask = current->cpus_allowed; + retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (retval) + goto out; + + /* Do cpuid and store the results */ + retval = 0; + for (j = 0; j < num_cache_leaves; j++) { + this_leaf = CPUID4_INFO_IDX(cpu, j); + retval = cpuid4_cache_lookup(j, this_leaf); + if (unlikely(retval < 0)) + break; + cache_shared_cpu_map_setup(cpu, j); + } + set_cpus_allowed(current, oldmask); + +out: + if (retval) + free_cache_attributes(cpu); + return retval; +} + +#ifdef CONFIG_SYSFS + +#include +#include + +extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */ + +/* pointer to kobject for cpuX/cache */ +static struct kobject * cache_kobject[NR_CPUS]; + +struct _index_kobject { + struct kobject kobj; + unsigned int cpu; + unsigned short index; +}; + +/* pointer to array of kobjects for cpuX/cache/indexY */ +static struct _index_kobject *index_kobject[NR_CPUS]; +#define INDEX_KOBJECT_PTR(x,y) (&((index_kobject[x])[y])) + +#define show_one_plus(file_name, object, val) \ +static ssize_t show_##file_name \ + (struct _cpuid4_info *this_leaf, char *buf) \ +{ \ + return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \ +} + +show_one_plus(level, eax.split.level, 0); +show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1); +show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1); +show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1); +show_one_plus(number_of_sets, ecx.split.number_of_sets, 1); + +static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) +{ + return sprintf (buf, "%luK\n", this_leaf->size / 1024); +} + +static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf) +{ + char mask_str[NR_CPUS]; + cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map); + return sprintf(buf, "%s\n", mask_str); +} + +static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { + switch(this_leaf->eax.split.type) { + case CACHE_TYPE_DATA: + return sprintf(buf, "Data\n"); + break; + case CACHE_TYPE_INST: + return sprintf(buf, "Instruction\n"); + break; + case CACHE_TYPE_UNIFIED: + return sprintf(buf, "Unified\n"); + break; + default: + return sprintf(buf, "Unknown\n"); + break; + } +} + +struct _cache_attr { + struct attribute attr; + ssize_t (*show)(struct _cpuid4_info *, char *); + ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); +}; + +#define define_one_ro(_name) \ +static struct _cache_attr _name = \ + __ATTR(_name, 0444, show_##_name, NULL) + +define_one_ro(level); +define_one_ro(type); +define_one_ro(coherency_line_size); +define_one_ro(physical_line_partition); +define_one_ro(ways_of_associativity); +define_one_ro(number_of_sets); +define_one_ro(size); +define_one_ro(shared_cpu_map); + +static struct attribute * default_attrs[] = { + &type.attr, + &level.attr, + &coherency_line_size.attr, + &physical_line_partition.attr, + &ways_of_associativity.attr, + &number_of_sets.attr, + &size.attr, + &shared_cpu_map.attr, + NULL +}; + +#define to_object(k) container_of(k, struct _index_kobject, kobj) +#define to_attr(a) container_of(a, struct _cache_attr, attr) + +static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) +{ + struct _cache_attr *fattr = to_attr(attr); + struct _index_kobject *this_leaf = to_object(kobj); + ssize_t ret; + + ret = fattr->show ? + fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), + buf) : + 0; + return ret; +} + +static ssize_t store(struct kobject * kobj, struct attribute * attr, + const char * buf, size_t count) +{ + return 0; +} + +static struct sysfs_ops sysfs_ops = { + .show = show, + .store = store, +}; + +static struct kobj_type ktype_cache = { + .sysfs_ops = &sysfs_ops, + .default_attrs = default_attrs, +}; + +static struct kobj_type ktype_percpu_entry = { + .sysfs_ops = &sysfs_ops, +}; + +static void cpuid4_cache_sysfs_exit(unsigned int cpu) +{ + kfree(cache_kobject[cpu]); + kfree(index_kobject[cpu]); + cache_kobject[cpu] = NULL; + index_kobject[cpu] = NULL; + free_cache_attributes(cpu); +} + +static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) +{ + + if (num_cache_leaves == 0) + return -ENOENT; + + detect_cache_attributes(cpu); + if (cpuid4_info[cpu] == NULL) + return -ENOENT; + + /* Allocate all required memory */ + cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL); + if (unlikely(cache_kobject[cpu] == NULL)) + goto err_out; + + index_kobject[cpu] = kzalloc( + sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL); + if (unlikely(index_kobject[cpu] == NULL)) + goto err_out; + + return 0; + +err_out: + cpuid4_cache_sysfs_exit(cpu); + return -ENOMEM; +} + +/* Add/Remove cache interface for CPU device */ +static int __cpuinit cache_add_dev(struct sys_device * sys_dev) +{ + unsigned int cpu = sys_dev->id; + unsigned long i, j; + struct _index_kobject *this_object; + int retval = 0; + + retval = cpuid4_cache_sysfs_init(cpu); + if (unlikely(retval < 0)) + return retval; + + cache_kobject[cpu]->parent = &sys_dev->kobj; + kobject_set_name(cache_kobject[cpu], "%s", "cache"); + cache_kobject[cpu]->ktype = &ktype_percpu_entry; + retval = kobject_register(cache_kobject[cpu]); + + for (i = 0; i < num_cache_leaves; i++) { + this_object = INDEX_KOBJECT_PTR(cpu,i); + this_object->cpu = cpu; + this_object->index = i; + this_object->kobj.parent = cache_kobject[cpu]; + kobject_set_name(&(this_object->kobj), "index%1lu", i); + this_object->kobj.ktype = &ktype_cache; + retval = kobject_register(&(this_object->kobj)); + if (unlikely(retval)) { + for (j = 0; j < i; j++) { + kobject_unregister( + &(INDEX_KOBJECT_PTR(cpu,j)->kobj)); + } + kobject_unregister(cache_kobject[cpu]); + cpuid4_cache_sysfs_exit(cpu); + break; + } + } + return retval; +} + +static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) +{ + unsigned int cpu = sys_dev->id; + unsigned long i; + + if (cpuid4_info[cpu] == NULL) + return; + for (i = 0; i < num_cache_leaves; i++) { + cache_remove_shared_cpu_map(cpu, i); + kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); + } + kobject_unregister(cache_kobject[cpu]); + cpuid4_cache_sysfs_exit(cpu); + return; +} + +static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + cache_add_dev(sys_dev); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + cache_remove_dev(sys_dev); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = +{ + .notifier_call = cacheinfo_cpu_callback, +}; + +static int __cpuinit cache_sysfs_init(void) +{ + int i; + + if (num_cache_leaves == 0) + return 0; + + register_hotcpu_notifier(&cacheinfo_cpu_notifier); + + for_each_online_cpu(i) { + cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE, + (void *)(long)i); + } + + return 0; +} + +device_initcall(cache_sysfs_init); + +#endif diff --git a/arch/x86/kernel/cpu/nexgen.c b/arch/x86/kernel/cpu/nexgen.c new file mode 100644 index 00000000000..961fbe1a748 --- /dev/null +++ b/arch/x86/kernel/cpu/nexgen.c @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +#include "cpu.h" + +/* + * Detect a NexGen CPU running without BIOS hypercode new enough + * to have CPUID. (Thanks to Herbert Oppmann) + */ + +static int __cpuinit deep_magic_nexgen_probe(void) +{ + int ret; + + __asm__ __volatile__ ( + " movw $0x5555, %%ax\n" + " xorw %%dx,%%dx\n" + " movw $2, %%cx\n" + " divw %%cx\n" + " movl $0, %%eax\n" + " jnz 1f\n" + " movl $1, %%eax\n" + "1:\n" + : "=a" (ret) : : "cx", "dx" ); + return ret; +} + +static void __cpuinit init_nexgen(struct cpuinfo_x86 * c) +{ + c->x86_cache_size = 256; /* A few had 1 MB... */ +} + +static void __cpuinit nexgen_identify(struct cpuinfo_x86 * c) +{ + /* Detect NexGen with old hypercode */ + if ( deep_magic_nexgen_probe() ) { + strcpy(c->x86_vendor_id, "NexGenDriven"); + } +} + +static struct cpu_dev nexgen_cpu_dev __cpuinitdata = { + .c_vendor = "Nexgen", + .c_ident = { "NexGenDriven" }, + .c_models = { + { .vendor = X86_VENDOR_NEXGEN, + .family = 5, + .model_names = { [1] = "Nx586" } + }, + }, + .c_init = init_nexgen, + .c_identify = nexgen_identify, +}; + +int __init nexgen_init_cpu(void) +{ + cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev; + return 0; +} diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c new file mode 100644 index 00000000000..93fecd4b03d --- /dev/null +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -0,0 +1,713 @@ +/* local apic based NMI watchdog for various CPUs. + This file also handles reservation of performance counters for coordination + with other users (like oprofile). + + Note that these events normally don't tick when the CPU idles. This means + the frequency varies with CPU load. + + Original code for K7/P6 written by Keith Owens */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nmi_watchdog_ctlblk { + unsigned int cccr_msr; + unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ + unsigned int evntsel_msr; /* the MSR to select the events to handle */ +}; + +/* Interface defining a CPU specific perfctr watchdog */ +struct wd_ops { + int (*reserve)(void); + void (*unreserve)(void); + int (*setup)(unsigned nmi_hz); + void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz); + void (*stop)(void); + unsigned perfctr; + unsigned evntsel; + u64 checkbit; +}; + +static struct wd_ops *wd_ops; + +/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's + * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) + */ +#define NMI_MAX_COUNTER_BITS 66 + +/* perfctr_nmi_owner tracks the ownership of the perfctr registers: + * evtsel_nmi_owner tracks the ownership of the event selection + * - different performance counters/ event selection may be reserved for + * different subsystems this reservation system just tries to coordinate + * things a little + */ +static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); +static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); + +static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); + +/* converts an msr to an appropriate reservation bit */ +static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) +{ + /* returns the bit offset of the performance counter register */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + return (msr - MSR_K7_PERFCTR0); + case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return (msr - MSR_ARCH_PERFMON_PERFCTR0); + + switch (boot_cpu_data.x86) { + case 6: + return (msr - MSR_P6_PERFCTR0); + case 15: + return (msr - MSR_P4_BPU_PERFCTR0); + } + } + return 0; +} + +/* converts an msr to an appropriate reservation bit */ +/* returns the bit offset of the event selection register */ +static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) +{ + /* returns the bit offset of the event selection register */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + return (msr - MSR_K7_EVNTSEL0); + case X86_VENDOR_INTEL: + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return (msr - MSR_ARCH_PERFMON_EVENTSEL0); + + switch (boot_cpu_data.x86) { + case 6: + return (msr - MSR_P6_EVNTSEL0); + case 15: + return (msr - MSR_P4_BSU_ESCR0); + } + } + return 0; + +} + +/* checks for a bit availability (hack for oprofile) */ +int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) +{ + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + return (!test_bit(counter, perfctr_nmi_owner)); +} + +/* checks the an msr for availability */ +int avail_to_resrv_perfctr_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_perfctr_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + return (!test_bit(counter, perfctr_nmi_owner)); +} + +int reserve_perfctr_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_perfctr_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + if (!test_and_set_bit(counter, perfctr_nmi_owner)) + return 1; + return 0; +} + +void release_perfctr_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_perfctr_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + clear_bit(counter, perfctr_nmi_owner); +} + +int reserve_evntsel_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_evntsel_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + if (!test_and_set_bit(counter, evntsel_nmi_owner)) + return 1; + return 0; +} + +void release_evntsel_nmi(unsigned int msr) +{ + unsigned int counter; + + counter = nmi_evntsel_msr_to_bit(msr); + BUG_ON(counter > NMI_MAX_COUNTER_BITS); + + clear_bit(counter, evntsel_nmi_owner); +} + +EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); +EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); +EXPORT_SYMBOL(reserve_perfctr_nmi); +EXPORT_SYMBOL(release_perfctr_nmi); +EXPORT_SYMBOL(reserve_evntsel_nmi); +EXPORT_SYMBOL(release_evntsel_nmi); + +void disable_lapic_nmi_watchdog(void) +{ + BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); + + if (atomic_read(&nmi_active) <= 0) + return; + + on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); + wd_ops->unreserve(); + + BUG_ON(atomic_read(&nmi_active) != 0); +} + +void enable_lapic_nmi_watchdog(void) +{ + BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); + + /* are we already enabled */ + if (atomic_read(&nmi_active) != 0) + return; + + /* are we lapic aware */ + if (!wd_ops) + return; + if (!wd_ops->reserve()) { + printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n"); + return; + } + + on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); + touch_nmi_watchdog(); +} + +/* + * Activate the NMI watchdog via the local APIC. + */ + +static unsigned int adjust_for_32bit_ctr(unsigned int hz) +{ + u64 counter_val; + unsigned int retval = hz; + + /* + * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter + * are writable, with higher bits sign extending from bit 31. + * So, we can only program the counter with 31 bit values and + * 32nd bit should be 1, for 33.. to be 1. + * Find the appropriate nmi_hz + */ + counter_val = (u64)cpu_khz * 1000; + do_div(counter_val, retval); + if (counter_val > 0x7fffffffULL) { + u64 count = (u64)cpu_khz * 1000; + do_div(count, 0x7fffffffUL); + retval = count + 1; + } + return retval; +} + +static void +write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz) +{ + u64 count = (u64)cpu_khz * 1000; + + do_div(count, nmi_hz); + if(descr) + Dprintk("setting %s to -0x%08Lx\n", descr, count); + wrmsrl(perfctr_msr, 0 - count); +} + +static void write_watchdog_counter32(unsigned int perfctr_msr, + const char *descr, unsigned nmi_hz) +{ + u64 count = (u64)cpu_khz * 1000; + + do_div(count, nmi_hz); + if(descr) + Dprintk("setting %s to -0x%08Lx\n", descr, count); + wrmsr(perfctr_msr, (u32)(-count), 0); +} + +/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface + nicely stable so there is not much variety */ + +#define K7_EVNTSEL_ENABLE (1 << 22) +#define K7_EVNTSEL_INT (1 << 20) +#define K7_EVNTSEL_OS (1 << 17) +#define K7_EVNTSEL_USR (1 << 16) +#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 +#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING + +static int setup_k7_watchdog(unsigned nmi_hz) +{ + unsigned int perfctr_msr, evntsel_msr; + unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + perfctr_msr = wd_ops->perfctr; + evntsel_msr = wd_ops->evntsel; + + wrmsrl(perfctr_msr, 0UL); + + evntsel = K7_EVNTSEL_INT + | K7_EVNTSEL_OS + | K7_EVNTSEL_USR + | K7_NMI_EVENT; + + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= K7_EVNTSEL_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; //unused + return 1; +} + +static void single_msr_stop_watchdog(void) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + wrmsr(wd->evntsel_msr, 0, 0); +} + +static int single_msr_reserve(void) +{ + if (!reserve_perfctr_nmi(wd_ops->perfctr)) + return 0; + + if (!reserve_evntsel_nmi(wd_ops->evntsel)) { + release_perfctr_nmi(wd_ops->perfctr); + return 0; + } + return 1; +} + +static void single_msr_unreserve(void) +{ + release_evntsel_nmi(wd_ops->evntsel); + release_perfctr_nmi(wd_ops->perfctr); +} + +static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +{ + /* start the cycle over again */ + write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); +} + +static struct wd_ops k7_wd_ops = { + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_k7_watchdog, + .rearm = single_msr_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_K7_PERFCTR0, + .evntsel = MSR_K7_EVNTSEL0, + .checkbit = 1ULL<<47, +}; + +/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */ + +#define P6_EVNTSEL0_ENABLE (1 << 22) +#define P6_EVNTSEL_INT (1 << 20) +#define P6_EVNTSEL_OS (1 << 17) +#define P6_EVNTSEL_USR (1 << 16) +#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 +#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED + +static int setup_p6_watchdog(unsigned nmi_hz) +{ + unsigned int perfctr_msr, evntsel_msr; + unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + perfctr_msr = wd_ops->perfctr; + evntsel_msr = wd_ops->evntsel; + + /* KVM doesn't implement this MSR */ + if (wrmsr_safe(perfctr_msr, 0, 0) < 0) + return 0; + + evntsel = P6_EVNTSEL_INT + | P6_EVNTSEL_OS + | P6_EVNTSEL_USR + | P6_NMI_EVENT; + + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + nmi_hz = adjust_for_32bit_ctr(nmi_hz); + write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= P6_EVNTSEL0_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; //unused + return 1; +} + +static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +{ + /* P6 based Pentium M need to re-unmask + * the apic vector but it doesn't hurt + * other P6 variant. + * ArchPerfom/Core Duo also needs this */ + apic_write(APIC_LVTPC, APIC_DM_NMI); + /* P6/ARCH_PERFMON has 32 bit counter write */ + write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); +} + +static struct wd_ops p6_wd_ops = { + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_p6_watchdog, + .rearm = p6_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_P6_PERFCTR0, + .evntsel = MSR_P6_EVNTSEL0, + .checkbit = 1ULL<<39, +}; + +/* Intel P4 performance counters. By far the most complicated of all. */ + +#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) +#define P4_ESCR_EVENT_SELECT(N) ((N)<<25) +#define P4_ESCR_OS (1<<3) +#define P4_ESCR_USR (1<<2) +#define P4_CCCR_OVF_PMI0 (1<<26) +#define P4_CCCR_OVF_PMI1 (1<<27) +#define P4_CCCR_THRESHOLD(N) ((N)<<20) +#define P4_CCCR_COMPLEMENT (1<<19) +#define P4_CCCR_COMPARE (1<<18) +#define P4_CCCR_REQUIRED (3<<16) +#define P4_CCCR_ESCR_SELECT(N) ((N)<<13) +#define P4_CCCR_ENABLE (1<<12) +#define P4_CCCR_OVF (1<<31) + +/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter + CRU_ESCR0 (with any non-null event selector) through a complemented + max threshold. [IA32-Vol3, Section 14.9.9] */ + +static int setup_p4_watchdog(unsigned nmi_hz) +{ + unsigned int perfctr_msr, evntsel_msr, cccr_msr; + unsigned int evntsel, cccr_val; + unsigned int misc_enable, dummy; + unsigned int ht_num; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); + if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) + return 0; + +#ifdef CONFIG_SMP + /* detect which hyperthread we are on */ + if (smp_num_siblings == 2) { + unsigned int ebx, apicid; + + ebx = cpuid_ebx(1); + apicid = (ebx >> 24) & 0xff; + ht_num = apicid & 1; + } else +#endif + ht_num = 0; + + /* performance counters are shared resources + * assign each hyperthread its own set + * (re-use the ESCR0 register, seems safe + * and keeps the cccr_val the same) + */ + if (!ht_num) { + /* logical cpu 0 */ + perfctr_msr = MSR_P4_IQ_PERFCTR0; + evntsel_msr = MSR_P4_CRU_ESCR0; + cccr_msr = MSR_P4_IQ_CCCR0; + cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); + } else { + /* logical cpu 1 */ + perfctr_msr = MSR_P4_IQ_PERFCTR1; + evntsel_msr = MSR_P4_CRU_ESCR0; + cccr_msr = MSR_P4_IQ_CCCR1; + cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); + } + + evntsel = P4_ESCR_EVENT_SELECT(0x3F) + | P4_ESCR_OS + | P4_ESCR_USR; + + cccr_val |= P4_CCCR_THRESHOLD(15) + | P4_CCCR_COMPLEMENT + | P4_CCCR_COMPARE + | P4_CCCR_REQUIRED; + + wrmsr(evntsel_msr, evntsel, 0); + wrmsr(cccr_msr, cccr_val, 0); + write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); + apic_write(APIC_LVTPC, APIC_DM_NMI); + cccr_val |= P4_CCCR_ENABLE; + wrmsr(cccr_msr, cccr_val, 0); + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = cccr_msr; + return 1; +} + +static void stop_p4_watchdog(void) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + wrmsr(wd->cccr_msr, 0, 0); + wrmsr(wd->evntsel_msr, 0, 0); +} + +static int p4_reserve(void) +{ + if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0)) + return 0; +#ifdef CONFIG_SMP + if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1)) + goto fail1; +#endif + if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0)) + goto fail2; + /* RED-PEN why is ESCR1 not reserved here? */ + return 1; + fail2: +#ifdef CONFIG_SMP + if (smp_num_siblings > 1) + release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); + fail1: +#endif + release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); + return 0; +} + +static void p4_unreserve(void) +{ +#ifdef CONFIG_SMP + if (smp_num_siblings > 1) + release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); +#endif + release_evntsel_nmi(MSR_P4_CRU_ESCR0); + release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); +} + +static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +{ + unsigned dummy; + /* + * P4 quirks: + * - An overflown perfctr will assert its interrupt + * until the OVF flag in its CCCR is cleared. + * - LVTPC is masked on interrupt and must be + * unmasked by the LVTPC handler. + */ + rdmsrl(wd->cccr_msr, dummy); + dummy &= ~P4_CCCR_OVF; + wrmsrl(wd->cccr_msr, dummy); + apic_write(APIC_LVTPC, APIC_DM_NMI); + /* start the cycle over again */ + write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); +} + +static struct wd_ops p4_wd_ops = { + .reserve = p4_reserve, + .unreserve = p4_unreserve, + .setup = setup_p4_watchdog, + .rearm = p4_rearm, + .stop = stop_p4_watchdog, + /* RED-PEN this is wrong for the other sibling */ + .perfctr = MSR_P4_BPU_PERFCTR0, + .evntsel = MSR_P4_BSU_ESCR0, + .checkbit = 1ULL<<39, +}; + +/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully + all future Intel CPUs. */ + +#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL +#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK + +static int setup_intel_arch_watchdog(unsigned nmi_hz) +{ + unsigned int ebx; + union cpuid10_eax eax; + unsigned int unused; + unsigned int perfctr_msr, evntsel_msr; + unsigned int evntsel; + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + + /* + * Check whether the Architectural PerfMon supports + * Unhalted Core Cycles Event or not. + * NOTE: Corresponding bit = 0 in ebx indicates event present. + */ + cpuid(10, &(eax.full), &ebx, &unused, &unused); + if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || + (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) + return 0; + + perfctr_msr = wd_ops->perfctr; + evntsel_msr = wd_ops->evntsel; + + wrmsrl(perfctr_msr, 0UL); + + evntsel = ARCH_PERFMON_EVENTSEL_INT + | ARCH_PERFMON_EVENTSEL_OS + | ARCH_PERFMON_EVENTSEL_USR + | ARCH_PERFMON_NMI_EVENT_SEL + | ARCH_PERFMON_NMI_EVENT_UMASK; + + /* setup the timer */ + wrmsr(evntsel_msr, evntsel, 0); + nmi_hz = adjust_for_32bit_ctr(nmi_hz); + write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + + wd->perfctr_msr = perfctr_msr; + wd->evntsel_msr = evntsel_msr; + wd->cccr_msr = 0; //unused + wd_ops->checkbit = 1ULL << (eax.split.bit_width - 1); + return 1; +} + +static struct wd_ops intel_arch_wd_ops = { + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_intel_arch_watchdog, + .rearm = p6_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_ARCH_PERFMON_PERFCTR1, + .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, +}; + +static struct wd_ops coreduo_wd_ops = { + .reserve = single_msr_reserve, + .unreserve = single_msr_unreserve, + .setup = setup_intel_arch_watchdog, + .rearm = p6_rearm, + .stop = single_msr_stop_watchdog, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .evntsel = MSR_ARCH_PERFMON_EVENTSEL0, +}; + +static void probe_nmi_watchdog(void) +{ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && + boot_cpu_data.x86 != 16) + return; + wd_ops = &k7_wd_ops; + break; + case X86_VENDOR_INTEL: + /* Work around Core Duo (Yonah) errata AE49 where perfctr1 + doesn't have a working enable bit. */ + if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { + wd_ops = &coreduo_wd_ops; + break; + } + if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + wd_ops = &intel_arch_wd_ops; + break; + } + switch (boot_cpu_data.x86) { + case 6: + if (boot_cpu_data.x86_model > 0xd) + return; + + wd_ops = &p6_wd_ops; + break; + case 15: + if (boot_cpu_data.x86_model > 0x4) + return; + + wd_ops = &p4_wd_ops; + break; + default: + return; + } + break; + } +} + +/* Interface to nmi.c */ + +int lapic_watchdog_init(unsigned nmi_hz) +{ + if (!wd_ops) { + probe_nmi_watchdog(); + if (!wd_ops) + return -1; + + if (!wd_ops->reserve()) { + printk(KERN_ERR + "NMI watchdog: cannot reserve perfctrs\n"); + return -1; + } + } + + if (!(wd_ops->setup(nmi_hz))) { + printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n", + raw_smp_processor_id()); + return -1; + } + + return 0; +} + +void lapic_watchdog_stop(void) +{ + if (wd_ops) + wd_ops->stop(); +} + +unsigned lapic_adjust_nmi_hz(unsigned hz) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + if (wd->perfctr_msr == MSR_P6_PERFCTR0 || + wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1) + hz = adjust_for_32bit_ctr(hz); + return hz; +} + +int lapic_wd_event(unsigned nmi_hz) +{ + struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); + u64 ctr; + rdmsrl(wd->perfctr_msr, ctr); + if (ctr & wd_ops->checkbit) { /* perfctr still running? */ + return 0; + } + wd_ops->rearm(wd, nmi_hz); + return 1; +} + +int lapic_watchdog_ok(void) +{ + return wd_ops != NULL; +} diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c new file mode 100644 index 00000000000..1e31b6caffb --- /dev/null +++ b/arch/x86/kernel/cpu/proc.c @@ -0,0 +1,192 @@ +#include +#include +#include +#include +#include +#include + +/* + * Get CPU information for use by the procfs. + */ +static int show_cpuinfo(struct seq_file *m, void *v) +{ + /* + * These flag bits must match the definitions in . + * NULL means this bit is undefined or reserved; either way it doesn't + * have meaning as far as Linux is concerned. Note that it's important + * to realize there is a difference between this table and CPUID -- if + * applications want to get the raw CPUID data, they should access + * /dev/cpu//cpuid instead. + */ + static const char * const x86_cap_flags[] = { + /* Intel-defined */ + "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", + "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", + "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", + "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", + + /* AMD-defined */ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, + NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", + "3dnowext", "3dnow", + + /* Transmeta-defined */ + "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Other (Linux-defined) */ + "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", + NULL, NULL, NULL, NULL, + "constant_tsc", "up", NULL, "arch_perfmon", + "pebs", "bts", NULL, "sync_rdtsc", + "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Intel-defined (#2) */ + "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", + "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, + NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* VIA/Cyrix/Centaur-defined */ + NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", + "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* AMD-defined (#2) */ + "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy", + "altmovcr8", "abm", "sse4a", + "misalignsse", "3dnowprefetch", + "osvw", "ibs", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Auxiliary (Linux-defined) */ + "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + }; + static const char * const x86_power_flags[] = { + "ts", /* temperature sensor */ + "fid", /* frequency id control */ + "vid", /* voltage id control */ + "ttp", /* thermal trip */ + "tm", + "stc", + "100mhzsteps", + "hwpstate", + "", /* constant_tsc - moved to flags */ + /* nothing */ + }; + struct cpuinfo_x86 *c = v; + int i, n = c - cpu_data; + int fpu_exception; + +#ifdef CONFIG_SMP + if (!cpu_online(n)) + return 0; +#endif + seq_printf(m, "processor\t: %d\n" + "vendor_id\t: %s\n" + "cpu family\t: %d\n" + "model\t\t: %d\n" + "model name\t: %s\n", + n, + c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", + c->x86, + c->x86_model, + c->x86_model_id[0] ? c->x86_model_id : "unknown"); + + if (c->x86_mask || c->cpuid_level >= 0) + seq_printf(m, "stepping\t: %d\n", c->x86_mask); + else + seq_printf(m, "stepping\t: unknown\n"); + + if ( cpu_has(c, X86_FEATURE_TSC) ) { + unsigned int freq = cpufreq_quick_get(n); + if (!freq) + freq = cpu_khz; + seq_printf(m, "cpu MHz\t\t: %u.%03u\n", + freq / 1000, (freq % 1000)); + } + + /* Cache size */ + if (c->x86_cache_size >= 0) + seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); +#ifdef CONFIG_X86_HT + if (c->x86_max_cores * smp_num_siblings > 1) { + seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); + seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n])); + seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); + } +#endif + + /* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */ + fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu); + seq_printf(m, "fdiv_bug\t: %s\n" + "hlt_bug\t\t: %s\n" + "f00f_bug\t: %s\n" + "coma_bug\t: %s\n" + "fpu\t\t: %s\n" + "fpu_exception\t: %s\n" + "cpuid level\t: %d\n" + "wp\t\t: %s\n" + "flags\t\t:", + c->fdiv_bug ? "yes" : "no", + c->hlt_works_ok ? "no" : "yes", + c->f00f_bug ? "yes" : "no", + c->coma_bug ? "yes" : "no", + c->hard_math ? "yes" : "no", + fpu_exception ? "yes" : "no", + c->cpuid_level, + c->wp_works_ok ? "yes" : "no"); + + for ( i = 0 ; i < 32*NCAPINTS ; i++ ) + if ( test_bit(i, c->x86_capability) && + x86_cap_flags[i] != NULL ) + seq_printf(m, " %s", x86_cap_flags[i]); + + for (i = 0; i < 32; i++) + if (c->x86_power & (1 << i)) { + if (i < ARRAY_SIZE(x86_power_flags) && + x86_power_flags[i]) + seq_printf(m, "%s%s", + x86_power_flags[i][0]?" ":"", + x86_power_flags[i]); + else + seq_printf(m, " [%d]", i); + } + + seq_printf(m, "\nbogomips\t: %lu.%02lu\n", + c->loops_per_jiffy/(500000/HZ), + (c->loops_per_jiffy/(5000/HZ)) % 100); + seq_printf(m, "clflush size\t: %u\n\n", c->x86_clflush_size); + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < NR_CPUS ? cpu_data + *pos : NULL; +} +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} +static void c_stop(struct seq_file *m, void *v) +{ +} +struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c new file mode 100644 index 00000000000..200fb3f9ebf --- /dev/null +++ b/arch/x86/kernel/cpu/transmeta.c @@ -0,0 +1,116 @@ +#include +#include +#include +#include +#include +#include "cpu.h" + +static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) +{ + unsigned int cap_mask, uk, max, dummy; + unsigned int cms_rev1, cms_rev2; + unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; + char cpu_info[65]; + + get_model_name(c); /* Same as AMD/Cyrix */ + display_cacheinfo(c); + + /* Print CMS and CPU revision */ + max = cpuid_eax(0x80860000); + cpu_rev = 0; + if ( max >= 0x80860001 ) { + cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); + if (cpu_rev != 0x02000000) { + printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n", + (cpu_rev >> 24) & 0xff, + (cpu_rev >> 16) & 0xff, + (cpu_rev >> 8) & 0xff, + cpu_rev & 0xff, + cpu_freq); + } + } + if ( max >= 0x80860002 ) { + cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy); + if (cpu_rev == 0x02000000) { + printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n", + new_cpu_rev, cpu_freq); + } + printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n", + (cms_rev1 >> 24) & 0xff, + (cms_rev1 >> 16) & 0xff, + (cms_rev1 >> 8) & 0xff, + cms_rev1 & 0xff, + cms_rev2); + } + if ( max >= 0x80860006 ) { + cpuid(0x80860003, + (void *)&cpu_info[0], + (void *)&cpu_info[4], + (void *)&cpu_info[8], + (void *)&cpu_info[12]); + cpuid(0x80860004, + (void *)&cpu_info[16], + (void *)&cpu_info[20], + (void *)&cpu_info[24], + (void *)&cpu_info[28]); + cpuid(0x80860005, + (void *)&cpu_info[32], + (void *)&cpu_info[36], + (void *)&cpu_info[40], + (void *)&cpu_info[44]); + cpuid(0x80860006, + (void *)&cpu_info[48], + (void *)&cpu_info[52], + (void *)&cpu_info[56], + (void *)&cpu_info[60]); + cpu_info[64] = '\0'; + printk(KERN_INFO "CPU: %s\n", cpu_info); + } + + /* Unhide possibly hidden capability flags */ + rdmsr(0x80860004, cap_mask, uk); + wrmsr(0x80860004, ~0, uk); + c->x86_capability[0] = cpuid_edx(0x00000001); + wrmsr(0x80860004, cap_mask, uk); + + /* All Transmeta CPUs have a constant TSC */ + set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); + + /* If we can run i686 user-space code, call us an i686 */ +#define USER686 ((1 << X86_FEATURE_TSC)|\ + (1 << X86_FEATURE_CX8)|\ + (1 << X86_FEATURE_CMOV)) + if (c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686) + c->x86 = 6; + +#ifdef CONFIG_SYSCTL + /* randomize_va_space slows us down enormously; + it probably triggers retranslation of x86->native bytecode */ + randomize_va_space = 0; +#endif +} + +static void __cpuinit transmeta_identify(struct cpuinfo_x86 * c) +{ + u32 xlvl; + + /* Transmeta-defined flags: level 0x80860001 */ + xlvl = cpuid_eax(0x80860000); + if ( (xlvl & 0xffff0000) == 0x80860000 ) { + if ( xlvl >= 0x80860001 ) + c->x86_capability[2] = cpuid_edx(0x80860001); + } +} + +static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { + .c_vendor = "Transmeta", + .c_ident = { "GenuineTMx86", "TransmetaCPU" }, + .c_init = init_transmeta, + .c_identify = transmeta_identify, +}; + +int __init transmeta_init_cpu(void) +{ + cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev; + return 0; +} diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c new file mode 100644 index 00000000000..a7a4e75bdcd --- /dev/null +++ b/arch/x86/kernel/cpu/umc.c @@ -0,0 +1,26 @@ +#include +#include +#include +#include "cpu.h" + +/* UMC chips appear to be only either 386 or 486, so no special init takes place. + */ + +static struct cpu_dev umc_cpu_dev __cpuinitdata = { + .c_vendor = "UMC", + .c_ident = { "UMC UMC UMC" }, + .c_models = { + { .vendor = X86_VENDOR_UMC, .family = 4, .model_names = + { + [1] = "U5D", + [2] = "U5S", + } + }, + }, +}; + +int __init umc_init_cpu(void) +{ + cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev; + return 0; +} diff --git a/arch/x86_64/kernel/Makefile_64 b/arch/x86_64/kernel/Makefile_64 index e7480509103..690aebf37c3 100644 --- a/arch/x86_64/kernel/Makefile_64 +++ b/arch/x86_64/kernel/Makefile_64 @@ -53,11 +53,11 @@ bootflag-y += ../../i386/kernel/bootflag.o cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o topology-y += ../../i386/kernel/topology.o microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o -intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o -addon_cpuid_features-y += ../../i386/kernel/cpu/addon_cpuid_features.o +intel_cacheinfo-y += ../../x86/kernel/cpu/intel_cacheinfo.o +addon_cpuid_features-y += ../../x86/kernel/cpu/addon_cpuid_features.o quirks-y += ../../i386/kernel/quirks.o i8237-y += ../../i386/kernel/i8237.o msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o alternative-y += ../../i386/kernel/alternative.o pcspeaker-y += ../../i386/kernel/pcspeaker.o -perfctr-watchdog-y += ../../i386/kernel/cpu/perfctr-watchdog.o +perfctr-watchdog-y += ../../x86/kernel/cpu/perfctr-watchdog.o -- cgit v1.2.3-70-g09d2 From 9a163ed8e0552fdcffe405d2ea7134819a81456e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:17:01 +0200 Subject: i386: move kernel Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/i386/Makefile | 11 +- arch/i386/kernel/.gitignore | 1 - arch/i386/kernel/Makefile | 5 - arch/i386/kernel/Makefile_32 | 88 - arch/i386/kernel/alternative.c | 450 ----- arch/i386/kernel/apic_32.c | 1566 ---------------- arch/i386/kernel/apm_32.c | 2403 ------------------------- arch/i386/kernel/asm-offsets.c | 5 - arch/i386/kernel/asm-offsets_32.c | 147 -- arch/i386/kernel/bootflag.c | 98 - arch/i386/kernel/cpuid.c | 242 --- arch/i386/kernel/crash_32.c | 137 -- arch/i386/kernel/crash_dump_32.c | 74 - arch/i386/kernel/doublefault_32.c | 70 - arch/i386/kernel/e820_32.c | 944 ---------- arch/i386/kernel/early_printk.c | 2 - arch/i386/kernel/efi_32.c | 712 -------- arch/i386/kernel/efi_stub_32.S | 122 -- arch/i386/kernel/entry_32.S | 1112 ------------ arch/i386/kernel/geode_32.c | 155 -- arch/i386/kernel/head_32.S | 578 ------ arch/i386/kernel/hpet_32.c | 553 ------ arch/i386/kernel/i386_ksyms_32.c | 30 - arch/i386/kernel/i387_32.c | 546 ------ arch/i386/kernel/i8237.c | 72 - arch/i386/kernel/i8253_32.c | 206 --- arch/i386/kernel/i8259_32.c | 420 ----- arch/i386/kernel/init_task_32.c | 46 - arch/i386/kernel/io_apic_32.c | 2847 ------------------------------ arch/i386/kernel/ioport_32.c | 153 -- arch/i386/kernel/irq_32.c | 343 ---- arch/i386/kernel/kprobes_32.c | 751 -------- arch/i386/kernel/ldt_32.c | 250 --- arch/i386/kernel/machine_kexec_32.c | 171 -- arch/i386/kernel/mca_32.c | 470 ----- arch/i386/kernel/microcode.c | 850 --------- arch/i386/kernel/module_32.c | 152 -- arch/i386/kernel/mpparse_32.c | 1132 ------------ arch/i386/kernel/msr.c | 224 --- arch/i386/kernel/nmi_32.c | 468 ----- arch/i386/kernel/numaq_32.c | 89 - arch/i386/kernel/paravirt_32.c | 392 ---- arch/i386/kernel/pci-dma_32.c | 177 -- arch/i386/kernel/pcspeaker.c | 20 - arch/i386/kernel/process_32.c | 951 ---------- arch/i386/kernel/ptrace_32.c | 723 -------- arch/i386/kernel/quirks.c | 49 - arch/i386/kernel/reboot_32.c | 413 ----- arch/i386/kernel/reboot_fixups_32.c | 68 - arch/i386/kernel/relocate_kernel_32.S | 252 --- arch/i386/kernel/scx200_32.c | 131 -- arch/i386/kernel/setup_32.c | 653 ------- arch/i386/kernel/sigframe_32.h | 21 - arch/i386/kernel/signal_32.c | 667 ------- arch/i386/kernel/smp_32.c | 707 -------- arch/i386/kernel/smpboot_32.c | 1322 -------------- arch/i386/kernel/smpcommon_32.c | 81 - arch/i386/kernel/srat_32.c | 360 ---- arch/i386/kernel/summit_32.c | 180 -- arch/i386/kernel/sys_i386_32.c | 265 --- arch/i386/kernel/syscall_table_32.S | 326 ---- arch/i386/kernel/sysenter_32.c | 348 ---- arch/i386/kernel/time_32.c | 236 --- arch/i386/kernel/topology.c | 77 - arch/i386/kernel/trampoline_32.S | 85 - arch/i386/kernel/traps_32.c | 1250 ------------- arch/i386/kernel/tsc_32.c | 413 ----- arch/i386/kernel/tsc_sync.c | 1 - arch/i386/kernel/vm86_32.c | 843 --------- arch/i386/kernel/vmi_32.c | 981 ---------- arch/i386/kernel/vmiclock_32.c | 320 ---- arch/i386/kernel/vmlinux.lds.S | 5 - arch/i386/kernel/vmlinux_32.lds.S | 213 --- arch/i386/kernel/vsyscall-int80_32.S | 53 - arch/i386/kernel/vsyscall-note_32.S | 45 - arch/i386/kernel/vsyscall-sigreturn_32.S | 143 -- arch/i386/kernel/vsyscall-sysenter_32.S | 122 -- arch/i386/kernel/vsyscall_32.S | 15 - arch/i386/kernel/vsyscall_32.lds.S | 67 - arch/um/sys-i386/sys_call_table.S | 2 +- arch/x86/kernel/.gitignore | 1 + arch/x86/kernel/Makefile | 5 + arch/x86/kernel/Makefile_32 | 88 + arch/x86/kernel/alternative.c | 450 +++++ arch/x86/kernel/apic_32.c | 1566 ++++++++++++++++ arch/x86/kernel/apm_32.c | 2403 +++++++++++++++++++++++++ arch/x86/kernel/asm-offsets.c | 5 + arch/x86/kernel/asm-offsets_32.c | 147 ++ arch/x86/kernel/bootflag.c | 98 + arch/x86/kernel/cpuid.c | 242 +++ arch/x86/kernel/crash_32.c | 137 ++ arch/x86/kernel/crash_dump_32.c | 74 + arch/x86/kernel/doublefault_32.c | 70 + arch/x86/kernel/e820_32.c | 944 ++++++++++ arch/x86/kernel/early_printk.c | 2 + arch/x86/kernel/efi_32.c | 712 ++++++++ arch/x86/kernel/efi_stub_32.S | 122 ++ arch/x86/kernel/entry_32.S | 1112 ++++++++++++ arch/x86/kernel/geode_32.c | 155 ++ arch/x86/kernel/head_32.S | 578 ++++++ arch/x86/kernel/hpet_32.c | 553 ++++++ arch/x86/kernel/i386_ksyms_32.c | 30 + arch/x86/kernel/i387_32.c | 546 ++++++ arch/x86/kernel/i8237.c | 72 + arch/x86/kernel/i8253_32.c | 206 +++ arch/x86/kernel/i8259_32.c | 420 +++++ arch/x86/kernel/init_task_32.c | 46 + arch/x86/kernel/io_apic_32.c | 2847 ++++++++++++++++++++++++++++++ arch/x86/kernel/ioport_32.c | 153 ++ arch/x86/kernel/irq_32.c | 343 ++++ arch/x86/kernel/kprobes_32.c | 751 ++++++++ arch/x86/kernel/ldt_32.c | 250 +++ arch/x86/kernel/machine_kexec_32.c | 171 ++ arch/x86/kernel/mca_32.c | 470 +++++ arch/x86/kernel/microcode.c | 850 +++++++++ arch/x86/kernel/module_32.c | 152 ++ arch/x86/kernel/mpparse_32.c | 1132 ++++++++++++ arch/x86/kernel/msr.c | 224 +++ arch/x86/kernel/nmi_32.c | 468 +++++ arch/x86/kernel/numaq_32.c | 89 + arch/x86/kernel/paravirt_32.c | 392 ++++ arch/x86/kernel/pci-dma_32.c | 177 ++ arch/x86/kernel/pcspeaker.c | 20 + arch/x86/kernel/process_32.c | 951 ++++++++++ arch/x86/kernel/ptrace_32.c | 723 ++++++++ arch/x86/kernel/quirks.c | 49 + arch/x86/kernel/reboot_32.c | 413 +++++ arch/x86/kernel/reboot_fixups_32.c | 68 + arch/x86/kernel/relocate_kernel_32.S | 252 +++ arch/x86/kernel/scx200_32.c | 131 ++ arch/x86/kernel/setup_32.c | 653 +++++++ arch/x86/kernel/sigframe_32.h | 21 + arch/x86/kernel/signal_32.c | 667 +++++++ arch/x86/kernel/smp_32.c | 707 ++++++++ arch/x86/kernel/smpboot_32.c | 1322 ++++++++++++++ arch/x86/kernel/smpcommon_32.c | 81 + arch/x86/kernel/srat_32.c | 360 ++++ arch/x86/kernel/summit_32.c | 180 ++ arch/x86/kernel/sys_i386_32.c | 265 +++ arch/x86/kernel/syscall_table_32.S | 326 ++++ arch/x86/kernel/sysenter_32.c | 348 ++++ arch/x86/kernel/time_32.c | 236 +++ arch/x86/kernel/topology.c | 77 + arch/x86/kernel/trampoline_32.S | 85 + arch/x86/kernel/traps_32.c | 1250 +++++++++++++ arch/x86/kernel/tsc_32.c | 413 +++++ arch/x86/kernel/tsc_sync.c | 1 + arch/x86/kernel/vm86_32.c | 843 +++++++++ arch/x86/kernel/vmi_32.c | 981 ++++++++++ arch/x86/kernel/vmiclock_32.c | 320 ++++ arch/x86/kernel/vmlinux.lds.S | 5 + arch/x86/kernel/vmlinux_32.lds.S | 213 +++ arch/x86/kernel/vsyscall-int80_32.S | 53 + arch/x86/kernel/vsyscall-note_32.S | 45 + arch/x86/kernel/vsyscall-sigreturn_32.S | 143 ++ arch/x86/kernel/vsyscall-sysenter_32.S | 122 ++ arch/x86/kernel/vsyscall_32.S | 15 + arch/x86/kernel/vsyscall_32.lds.S | 67 + arch/x86/mach-generic/Makefile | 2 +- arch/x86/mach-voyager/Makefile | 2 +- arch/x86_64/ia32/vsyscall-sigreturn.S | 2 +- arch/x86_64/kernel/Makefile | 2 +- arch/x86_64/kernel/Makefile_64 | 18 +- 163 files changed, 31682 insertions(+), 31675 deletions(-) delete mode 100644 arch/i386/kernel/.gitignore delete mode 100644 arch/i386/kernel/Makefile delete mode 100644 arch/i386/kernel/Makefile_32 delete mode 100644 arch/i386/kernel/alternative.c delete mode 100644 arch/i386/kernel/apic_32.c delete mode 100644 arch/i386/kernel/apm_32.c delete mode 100644 arch/i386/kernel/asm-offsets.c delete mode 100644 arch/i386/kernel/asm-offsets_32.c delete mode 100644 arch/i386/kernel/bootflag.c delete mode 100644 arch/i386/kernel/cpuid.c delete mode 100644 arch/i386/kernel/crash_32.c delete mode 100644 arch/i386/kernel/crash_dump_32.c delete mode 100644 arch/i386/kernel/doublefault_32.c delete mode 100644 arch/i386/kernel/e820_32.c delete mode 100644 arch/i386/kernel/early_printk.c delete mode 100644 arch/i386/kernel/efi_32.c delete mode 100644 arch/i386/kernel/efi_stub_32.S delete mode 100644 arch/i386/kernel/entry_32.S delete mode 100644 arch/i386/kernel/geode_32.c delete mode 100644 arch/i386/kernel/head_32.S delete mode 100644 arch/i386/kernel/hpet_32.c delete mode 100644 arch/i386/kernel/i386_ksyms_32.c delete mode 100644 arch/i386/kernel/i387_32.c delete mode 100644 arch/i386/kernel/i8237.c delete mode 100644 arch/i386/kernel/i8253_32.c delete mode 100644 arch/i386/kernel/i8259_32.c delete mode 100644 arch/i386/kernel/init_task_32.c delete mode 100644 arch/i386/kernel/io_apic_32.c delete mode 100644 arch/i386/kernel/ioport_32.c delete mode 100644 arch/i386/kernel/irq_32.c delete mode 100644 arch/i386/kernel/kprobes_32.c delete mode 100644 arch/i386/kernel/ldt_32.c delete mode 100644 arch/i386/kernel/machine_kexec_32.c delete mode 100644 arch/i386/kernel/mca_32.c delete mode 100644 arch/i386/kernel/microcode.c delete mode 100644 arch/i386/kernel/module_32.c delete mode 100644 arch/i386/kernel/mpparse_32.c delete mode 100644 arch/i386/kernel/msr.c delete mode 100644 arch/i386/kernel/nmi_32.c delete mode 100644 arch/i386/kernel/numaq_32.c delete mode 100644 arch/i386/kernel/paravirt_32.c delete mode 100644 arch/i386/kernel/pci-dma_32.c delete mode 100644 arch/i386/kernel/pcspeaker.c delete mode 100644 arch/i386/kernel/process_32.c delete mode 100644 arch/i386/kernel/ptrace_32.c delete mode 100644 arch/i386/kernel/quirks.c delete mode 100644 arch/i386/kernel/reboot_32.c delete mode 100644 arch/i386/kernel/reboot_fixups_32.c delete mode 100644 arch/i386/kernel/relocate_kernel_32.S delete mode 100644 arch/i386/kernel/scx200_32.c delete mode 100644 arch/i386/kernel/setup_32.c delete mode 100644 arch/i386/kernel/sigframe_32.h delete mode 100644 arch/i386/kernel/signal_32.c delete mode 100644 arch/i386/kernel/smp_32.c delete mode 100644 arch/i386/kernel/smpboot_32.c delete mode 100644 arch/i386/kernel/smpcommon_32.c delete mode 100644 arch/i386/kernel/srat_32.c delete mode 100644 arch/i386/kernel/summit_32.c delete mode 100644 arch/i386/kernel/sys_i386_32.c delete mode 100644 arch/i386/kernel/syscall_table_32.S delete mode 100644 arch/i386/kernel/sysenter_32.c delete mode 100644 arch/i386/kernel/time_32.c delete mode 100644 arch/i386/kernel/topology.c delete mode 100644 arch/i386/kernel/trampoline_32.S delete mode 100644 arch/i386/kernel/traps_32.c delete mode 100644 arch/i386/kernel/tsc_32.c delete mode 100644 arch/i386/kernel/tsc_sync.c delete mode 100644 arch/i386/kernel/vm86_32.c delete mode 100644 arch/i386/kernel/vmi_32.c delete mode 100644 arch/i386/kernel/vmiclock_32.c delete mode 100644 arch/i386/kernel/vmlinux.lds.S delete mode 100644 arch/i386/kernel/vmlinux_32.lds.S delete mode 100644 arch/i386/kernel/vsyscall-int80_32.S delete mode 100644 arch/i386/kernel/vsyscall-note_32.S delete mode 100644 arch/i386/kernel/vsyscall-sigreturn_32.S delete mode 100644 arch/i386/kernel/vsyscall-sysenter_32.S delete mode 100644 arch/i386/kernel/vsyscall_32.S delete mode 100644 arch/i386/kernel/vsyscall_32.lds.S create mode 100644 arch/x86/kernel/.gitignore create mode 100644 arch/x86/kernel/Makefile create mode 100644 arch/x86/kernel/Makefile_32 create mode 100644 arch/x86/kernel/alternative.c create mode 100644 arch/x86/kernel/apic_32.c create mode 100644 arch/x86/kernel/apm_32.c create mode 100644 arch/x86/kernel/asm-offsets.c create mode 100644 arch/x86/kernel/asm-offsets_32.c create mode 100644 arch/x86/kernel/bootflag.c create mode 100644 arch/x86/kernel/cpuid.c create mode 100644 arch/x86/kernel/crash_32.c create mode 100644 arch/x86/kernel/crash_dump_32.c create mode 100644 arch/x86/kernel/doublefault_32.c create mode 100644 arch/x86/kernel/e820_32.c create mode 100644 arch/x86/kernel/early_printk.c create mode 100644 arch/x86/kernel/efi_32.c create mode 100644 arch/x86/kernel/efi_stub_32.S create mode 100644 arch/x86/kernel/entry_32.S create mode 100644 arch/x86/kernel/geode_32.c create mode 100644 arch/x86/kernel/head_32.S create mode 100644 arch/x86/kernel/hpet_32.c create mode 100644 arch/x86/kernel/i386_ksyms_32.c create mode 100644 arch/x86/kernel/i387_32.c create mode 100644 arch/x86/kernel/i8237.c create mode 100644 arch/x86/kernel/i8253_32.c create mode 100644 arch/x86/kernel/i8259_32.c create mode 100644 arch/x86/kernel/init_task_32.c create mode 100644 arch/x86/kernel/io_apic_32.c create mode 100644 arch/x86/kernel/ioport_32.c create mode 100644 arch/x86/kernel/irq_32.c create mode 100644 arch/x86/kernel/kprobes_32.c create mode 100644 arch/x86/kernel/ldt_32.c create mode 100644 arch/x86/kernel/machine_kexec_32.c create mode 100644 arch/x86/kernel/mca_32.c create mode 100644 arch/x86/kernel/microcode.c create mode 100644 arch/x86/kernel/module_32.c create mode 100644 arch/x86/kernel/mpparse_32.c create mode 100644 arch/x86/kernel/msr.c create mode 100644 arch/x86/kernel/nmi_32.c create mode 100644 arch/x86/kernel/numaq_32.c create mode 100644 arch/x86/kernel/paravirt_32.c create mode 100644 arch/x86/kernel/pci-dma_32.c create mode 100644 arch/x86/kernel/pcspeaker.c create mode 100644 arch/x86/kernel/process_32.c create mode 100644 arch/x86/kernel/ptrace_32.c create mode 100644 arch/x86/kernel/quirks.c create mode 100644 arch/x86/kernel/reboot_32.c create mode 100644 arch/x86/kernel/reboot_fixups_32.c create mode 100644 arch/x86/kernel/relocate_kernel_32.S create mode 100644 arch/x86/kernel/scx200_32.c create mode 100644 arch/x86/kernel/setup_32.c create mode 100644 arch/x86/kernel/sigframe_32.h create mode 100644 arch/x86/kernel/signal_32.c create mode 100644 arch/x86/kernel/smp_32.c create mode 100644 arch/x86/kernel/smpboot_32.c create mode 100644 arch/x86/kernel/smpcommon_32.c create mode 100644 arch/x86/kernel/srat_32.c create mode 100644 arch/x86/kernel/summit_32.c create mode 100644 arch/x86/kernel/sys_i386_32.c create mode 100644 arch/x86/kernel/syscall_table_32.S create mode 100644 arch/x86/kernel/sysenter_32.c create mode 100644 arch/x86/kernel/time_32.c create mode 100644 arch/x86/kernel/topology.c create mode 100644 arch/x86/kernel/trampoline_32.S create mode 100644 arch/x86/kernel/traps_32.c create mode 100644 arch/x86/kernel/tsc_32.c create mode 100644 arch/x86/kernel/tsc_sync.c create mode 100644 arch/x86/kernel/vm86_32.c create mode 100644 arch/x86/kernel/vmi_32.c create mode 100644 arch/x86/kernel/vmiclock_32.c create mode 100644 arch/x86/kernel/vmlinux.lds.S create mode 100644 arch/x86/kernel/vmlinux_32.lds.S create mode 100644 arch/x86/kernel/vsyscall-int80_32.S create mode 100644 arch/x86/kernel/vsyscall-note_32.S create mode 100644 arch/x86/kernel/vsyscall-sigreturn_32.S create mode 100644 arch/x86/kernel/vsyscall-sysenter_32.S create mode 100644 arch/x86/kernel/vsyscall_32.S create mode 100644 arch/x86/kernel/vsyscall_32.lds.S (limited to 'arch/x86') diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 397cfedb4b1..9c1da722964 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -17,6 +17,13 @@ # 20050320 Kianusch Sayah Karadji # Added support for GEODE CPU +# Fill in SRCARCH +SRCARCH := x86 + +archprepare: + @mkdir -p ${objtree}/arch/x86/kernel + + HAS_BIARCH := $(call cc-option-yn, -m32) ifeq ($(HAS_BIARCH),y) AS := $(AS) --32 @@ -99,10 +106,10 @@ core-$(CONFIG_XEN) += arch/x86/xen/ # default subarch .h files mflags-y += -Iinclude/asm-i386/mach-default -head-y := arch/i386/kernel/head_32.o arch/i386/kernel/init_task_32.o +head-y := arch/x86/kernel/head_32.o arch/x86/kernel/init_task_32.o libs-y += arch/x86/lib/ -core-y += arch/i386/kernel/ \ +core-y += arch/x86/kernel/ \ arch/x86/mm/ \ $(mcore-y)/ \ arch/x86/crypto/ diff --git a/arch/i386/kernel/.gitignore b/arch/i386/kernel/.gitignore deleted file mode 100644 index 40836ad9079..00000000000 --- a/arch/i386/kernel/.gitignore +++ /dev/null @@ -1 +0,0 @@ -vsyscall.lds diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile deleted file mode 100644 index d3ebd169982..00000000000 --- a/arch/i386/kernel/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -ifeq ($(CONFIG_X86_32),y) -include ${srctree}/arch/i386/kernel/Makefile_32 -else -include ${srctree}/arch/x86_64/kernel/Makefile_64 -endif diff --git a/arch/i386/kernel/Makefile_32 b/arch/i386/kernel/Makefile_32 deleted file mode 100644 index 5096f486d38..00000000000 --- a/arch/i386/kernel/Makefile_32 +++ /dev/null @@ -1,88 +0,0 @@ -# -# Makefile for the linux kernel. -# - -extra-y := head_32.o init_task_32.o vmlinux.lds - -obj-y := process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \ - ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \ - pci-dma_32.o i386_ksyms_32.o i387_32.o bootflag.o e820_32.o\ - quirks.o i8237.o topology.o alternative.o i8253_32.o tsc_32.o - -obj-$(CONFIG_STACKTRACE) += stacktrace.o -obj-y += ../../x86/kernel/cpu/ -obj-y += ../../x86/kernel/acpi/ -obj-$(CONFIG_X86_BIOS_REBOOT) += reboot_32.o -obj-$(CONFIG_MCA) += mca_32.o -obj-$(CONFIG_X86_MSR) += msr.o -obj-$(CONFIG_X86_CPUID) += cpuid.o -obj-$(CONFIG_MICROCODE) += microcode.o -obj-$(CONFIG_APM) += apm_32.o -obj-$(CONFIG_X86_SMP) += smp_32.o smpboot_32.o tsc_sync.o -obj-$(CONFIG_SMP) += smpcommon_32.o -obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_32.o -obj-$(CONFIG_X86_MPPARSE) += mpparse_32.o -obj-$(CONFIG_X86_LOCAL_APIC) += apic_32.o nmi_32.o -obj-$(CONFIG_X86_IO_APIC) += io_apic_32.o -obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o -obj-$(CONFIG_KEXEC) += machine_kexec_32.o relocate_kernel_32.o crash_32.o -obj-$(CONFIG_CRASH_DUMP) += crash_dump_32.o -obj-$(CONFIG_X86_NUMAQ) += numaq_32.o -obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o -obj-$(CONFIG_KPROBES) += kprobes_32.o -obj-$(CONFIG_MODULES) += module_32.o -obj-y += sysenter_32.o vsyscall_32.o -obj-$(CONFIG_ACPI_SRAT) += srat_32.o -obj-$(CONFIG_EFI) += efi_32.o efi_stub_32.o -obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o -obj-$(CONFIG_VM86) += vm86_32.o -obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -obj-$(CONFIG_HPET_TIMER) += hpet_32.o -obj-$(CONFIG_K8_NB) += k8.o -obj-$(CONFIG_MGEODE_LX) += geode_32.o - -obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o -obj-$(CONFIG_PARAVIRT) += paravirt_32.o -obj-y += pcspeaker.o - -obj-$(CONFIG_SCx200) += scx200_32.o - -# vsyscall_32.o contains the vsyscall DSO images as __initdata. -# We must build both images before we can assemble it. -# Note: kbuild does not track this dependency due to usage of .incbin -$(obj)/vsyscall_32.o: $(obj)/vsyscall-int80_32.so $(obj)/vsyscall-sysenter_32.so -targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so) -targets += vsyscall-note_32.o vsyscall_32.lds - -# The DSO images are built using a special linker script. -quiet_cmd_syscall = SYSCALL $@ - cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \ - -Wl,-T,$(filter-out FORCE,$^) -o $@ - -export CPPFLAGS_vsyscall_32.lds += -P -C -U$(ARCH) - -vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1 \ - $(call ld-option, -Wl$(comma)--hash-style=sysv) -SYSCFLAGS_vsyscall-sysenter_32.so = $(vsyscall-flags) -SYSCFLAGS_vsyscall-int80_32.so = $(vsyscall-flags) - -$(obj)/vsyscall-int80_32.so $(obj)/vsyscall-sysenter_32.so: \ -$(obj)/vsyscall-%.so: $(src)/vsyscall_32.lds \ - $(obj)/vsyscall-%.o $(obj)/vsyscall-note_32.o FORCE - $(call if_changed,syscall) - -# We also create a special relocatable object that should mirror the symbol -# table and layout of the linked DSO. With ld -R we can then refer to -# these symbols in the kernel code rather than hand-coded addresses. -extra-y += vsyscall-syms.o -$(obj)/built-in.o: $(obj)/vsyscall-syms.o -$(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o - -SYSCFLAGS_vsyscall-syms.o = -r -$(obj)/vsyscall-syms.o: $(src)/vsyscall_32.lds \ - $(obj)/vsyscall-sysenter_32.o $(obj)/vsyscall-note_32.o FORCE - $(call if_changed,syscall) - -k8-y += ../../x86_64/kernel/k8.o -stacktrace-y += ../../x86_64/kernel/stacktrace.o - diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c deleted file mode 100644 index bd72d94e713..00000000000 --- a/arch/i386/kernel/alternative.c +++ /dev/null @@ -1,450 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MAX_PATCH_LEN (255-1) - -#ifdef CONFIG_HOTPLUG_CPU -static int smp_alt_once; - -static int __init bootonly(char *str) -{ - smp_alt_once = 1; - return 1; -} -__setup("smp-alt-boot", bootonly); -#else -#define smp_alt_once 1 -#endif - -static int debug_alternative; - -static int __init debug_alt(char *str) -{ - debug_alternative = 1; - return 1; -} -__setup("debug-alternative", debug_alt); - -static int noreplace_smp; - -static int __init setup_noreplace_smp(char *str) -{ - noreplace_smp = 1; - return 1; -} -__setup("noreplace-smp", setup_noreplace_smp); - -#ifdef CONFIG_PARAVIRT -static int noreplace_paravirt = 0; - -static int __init setup_noreplace_paravirt(char *str) -{ - noreplace_paravirt = 1; - return 1; -} -__setup("noreplace-paravirt", setup_noreplace_paravirt); -#endif - -#define DPRINTK(fmt, args...) if (debug_alternative) \ - printk(KERN_DEBUG fmt, args) - -#ifdef GENERIC_NOP1 -/* Use inline assembly to define this because the nops are defined - as inline assembly strings in the include files and we cannot - get them easily into strings. */ -asm("\t.data\nintelnops: " - GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 - GENERIC_NOP7 GENERIC_NOP8); -extern unsigned char intelnops[]; -static unsigned char *intel_nops[ASM_NOP_MAX+1] = { - NULL, - intelnops, - intelnops + 1, - intelnops + 1 + 2, - intelnops + 1 + 2 + 3, - intelnops + 1 + 2 + 3 + 4, - intelnops + 1 + 2 + 3 + 4 + 5, - intelnops + 1 + 2 + 3 + 4 + 5 + 6, - intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -#endif - -#ifdef K8_NOP1 -asm("\t.data\nk8nops: " - K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 - K8_NOP7 K8_NOP8); -extern unsigned char k8nops[]; -static unsigned char *k8_nops[ASM_NOP_MAX+1] = { - NULL, - k8nops, - k8nops + 1, - k8nops + 1 + 2, - k8nops + 1 + 2 + 3, - k8nops + 1 + 2 + 3 + 4, - k8nops + 1 + 2 + 3 + 4 + 5, - k8nops + 1 + 2 + 3 + 4 + 5 + 6, - k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -#endif - -#ifdef K7_NOP1 -asm("\t.data\nk7nops: " - K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 - K7_NOP7 K7_NOP8); -extern unsigned char k7nops[]; -static unsigned char *k7_nops[ASM_NOP_MAX+1] = { - NULL, - k7nops, - k7nops + 1, - k7nops + 1 + 2, - k7nops + 1 + 2 + 3, - k7nops + 1 + 2 + 3 + 4, - k7nops + 1 + 2 + 3 + 4 + 5, - k7nops + 1 + 2 + 3 + 4 + 5 + 6, - k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -#endif - -#ifdef CONFIG_X86_64 - -extern char __vsyscall_0; -static inline unsigned char** find_nop_table(void) -{ - return k8_nops; -} - -#else /* CONFIG_X86_64 */ - -static struct nop { - int cpuid; - unsigned char **noptable; -} noptypes[] = { - { X86_FEATURE_K8, k8_nops }, - { X86_FEATURE_K7, k7_nops }, - { -1, NULL } -}; - -static unsigned char** find_nop_table(void) -{ - unsigned char **noptable = intel_nops; - int i; - - for (i = 0; noptypes[i].cpuid >= 0; i++) { - if (boot_cpu_has(noptypes[i].cpuid)) { - noptable = noptypes[i].noptable; - break; - } - } - return noptable; -} - -#endif /* CONFIG_X86_64 */ - -/* Use this to add nops to a buffer, then text_poke the whole buffer. */ -static void add_nops(void *insns, unsigned int len) -{ - unsigned char **noptable = find_nop_table(); - - while (len > 0) { - unsigned int noplen = len; - if (noplen > ASM_NOP_MAX) - noplen = ASM_NOP_MAX; - memcpy(insns, noptable[noplen], noplen); - insns += noplen; - len -= noplen; - } -} - -extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; -extern u8 *__smp_locks[], *__smp_locks_end[]; - -/* Replace instructions with better alternatives for this CPU type. - This runs before SMP is initialized to avoid SMP problems with - self modifying code. This implies that assymetric systems where - APs have less capabilities than the boot processor are not handled. - Tough. Make sure you disable such features by hand. */ - -void apply_alternatives(struct alt_instr *start, struct alt_instr *end) -{ - struct alt_instr *a; - char insnbuf[MAX_PATCH_LEN]; - - DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); - for (a = start; a < end; a++) { - u8 *instr = a->instr; - BUG_ON(a->replacementlen > a->instrlen); - BUG_ON(a->instrlen > sizeof(insnbuf)); - if (!boot_cpu_has(a->cpuid)) - continue; -#ifdef CONFIG_X86_64 - /* vsyscall code is not mapped yet. resolve it manually. */ - if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { - instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0)); - DPRINTK("%s: vsyscall fixup: %p => %p\n", - __FUNCTION__, a->instr, instr); - } -#endif - memcpy(insnbuf, a->replacement, a->replacementlen); - add_nops(insnbuf + a->replacementlen, - a->instrlen - a->replacementlen); - text_poke(instr, insnbuf, a->instrlen); - } -} - -#ifdef CONFIG_SMP - -static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) -{ - u8 **ptr; - - for (ptr = start; ptr < end; ptr++) { - if (*ptr < text) - continue; - if (*ptr > text_end) - continue; - text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */ - }; -} - -static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) -{ - u8 **ptr; - char insn[1]; - - if (noreplace_smp) - return; - - add_nops(insn, 1); - for (ptr = start; ptr < end; ptr++) { - if (*ptr < text) - continue; - if (*ptr > text_end) - continue; - text_poke(*ptr, insn, 1); - }; -} - -struct smp_alt_module { - /* what is this ??? */ - struct module *mod; - char *name; - - /* ptrs to lock prefixes */ - u8 **locks; - u8 **locks_end; - - /* .text segment, needed to avoid patching init code ;) */ - u8 *text; - u8 *text_end; - - struct list_head next; -}; -static LIST_HEAD(smp_alt_modules); -static DEFINE_SPINLOCK(smp_alt); - -void alternatives_smp_module_add(struct module *mod, char *name, - void *locks, void *locks_end, - void *text, void *text_end) -{ - struct smp_alt_module *smp; - unsigned long flags; - - if (noreplace_smp) - return; - - if (smp_alt_once) { - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(locks, locks_end, - text, text_end); - return; - } - - smp = kzalloc(sizeof(*smp), GFP_KERNEL); - if (NULL == smp) - return; /* we'll run the (safe but slow) SMP code then ... */ - - smp->mod = mod; - smp->name = name; - smp->locks = locks; - smp->locks_end = locks_end; - smp->text = text; - smp->text_end = text_end; - DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n", - __FUNCTION__, smp->locks, smp->locks_end, - smp->text, smp->text_end, smp->name); - - spin_lock_irqsave(&smp_alt, flags); - list_add_tail(&smp->next, &smp_alt_modules); - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(smp->locks, smp->locks_end, - smp->text, smp->text_end); - spin_unlock_irqrestore(&smp_alt, flags); -} - -void alternatives_smp_module_del(struct module *mod) -{ - struct smp_alt_module *item; - unsigned long flags; - - if (smp_alt_once || noreplace_smp) - return; - - spin_lock_irqsave(&smp_alt, flags); - list_for_each_entry(item, &smp_alt_modules, next) { - if (mod != item->mod) - continue; - list_del(&item->next); - spin_unlock_irqrestore(&smp_alt, flags); - DPRINTK("%s: %s\n", __FUNCTION__, item->name); - kfree(item); - return; - } - spin_unlock_irqrestore(&smp_alt, flags); -} - -void alternatives_smp_switch(int smp) -{ - struct smp_alt_module *mod; - unsigned long flags; - -#ifdef CONFIG_LOCKDEP - /* - * A not yet fixed binutils section handling bug prevents - * alternatives-replacement from working reliably, so turn - * it off: - */ - printk("lockdep: not fixing up alternatives.\n"); - return; -#endif - - if (noreplace_smp || smp_alt_once) - return; - BUG_ON(!smp && (num_online_cpus() > 1)); - - spin_lock_irqsave(&smp_alt, flags); - if (smp) { - printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); - clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); - list_for_each_entry(mod, &smp_alt_modules, next) - alternatives_smp_lock(mod->locks, mod->locks_end, - mod->text, mod->text_end); - } else { - printk(KERN_INFO "SMP alternatives: switching to UP code\n"); - set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); - list_for_each_entry(mod, &smp_alt_modules, next) - alternatives_smp_unlock(mod->locks, mod->locks_end, - mod->text, mod->text_end); - } - spin_unlock_irqrestore(&smp_alt, flags); -} - -#endif - -#ifdef CONFIG_PARAVIRT -void apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end) -{ - struct paravirt_patch_site *p; - char insnbuf[MAX_PATCH_LEN]; - - if (noreplace_paravirt) - return; - - for (p = start; p < end; p++) { - unsigned int used; - - BUG_ON(p->len > MAX_PATCH_LEN); - /* prep the buffer with the original instructions */ - memcpy(insnbuf, p->instr, p->len); - used = paravirt_ops.patch(p->instrtype, p->clobbers, insnbuf, - (unsigned long)p->instr, p->len); - - BUG_ON(used > p->len); - - /* Pad the rest with nops */ - add_nops(insnbuf + used, p->len - used); - text_poke(p->instr, insnbuf, p->len); - } -} -extern struct paravirt_patch_site __start_parainstructions[], - __stop_parainstructions[]; -#endif /* CONFIG_PARAVIRT */ - -void __init alternative_instructions(void) -{ - unsigned long flags; - - /* The patching is not fully atomic, so try to avoid local interruptions - that might execute the to be patched code. - Other CPUs are not running. */ - stop_nmi(); -#ifdef CONFIG_X86_MCE - stop_mce(); -#endif - - local_irq_save(flags); - apply_alternatives(__alt_instructions, __alt_instructions_end); - - /* switch to patch-once-at-boottime-only mode and free the - * tables in case we know the number of CPUs will never ever - * change */ -#ifdef CONFIG_HOTPLUG_CPU - if (num_possible_cpus() < 2) - smp_alt_once = 1; -#endif - -#ifdef CONFIG_SMP - if (smp_alt_once) { - if (1 == num_possible_cpus()) { - printk(KERN_INFO "SMP alternatives: switching to UP code\n"); - set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); - set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); - alternatives_smp_unlock(__smp_locks, __smp_locks_end, - _text, _etext); - } - free_init_pages("SMP alternatives", - (unsigned long)__smp_locks, - (unsigned long)__smp_locks_end); - } else { - alternatives_smp_module_add(NULL, "core kernel", - __smp_locks, __smp_locks_end, - _text, _etext); - alternatives_smp_switch(0); - } -#endif - apply_paravirt(__parainstructions, __parainstructions_end); - local_irq_restore(flags); - - restart_nmi(); -#ifdef CONFIG_X86_MCE - restart_mce(); -#endif -} - -/* - * Warning: - * When you use this code to patch more than one byte of an instruction - * you need to make sure that other CPUs cannot execute this code in parallel. - * Also no thread must be currently preempted in the middle of these instructions. - * And on the local CPU you need to be protected again NMI or MCE handlers - * seeing an inconsistent instruction while you patch. - */ -void __kprobes text_poke(void *addr, unsigned char *opcode, int len) -{ - memcpy(addr, opcode, len); - sync_core(); - /* Could also do a CLFLUSH here to speed up CPU recovery; but - that causes hangs on some VIA CPUs. */ -} diff --git a/arch/i386/kernel/apic_32.c b/arch/i386/kernel/apic_32.c deleted file mode 100644 index 3d67ae18d76..00000000000 --- a/arch/i386/kernel/apic_32.c +++ /dev/null @@ -1,1566 +0,0 @@ -/* - * Local APIC handling, local APIC timers - * - * (c) 1999, 2000 Ingo Molnar - * - * Fixes - * Maciej W. Rozycki : Bits for genuine 82489DX APICs; - * thanks to Eric Gilmore - * and Rolf G. Tews - * for testing these extensively. - * Maciej W. Rozycki : Various updates and fixes. - * Mikael Pettersson : Power Management for UP-APIC. - * Pavel Machek and - * Mikael Pettersson : PM converted to driver model. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "io_ports.h" - -/* - * Sanity check - */ -#if (SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F -# error SPURIOUS_APIC_VECTOR definition error -#endif - -/* - * Knob to control our willingness to enable the local APIC. - * - * -1=force-disable, +1=force-enable - */ -static int enable_local_apic __initdata = 0; - -/* Local APIC timer verification ok */ -static int local_apic_timer_verify_ok; -/* Disable local APIC timer from the kernel commandline or via dmi quirk - or using CPU MSR check */ -int local_apic_timer_disabled; -/* Local APIC timer works in C2 */ -int local_apic_timer_c2_ok; -EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); - -/* - * Debug level, exported for io_apic.c - */ -int apic_verbosity; - -static unsigned int calibration_result; - -static int lapic_next_event(unsigned long delta, - struct clock_event_device *evt); -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt); -static void lapic_timer_broadcast(cpumask_t mask); -static void apic_pm_activate(void); - -/* - * The local apic timer can be used for any function which is CPU local. - */ -static struct clock_event_device lapic_clockevent = { - .name = "lapic", - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT - | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, - .shift = 32, - .set_mode = lapic_timer_setup, - .set_next_event = lapic_next_event, - .broadcast = lapic_timer_broadcast, - .rating = 100, - .irq = -1, -}; -static DEFINE_PER_CPU(struct clock_event_device, lapic_events); - -/* Local APIC was disabled by the BIOS and enabled by the kernel */ -static int enabled_via_apicbase; - -/* - * Get the LAPIC version - */ -static inline int lapic_get_version(void) -{ - return GET_APIC_VERSION(apic_read(APIC_LVR)); -} - -/* - * Check, if the APIC is integrated or a seperate chip - */ -static inline int lapic_is_integrated(void) -{ - return APIC_INTEGRATED(lapic_get_version()); -} - -/* - * Check, whether this is a modern or a first generation APIC - */ -static int modern_apic(void) -{ - /* AMD systems use old APIC versions, so check the CPU */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 >= 0xf) - return 1; - return lapic_get_version() >= 0x14; -} - -void apic_wait_icr_idle(void) -{ - while (apic_read(APIC_ICR) & APIC_ICR_BUSY) - cpu_relax(); -} - -unsigned long safe_apic_wait_icr_idle(void) -{ - unsigned long send_status; - int timeout; - - timeout = 0; - do { - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - if (!send_status) - break; - udelay(100); - } while (timeout++ < 1000); - - return send_status; -} - -/** - * enable_NMI_through_LVT0 - enable NMI through local vector table 0 - */ -void enable_NMI_through_LVT0 (void * dummy) -{ - unsigned int v = APIC_DM_NMI; - - /* Level triggered for 82489DX */ - if (!lapic_is_integrated()) - v |= APIC_LVT_LEVEL_TRIGGER; - apic_write_around(APIC_LVT0, v); -} - -/** - * get_physical_broadcast - Get number of physical broadcast IDs - */ -int get_physical_broadcast(void) -{ - return modern_apic() ? 0xff : 0xf; -} - -/** - * lapic_get_maxlvt - get the maximum number of local vector table entries - */ -int lapic_get_maxlvt(void) -{ - unsigned int v = apic_read(APIC_LVR); - - /* 82489DXs do not report # of LVT entries. */ - return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; -} - -/* - * Local APIC timer - */ - -/* Clock divisor is set to 16 */ -#define APIC_DIVISOR 16 - -/* - * This function sets up the local APIC timer, with a timeout of - * 'clocks' APIC bus clock. During calibration we actually call - * this function twice on the boot CPU, once with a bogus timeout - * value, second time for real. The other (noncalibrating) CPUs - * call this function only once, with the real, calibrated value. - * - * We do reads before writes even if unnecessary, to get around the - * P5 APIC double write bug. - */ -static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) -{ - unsigned int lvtt_value, tmp_value; - - lvtt_value = LOCAL_TIMER_VECTOR; - if (!oneshot) - lvtt_value |= APIC_LVT_TIMER_PERIODIC; - if (!lapic_is_integrated()) - lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); - - if (!irqen) - lvtt_value |= APIC_LVT_MASKED; - - apic_write_around(APIC_LVTT, lvtt_value); - - /* - * Divide PICLK by 16 - */ - tmp_value = apic_read(APIC_TDCR); - apic_write_around(APIC_TDCR, (tmp_value - & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) - | APIC_TDR_DIV_16); - - if (!oneshot) - apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); -} - -/* - * Program the next event, relative to now - */ -static int lapic_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - apic_write_around(APIC_TMICT, delta); - return 0; -} - -/* - * Setup the lapic timer in periodic or oneshot mode - */ -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - unsigned long flags; - unsigned int v; - - /* Lapic used for broadcast ? */ - if (!local_apic_timer_verify_ok) - return; - - local_irq_save(flags); - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - case CLOCK_EVT_MODE_ONESHOT: - __setup_APIC_LVTT(calibration_result, - mode != CLOCK_EVT_MODE_PERIODIC, 1); - break; - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - v = apic_read(APIC_LVTT); - v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write_around(APIC_LVTT, v); - break; - case CLOCK_EVT_MODE_RESUME: - /* Nothing to do here */ - break; - } - - local_irq_restore(flags); -} - -/* - * Local APIC timer broadcast function - */ -static void lapic_timer_broadcast(cpumask_t mask) -{ -#ifdef CONFIG_SMP - send_IPI_mask(mask, LOCAL_TIMER_VECTOR); -#endif -} - -/* - * Setup the local APIC timer for this CPU. Copy the initilized values - * of the boot CPU and register the clock event in the framework. - */ -static void __devinit setup_APIC_timer(void) -{ - struct clock_event_device *levt = &__get_cpu_var(lapic_events); - - memcpy(levt, &lapic_clockevent, sizeof(*levt)); - levt->cpumask = cpumask_of_cpu(smp_processor_id()); - - clockevents_register_device(levt); -} - -/* - * In this functions we calibrate APIC bus clocks to the external timer. - * - * We want to do the calibration only once since we want to have local timer - * irqs syncron. CPUs connected by the same APIC bus have the very same bus - * frequency. - * - * This was previously done by reading the PIT/HPET and waiting for a wrap - * around to find out, that a tick has elapsed. I have a box, where the PIT - * readout is broken, so it never gets out of the wait loop again. This was - * also reported by others. - * - * Monitoring the jiffies value is inaccurate and the clockevents - * infrastructure allows us to do a simple substitution of the interrupt - * handler. - * - * The calibration routine also uses the pm_timer when possible, as the PIT - * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes - * back to normal later in the boot process). - */ - -#define LAPIC_CAL_LOOPS (HZ/10) - -static __initdata int lapic_cal_loops = -1; -static __initdata long lapic_cal_t1, lapic_cal_t2; -static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; -static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; -static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; - -/* - * Temporary interrupt handler. - */ -static void __init lapic_cal_handler(struct clock_event_device *dev) -{ - unsigned long long tsc = 0; - long tapic = apic_read(APIC_TMCCT); - unsigned long pm = acpi_pm_read_early(); - - if (cpu_has_tsc) - rdtscll(tsc); - - switch (lapic_cal_loops++) { - case 0: - lapic_cal_t1 = tapic; - lapic_cal_tsc1 = tsc; - lapic_cal_pm1 = pm; - lapic_cal_j1 = jiffies; - break; - - case LAPIC_CAL_LOOPS: - lapic_cal_t2 = tapic; - lapic_cal_tsc2 = tsc; - if (pm < lapic_cal_pm1) - pm += ACPI_PM_OVRRUN; - lapic_cal_pm2 = pm; - lapic_cal_j2 = jiffies; - break; - } -} - -/* - * Setup the boot APIC - * - * Calibrate and verify the result. - */ -void __init setup_boot_APIC_clock(void) -{ - struct clock_event_device *levt = &__get_cpu_var(lapic_events); - const long pm_100ms = PMTMR_TICKS_PER_SEC/10; - const long pm_thresh = pm_100ms/100; - void (*real_handler)(struct clock_event_device *dev); - unsigned long deltaj; - long delta, deltapm; - int pm_referenced = 0; - - /* - * The local apic timer can be disabled via the kernel - * commandline or from the CPU detection code. Register the lapic - * timer as a dummy clock event source on SMP systems, so the - * broadcast mechanism is used. On UP systems simply ignore it. - */ - if (local_apic_timer_disabled) { - /* No broadcast on UP ! */ - if (num_possible_cpus() > 1) - setup_APIC_timer(); - return; - } - - apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" - "calibrating APIC timer ...\n"); - - local_irq_disable(); - - /* Replace the global interrupt handler */ - real_handler = global_clock_event->event_handler; - global_clock_event->event_handler = lapic_cal_handler; - - /* - * Setup the APIC counter to 1e9. There is no way the lapic - * can underflow in the 100ms detection time frame - */ - __setup_APIC_LVTT(1000000000, 0, 0); - - /* Let the interrupts run */ - local_irq_enable(); - - while (lapic_cal_loops <= LAPIC_CAL_LOOPS) - cpu_relax(); - - local_irq_disable(); - - /* Restore the real event handler */ - global_clock_event->event_handler = real_handler; - - /* Build delta t1-t2 as apic timer counts down */ - delta = lapic_cal_t1 - lapic_cal_t2; - apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); - - /* Check, if the PM timer is available */ - deltapm = lapic_cal_pm2 - lapic_cal_pm1; - apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); - - if (deltapm) { - unsigned long mult; - u64 res; - - mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); - - if (deltapm > (pm_100ms - pm_thresh) && - deltapm < (pm_100ms + pm_thresh)) { - apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); - } else { - res = (((u64) deltapm) * mult) >> 22; - do_div(res, 1000000); - printk(KERN_WARNING "APIC calibration not consistent " - "with PM Timer: %ldms instead of 100ms\n", - (long)res); - /* Correct the lapic counter value */ - res = (((u64) delta ) * pm_100ms); - do_div(res, deltapm); - printk(KERN_INFO "APIC delta adjusted to PM-Timer: " - "%lu (%ld)\n", (unsigned long) res, delta); - delta = (long) res; - } - pm_referenced = 1; - } - - /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 32); - lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); - lapic_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &lapic_clockevent); - - calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; - - apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); - apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); - apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", - calibration_result); - - if (cpu_has_tsc) { - delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); - apic_printk(APIC_VERBOSE, "..... CPU clock speed is " - "%ld.%04ld MHz.\n", - (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), - (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); - } - - apic_printk(APIC_VERBOSE, "..... host bus clock speed is " - "%u.%04u MHz.\n", - calibration_result / (1000000 / HZ), - calibration_result % (1000000 / HZ)); - - local_apic_timer_verify_ok = 1; - - /* We trust the pm timer based calibration */ - if (!pm_referenced) { - apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); - - /* - * Setup the apic timer manually - */ - levt->event_handler = lapic_cal_handler; - lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt); - lapic_cal_loops = -1; - - /* Let the interrupts run */ - local_irq_enable(); - - while (lapic_cal_loops <= LAPIC_CAL_LOOPS) - cpu_relax(); - - local_irq_disable(); - - /* Stop the lapic timer */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); - - local_irq_enable(); - - /* Jiffies delta */ - deltaj = lapic_cal_j2 - lapic_cal_j1; - apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); - - /* Check, if the jiffies result is consistent */ - if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) - apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); - else - local_apic_timer_verify_ok = 0; - } else - local_irq_enable(); - - if (!local_apic_timer_verify_ok) { - printk(KERN_WARNING - "APIC timer disabled due to verification failure.\n"); - /* No broadcast on UP ! */ - if (num_possible_cpus() == 1) - return; - } else { - /* - * If nmi_watchdog is set to IO_APIC, we need the - * PIT/HPET going. Otherwise register lapic as a dummy - * device. - */ - if (nmi_watchdog != NMI_IO_APIC) - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; - else - printk(KERN_WARNING "APIC timer registered as dummy," - " due to nmi_watchdog=1!\n"); - } - - /* Setup the lapic or request the broadcast */ - setup_APIC_timer(); -} - -void __devinit setup_secondary_APIC_clock(void) -{ - setup_APIC_timer(); -} - -/* - * The guts of the apic timer interrupt - */ -static void local_apic_timer_interrupt(void) -{ - int cpu = smp_processor_id(); - struct clock_event_device *evt = &per_cpu(lapic_events, cpu); - - /* - * Normally we should not be here till LAPIC has been initialized but - * in some cases like kdump, its possible that there is a pending LAPIC - * timer interrupt from previous kernel's context and is delivered in - * new kernel the moment interrupts are enabled. - * - * Interrupts are enabled early and LAPIC is setup much later, hence - * its possible that when we get here evt->event_handler is NULL. - * Check for event_handler being NULL and discard the interrupt as - * spurious. - */ - if (!evt->event_handler) { - printk(KERN_WARNING - "Spurious LAPIC timer interrupt on cpu %d\n", cpu); - /* Switch it off */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); - return; - } - - per_cpu(irq_stat, cpu).apic_timer_irqs++; - - evt->event_handler(evt); -} - -/* - * Local APIC timer interrupt. This is the most natural way for doing - * local interrupts, but local timer interrupts can be emulated by - * broadcast interrupts too. [in case the hw doesn't support APIC timers] - * - * [ if a single-CPU system runs an SMP kernel then we call the local - * interrupt as well. Thus we cannot inline the local irq ... ] - */ - -void fastcall smp_apic_timer_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - /* - * NOTE! We'd better ACK the irq immediately, - * because timer handling can be slow. - */ - ack_APIC_irq(); - /* - * update_process_times() expects us to have done irq_enter(). - * Besides, if we don't timer interrupts ignore the global - * interrupt lock, which is the WrongThing (tm) to do. - */ - irq_enter(); - local_apic_timer_interrupt(); - irq_exit(); - - set_irq_regs(old_regs); -} - -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - -/* - * Local APIC start and shutdown - */ - -/** - * clear_local_APIC - shutdown the local APIC - * - * This is called, when a CPU is disabled and before rebooting, so the state of - * the local APIC has no dangling leftovers. Also used to cleanout any BIOS - * leftovers during boot. - */ -void clear_local_APIC(void) -{ - int maxlvt = lapic_get_maxlvt(); - unsigned long v; - - /* - * Masking an LVT entry can trigger a local APIC error - * if the vector is zero. Mask LVTERR first to prevent this. - */ - if (maxlvt >= 3) { - v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ - apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED); - } - /* - * Careful: we have to set masks only first to deassert - * any level-triggered sources. - */ - v = apic_read(APIC_LVTT); - apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT0); - apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT1); - apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED); - if (maxlvt >= 4) { - v = apic_read(APIC_LVTPC); - apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); - } - - /* lets not touch this if we didn't frob it */ -#ifdef CONFIG_X86_MCE_P4THERMAL - if (maxlvt >= 5) { - v = apic_read(APIC_LVTTHMR); - apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED); - } -#endif - /* - * Clean APIC state for other OSs: - */ - apic_write_around(APIC_LVTT, APIC_LVT_MASKED); - apic_write_around(APIC_LVT0, APIC_LVT_MASKED); - apic_write_around(APIC_LVT1, APIC_LVT_MASKED); - if (maxlvt >= 3) - apic_write_around(APIC_LVTERR, APIC_LVT_MASKED); - if (maxlvt >= 4) - apic_write_around(APIC_LVTPC, APIC_LVT_MASKED); - -#ifdef CONFIG_X86_MCE_P4THERMAL - if (maxlvt >= 5) - apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED); -#endif - /* Integrated APIC (!82489DX) ? */ - if (lapic_is_integrated()) { - if (maxlvt > 3) - /* Clear ESR due to Pentium errata 3AP and 11AP */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } -} - -/** - * disable_local_APIC - clear and disable the local APIC - */ -void disable_local_APIC(void) -{ - unsigned long value; - - clear_local_APIC(); - - /* - * Disable APIC (implies clearing of registers - * for 82489DX!). - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_SPIV_APIC_ENABLED; - apic_write_around(APIC_SPIV, value); - - /* - * When LAPIC was disabled by the BIOS and enabled by the kernel, - * restore the disabled state. - */ - if (enabled_via_apicbase) { - unsigned int l, h; - - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_ENABLE; - wrmsr(MSR_IA32_APICBASE, l, h); - } -} - -/* - * If Linux enabled the LAPIC against the BIOS default disable it down before - * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and - * not power-off. Additionally clear all LVT entries before disable_local_APIC - * for the case where Linux didn't enable the LAPIC. - */ -void lapic_shutdown(void) -{ - unsigned long flags; - - if (!cpu_has_apic) - return; - - local_irq_save(flags); - clear_local_APIC(); - - if (enabled_via_apicbase) - disable_local_APIC(); - - local_irq_restore(flags); -} - -/* - * This is to verify that we're looking at a real local APIC. - * Check these against your board if the CPUs aren't getting - * started for no apparent reason. - */ -int __init verify_local_APIC(void) -{ - unsigned int reg0, reg1; - - /* - * The version register is read-only in a real APIC. - */ - reg0 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); - apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); - reg1 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); - - /* - * The two version reads above should print the same - * numbers. If the second one is different, then we - * poke at a non-APIC. - */ - if (reg1 != reg0) - return 0; - - /* - * Check if the version looks reasonably. - */ - reg1 = GET_APIC_VERSION(reg0); - if (reg1 == 0x00 || reg1 == 0xff) - return 0; - reg1 = lapic_get_maxlvt(); - if (reg1 < 0x02 || reg1 == 0xff) - return 0; - - /* - * The ID register is read/write in a real APIC. - */ - reg0 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); - - /* - * The next two are just to see if we have sane values. - * They're only really relevant if we're in Virtual Wire - * compatibility mode, but most boxes are anymore. - */ - reg0 = apic_read(APIC_LVT0); - apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); - reg1 = apic_read(APIC_LVT1); - apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); - - return 1; -} - -/** - * sync_Arb_IDs - synchronize APIC bus arbitration IDs - */ -void __init sync_Arb_IDs(void) -{ - /* - * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not - * needed on AMD. - */ - if (modern_apic()) - return; - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG - | APIC_DM_INIT); -} - -/* - * An initial setup of the virtual wire mode. - */ -void __init init_bsp_APIC(void) -{ - unsigned long value; - - /* - * Don't do the setup now if we have a SMP BIOS as the - * through-I/O-APIC virtual wire mode might be active. - */ - if (smp_found_config || !cpu_has_apic) - return; - - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); - - /* - * Enable APIC. - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - - /* This bit is reserved on P4/Xeon and should be cleared */ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - (boot_cpu_data.x86 == 15)) - value &= ~APIC_SPIV_FOCUS_DISABLED; - else - value |= APIC_SPIV_FOCUS_DISABLED; - value |= SPURIOUS_APIC_VECTOR; - apic_write_around(APIC_SPIV, value); - - /* - * Set up the virtual wire mode. - */ - apic_write_around(APIC_LVT0, APIC_DM_EXTINT); - value = APIC_DM_NMI; - if (!lapic_is_integrated()) /* 82489DX */ - value |= APIC_LVT_LEVEL_TRIGGER; - apic_write_around(APIC_LVT1, value); -} - -/** - * setup_local_APIC - setup the local APIC - */ -void __devinit setup_local_APIC(void) -{ - unsigned long oldvalue, value, maxlvt, integrated; - int i, j; - - /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (esr_disable) { - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - } - - integrated = lapic_is_integrated(); - - /* - * Double-check whether this APIC is really registered. - */ - if (!apic_id_registered()) - BUG(); - - /* - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ - init_apic_ldr(); - - /* - * Set Task Priority to 'accept all'. We never change this - * later on. - */ - value = apic_read(APIC_TASKPRI); - value &= ~APIC_TPRI_MASK; - apic_write_around(APIC_TASKPRI, value); - - /* - * After a crash, we no longer service the interrupts and a pending - * interrupt from previous kernel might still have ISR bit set. - * - * Most probably by now CPU has serviced that pending interrupt and - * it might not have done the ack_APIC_irq() because it thought, - * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it - * does not clear the ISR bit and cpu thinks it has already serivced - * the interrupt. Hence a vector might get locked. It was noticed - * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. - */ - for (i = APIC_ISR_NR - 1; i >= 0; i--) { - value = apic_read(APIC_ISR + i*0x10); - for (j = 31; j >= 0; j--) { - if (value & (1< 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - oldvalue = apic_read(APIC_ESR); - - /* enables sending errors */ - value = ERROR_APIC_VECTOR; - apic_write_around(APIC_LVTERR, value); - /* - * spec says clear errors after enabling vector. - */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - if (value != oldvalue) - apic_printk(APIC_VERBOSE, "ESR value before enabling " - "vector: 0x%08lx after: 0x%08lx\n", - oldvalue, value); - } else { - if (esr_disable) - /* - * Something untraceble is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk(KERN_INFO "Leaving ESR disabled.\n"); - else - printk(KERN_INFO "No ESR for 82489DX.\n"); - } - - /* Disable the local apic timer */ - value = apic_read(APIC_LVTT); - value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write_around(APIC_LVTT, value); - - setup_apic_nmi_watchdog(NULL); - apic_pm_activate(); -} - -/* - * Detect and initialize APIC - */ -static int __init detect_init_APIC (void) -{ - u32 h, l, features; - - /* Disabled by kernel option? */ - if (enable_local_apic < 0) - return -1; - - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || - (boot_cpu_data.x86 == 15)) - break; - goto no_apic; - case X86_VENDOR_INTEL: - if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || - (boot_cpu_data.x86 == 5 && cpu_has_apic)) - break; - goto no_apic; - default: - goto no_apic; - } - - if (!cpu_has_apic) { - /* - * Over-ride BIOS and try to enable the local APIC only if - * "lapic" specified. - */ - if (enable_local_apic <= 0) { - printk(KERN_INFO "Local APIC disabled by BIOS -- " - "you can enable it with \"lapic\"\n"); - return -1; - } - /* - * Some BIOSes disable the local APIC in the APIC_BASE - * MSR. This can only be done in software for Intel P6 or later - * and AMD K7 (Model > 1) or later. - */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (!(l & MSR_IA32_APICBASE_ENABLE)) { - printk(KERN_INFO - "Local APIC disabled by BIOS -- reenabling.\n"); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; - wrmsr(MSR_IA32_APICBASE, l, h); - enabled_via_apicbase = 1; - } - } - /* - * The APIC feature bit should now be enabled - * in `cpuid' - */ - features = cpuid_edx(1); - if (!(features & (1 << X86_FEATURE_APIC))) { - printk(KERN_WARNING "Could not enable APIC!\n"); - return -1; - } - set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* The BIOS may have set up the APIC at some other address */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (l & MSR_IA32_APICBASE_ENABLE) - mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; - - if (nmi_watchdog != NMI_NONE && nmi_watchdog != NMI_DISABLED) - nmi_watchdog = NMI_LOCAL_APIC; - - printk(KERN_INFO "Found and enabled local APIC!\n"); - - apic_pm_activate(); - - return 0; - -no_apic: - printk(KERN_INFO "No local APIC present or hardware disabled\n"); - return -1; -} - -/** - * init_apic_mappings - initialize APIC mappings - */ -void __init init_apic_mappings(void) -{ - unsigned long apic_phys; - - /* - * If no local APIC can be found then set up a fake all - * zeroes page to simulate the local APIC and another - * one for the IO-APIC. - */ - if (!smp_found_config && detect_init_APIC()) { - apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); - apic_phys = __pa(apic_phys); - } else - apic_phys = mp_lapic_addr; - - set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE, - apic_phys); - - /* - * Fetch the APIC ID of the BSP in case we have a - * default configuration (or the MP table is broken). - */ - if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); - -#ifdef CONFIG_X86_IO_APIC - { - unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; - int i; - - for (i = 0; i < nr_ioapics; i++) { - if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mpc_apicaddr; - if (!ioapic_phys) { - printk(KERN_ERR - "WARNING: bogus zero IO-APIC " - "address found in MPTABLE, " - "disabling IO/APIC support!\n"); - smp_found_config = 0; - skip_ioapic_setup = 1; - goto fake_ioapic_page; - } - } else { -fake_ioapic_page: - ioapic_phys = (unsigned long) - alloc_bootmem_pages(PAGE_SIZE); - ioapic_phys = __pa(ioapic_phys); - } - set_fixmap_nocache(idx, ioapic_phys); - printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); - idx++; - } - } -#endif -} - -/* - * This initializes the IO-APIC and APIC hardware if this is - * a UP kernel. - */ -int __init APIC_init_uniprocessor (void) -{ - if (enable_local_apic < 0) - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); - - if (!smp_found_config && !cpu_has_apic) - return -1; - - /* - * Complain if the BIOS pretends there is one. - */ - if (!cpu_has_apic && - APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { - printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); - return -1; - } - - verify_local_APIC(); - - connect_bsp_APIC(); - - /* - * Hack: In case of kdump, after a crash, kernel might be booting - * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid - * might be zero if read from MP tables. Get it from LAPIC. - */ -#ifdef CONFIG_CRASH_DUMP - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); -#endif - phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); - - setup_local_APIC(); - -#ifdef CONFIG_X86_IO_APIC - if (smp_found_config) - if (!skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); -#endif - setup_boot_clock(); - - return 0; -} - -/* - * APIC command line parameters - */ -static int __init parse_lapic(char *arg) -{ - enable_local_apic = 1; - return 0; -} -early_param("lapic", parse_lapic); - -static int __init parse_nolapic(char *arg) -{ - enable_local_apic = -1; - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); - return 0; -} -early_param("nolapic", parse_nolapic); - -static int __init parse_disable_lapic_timer(char *arg) -{ - local_apic_timer_disabled = 1; - return 0; -} -early_param("nolapic_timer", parse_disable_lapic_timer); - -static int __init parse_lapic_timer_c2_ok(char *arg) -{ - local_apic_timer_c2_ok = 1; - return 0; -} -early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); - -static int __init apic_set_verbosity(char *str) -{ - if (strcmp("debug", str) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", str) == 0) - apic_verbosity = APIC_VERBOSE; - return 1; -} - -__setup("apic=", apic_set_verbosity); - - -/* - * Local APIC interrupts - */ - -/* - * This interrupt should _never_ happen with our APIC/SMP architecture - */ -void smp_spurious_interrupt(struct pt_regs *regs) -{ - unsigned long v; - - irq_enter(); - /* - * Check if this really is a spurious interrupt and ACK it - * if it is a vectored one. Just in case... - * Spurious interrupts should not be ACKed. - */ - v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); - if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) - ack_APIC_irq(); - - /* see sw-dev-man vol 3, chapter 7.4.13.5 */ - printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " - "should never happen.\n", smp_processor_id()); - irq_exit(); -} - -/* - * This interrupt should never happen with our APIC/SMP architecture - */ -void smp_error_interrupt(struct pt_regs *regs) -{ - unsigned long v, v1; - - irq_enter(); - /* First tickle the hardware, only then report what went on. -- REW */ - v = apic_read(APIC_ESR); - apic_write(APIC_ESR, 0); - v1 = apic_read(APIC_ESR); - ack_APIC_irq(); - atomic_inc(&irq_err_count); - - /* Here is what the APIC error bits mean: - 0: Send CS error - 1: Receive CS error - 2: Send accept error - 3: Receive accept error - 4: Reserved - 5: Send illegal vector - 6: Received illegal vector - 7: Illegal register address - */ - printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", - smp_processor_id(), v , v1); - irq_exit(); -} - -/* - * Initialize APIC interrupts - */ -void __init apic_intr_init(void) -{ -#ifdef CONFIG_SMP - smp_intr_init(); -#endif - /* self generated IPI for local APIC timer */ - set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* IPI vectors for APIC spurious and error interrupts */ - set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - - /* thermal monitor LVT interrupt */ -#ifdef CONFIG_X86_MCE_P4THERMAL - set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); -#endif -} - -/** - * connect_bsp_APIC - attach the APIC to the interrupt system - */ -void __init connect_bsp_APIC(void) -{ - if (pic_mode) { - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); - /* - * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's - * local APIC to INT and NMI lines. - */ - apic_printk(APIC_VERBOSE, "leaving PIC mode, " - "enabling APIC mode.\n"); - outb(0x70, 0x22); - outb(0x01, 0x23); - } - enable_apic_mode(); -} - -/** - * disconnect_bsp_APIC - detach the APIC from the interrupt system - * @virt_wire_setup: indicates, whether virtual wire mode is selected - * - * Virtual wire mode is necessary to deliver legacy interrupts even when the - * APIC is disabled. - */ -void disconnect_bsp_APIC(int virt_wire_setup) -{ - if (pic_mode) { - /* - * Put the board back into PIC mode (has an effect only on - * certain older boards). Note that APIC interrupts, including - * IPIs, won't work beyond this point! The only exception are - * INIT IPIs. - */ - apic_printk(APIC_VERBOSE, "disabling APIC mode, " - "entering PIC mode.\n"); - outb(0x70, 0x22); - outb(0x00, 0x23); - } else { - /* Go back to Virtual Wire compatibility mode */ - unsigned long value; - - /* For the spurious interrupt use vector F, and enable it */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - value |= 0xf; - apic_write_around(APIC_SPIV, value); - - if (!virt_wire_setup) { - /* - * For LVT0 make it edge triggered, active high, - * external and enabled - */ - value = apic_read(APIC_LVT0); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); - apic_write_around(APIC_LVT0, value); - } else { - /* Disable LVT0 */ - apic_write_around(APIC_LVT0, APIC_LVT_MASKED); - } - - /* - * For LVT1 make it edge triggered, active high, nmi and - * enabled - */ - value = apic_read(APIC_LVT1); - value &= ~( - APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); - apic_write_around(APIC_LVT1, value); - } -} - -/* - * Power management - */ -#ifdef CONFIG_PM - -static struct { - int active; - /* r/w apic fields */ - unsigned int apic_id; - unsigned int apic_taskpri; - unsigned int apic_ldr; - unsigned int apic_dfr; - unsigned int apic_spiv; - unsigned int apic_lvtt; - unsigned int apic_lvtpc; - unsigned int apic_lvt0; - unsigned int apic_lvt1; - unsigned int apic_lvterr; - unsigned int apic_tmict; - unsigned int apic_tdcr; - unsigned int apic_thmr; -} apic_pm_state; - -static int lapic_suspend(struct sys_device *dev, pm_message_t state) -{ - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = lapic_get_maxlvt(); - - apic_pm_state.apic_id = apic_read(APIC_ID); - apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); - apic_pm_state.apic_ldr = apic_read(APIC_LDR); - apic_pm_state.apic_dfr = apic_read(APIC_DFR); - apic_pm_state.apic_spiv = apic_read(APIC_SPIV); - apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); - if (maxlvt >= 4) - apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); - apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); - apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); - apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); - apic_pm_state.apic_tmict = apic_read(APIC_TMICT); - apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#ifdef CONFIG_X86_MCE_P4THERMAL - if (maxlvt >= 5) - apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); -#endif - - local_irq_save(flags); - disable_local_APIC(); - local_irq_restore(flags); - return 0; -} - -static int lapic_resume(struct sys_device *dev) -{ - unsigned int l, h; - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = lapic_get_maxlvt(); - - local_irq_save(flags); - - /* - * Make sure the APICBASE points to the right address - * - * FIXME! This will be wrong if we ever support suspend on - * SMP! We'll need to do this as part of the CPU restore! - */ - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); - - apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); - apic_write(APIC_ID, apic_pm_state.apic_id); - apic_write(APIC_DFR, apic_pm_state.apic_dfr); - apic_write(APIC_LDR, apic_pm_state.apic_ldr); - apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); - apic_write(APIC_SPIV, apic_pm_state.apic_spiv); - apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); - apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#ifdef CONFIG_X86_MCE_P4THERMAL - if (maxlvt >= 5) - apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); -#endif - if (maxlvt >= 4) - apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); - apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); - apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); - apic_write(APIC_TMICT, apic_pm_state.apic_tmict); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - local_irq_restore(flags); - return 0; -} - -/* - * This device has no shutdown method - fully functioning local APICs - * are needed on every CPU up until machine_halt/restart/poweroff. - */ - -static struct sysdev_class lapic_sysclass = { - set_kset_name("lapic"), - .resume = lapic_resume, - .suspend = lapic_suspend, -}; - -static struct sys_device device_lapic = { - .id = 0, - .cls = &lapic_sysclass, -}; - -static void __devinit apic_pm_activate(void) -{ - apic_pm_state.active = 1; -} - -static int __init init_lapic_sysfs(void) -{ - int error; - - if (!cpu_has_apic) - return 0; - /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ - - error = sysdev_class_register(&lapic_sysclass); - if (!error) - error = sysdev_register(&device_lapic); - return error; -} -device_initcall(init_lapic_sysfs); - -#else /* CONFIG_PM */ - -static void apic_pm_activate(void) { } - -#endif /* CONFIG_PM */ diff --git a/arch/i386/kernel/apm_32.c b/arch/i386/kernel/apm_32.c deleted file mode 100644 index f02a8aca826..00000000000 --- a/arch/i386/kernel/apm_32.c +++ /dev/null @@ -1,2403 +0,0 @@ -/* -*- linux-c -*- - * APM BIOS driver for Linux - * Copyright 1994-2001 Stephen Rothwell (sfr@canb.auug.org.au) - * - * Initial development of this driver was funded by NEC Australia P/L - * and NEC Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * October 1995, Rik Faith (faith@cs.unc.edu): - * Minor enhancements and updates (to the patch set) for 1.3.x - * Documentation - * January 1996, Rik Faith (faith@cs.unc.edu): - * Make /proc/apm easy to format (bump driver version) - * March 1996, Rik Faith (faith@cs.unc.edu): - * Prohibit APM BIOS calls unless apm_enabled. - * (Thanks to Ulrich Windl ) - * April 1996, Stephen Rothwell (sfr@canb.auug.org.au) - * Version 1.0 and 1.1 - * May 1996, Version 1.2 - * Feb 1998, Version 1.3 - * Feb 1998, Version 1.4 - * Aug 1998, Version 1.5 - * Sep 1998, Version 1.6 - * Nov 1998, Version 1.7 - * Jan 1999, Version 1.8 - * Jan 1999, Version 1.9 - * Oct 1999, Version 1.10 - * Nov 1999, Version 1.11 - * Jan 2000, Version 1.12 - * Feb 2000, Version 1.13 - * Nov 2000, Version 1.14 - * Oct 2001, Version 1.15 - * Jan 2002, Version 1.16 - * Oct 2002, Version 1.16ac - * - * History: - * 0.6b: first version in official kernel, Linux 1.3.46 - * 0.7: changed /proc/apm format, Linux 1.3.58 - * 0.8: fixed gcc 2.7.[12] compilation problems, Linux 1.3.59 - * 0.9: only call bios if bios is present, Linux 1.3.72 - * 1.0: use fixed device number, consolidate /proc/apm into this file, - * Linux 1.3.85 - * 1.1: support user-space standby and suspend, power off after system - * halted, Linux 1.3.98 - * 1.2: When resetting RTC after resume, take care so that the time - * is only incorrect by 30-60mS (vs. 1S previously) (Gabor J. Toth - * ); improve interaction between - * screen-blanking and gpm (Stephen Rothwell); Linux 1.99.4 - * 1.2a:Simple change to stop mysterious bug reports with SMP also added - * levels to the printk calls. APM is not defined for SMP machines. - * The new replacment for it is, but Linux doesn't yet support this. - * Alan Cox Linux 2.1.55 - * 1.3: Set up a valid data descriptor 0x40 for buggy BIOS's - * 1.4: Upgraded to support APM 1.2. Integrated ThinkPad suspend patch by - * Dean Gaudet . - * C. Scott Ananian Linux 2.1.87 - * 1.5: Fix segment register reloading (in case of bad segments saved - * across BIOS call). - * Stephen Rothwell - * 1.6: Cope with complier/assembler differences. - * Only try to turn off the first display device. - * Fix OOPS at power off with no APM BIOS by Jan Echternach - * - * Stephen Rothwell - * 1.7: Modify driver's cached copy of the disabled/disengaged flags - * to reflect current state of APM BIOS. - * Chris Rankin - * Reset interrupt 0 timer to 100Hz after suspend - * Chad Miller - * Add CONFIG_APM_IGNORE_SUSPEND_BOUNCE - * Richard Gooch - * Allow boot time disabling of APM - * Make boot messages far less verbose by default - * Make asm safer - * Stephen Rothwell - * 1.8: Add CONFIG_APM_RTC_IS_GMT - * Richard Gooch - * change APM_NOINTS to CONFIG_APM_ALLOW_INTS - * remove dependency on CONFIG_PROC_FS - * Stephen Rothwell - * 1.9: Fix small typo. - * Try to cope with BIOS's that need to have all display - * devices blanked and not just the first one. - * Ross Paterson - * Fix segment limit setting it has always been wrong as - * the segments needed to have byte granularity. - * Mark a few things __init. - * Add hack to allow power off of SMP systems by popular request. - * Use CONFIG_SMP instead of __SMP__ - * Ignore BOUNCES for three seconds. - * Stephen Rothwell - * 1.10: Fix for Thinkpad return code. - * Merge 2.2 and 2.3 drivers. - * Remove APM dependencies in arch/i386/kernel/process.c - * Remove APM dependencies in drivers/char/sysrq.c - * Reset time across standby. - * Allow more inititialisation on SMP. - * Remove CONFIG_APM_POWER_OFF and make it boot time - * configurable (default on). - * Make debug only a boot time parameter (remove APM_DEBUG). - * Try to blank all devices on any error. - * 1.11: Remove APM dependencies in drivers/char/console.c - * Check nr_running to detect if we are idle (from - * Borislav Deianov ) - * Fix for bioses that don't zero the top part of the - * entrypoint offset (Mario Sitta ) - * (reported by Panos Katsaloulis ). - * Real mode power off patch (Walter Hofmann - * ). - * 1.12: Remove CONFIG_SMP as the compiler will optimize - * the code away anyway (smp_num_cpus == 1 in UP) - * noted by Artur Skawina . - * Make power off under SMP work again. - * Fix thinko with initial engaging of BIOS. - * Make sure power off only happens on CPU 0 - * (Paul "Rusty" Russell ). - * Do error notification to user mode if BIOS calls fail. - * Move entrypoint offset fix to ...boot/setup.S - * where it belongs (Cosmos ). - * Remove smp-power-off. SMP users must now specify - * "apm=power-off" on the kernel command line. Suggested - * by Jim Avera , modified by Alan Cox - * . - * Register the /proc/apm entry even on SMP so that - * scripts that check for it before doing power off - * work (Jim Avera ). - * 1.13: Changes for new pm_ interfaces (Andy Henroid - * ). - * Modularize the code. - * Fix the Thinkpad (again) :-( (CONFIG_APM_IGNORE_MULTIPLE_SUSPENDS - * is now the way life works). - * Fix thinko in suspend() (wrong return). - * Notify drivers on critical suspend. - * Make kapmd absorb more idle time (Pavel Machek - * modified by sfr). - * Disable interrupts while we are suspended (Andy Henroid - * fixed by sfr). - * Make power off work on SMP again (Tony Hoyle - * and ) modified by sfr. - * Remove CONFIG_APM_SUSPEND_BOUNCE. The bounce ignore - * interval is now configurable. - * 1.14: Make connection version persist across module unload/load. - * Enable and engage power management earlier. - * Disengage power management on module unload. - * Changed to use the sysrq-register hack for registering the - * power off function called by magic sysrq based upon discussions - * in irc://irc.openprojects.net/#kernelnewbies - * (Crutcher Dunnavant ). - * Make CONFIG_APM_REAL_MODE_POWER_OFF run time configurable. - * (Arjan van de Ven ) modified by sfr. - * Work around byte swap bug in one of the Vaio's BIOS's - * (Marc Boucher ). - * Exposed the disable flag to dmi so that we can handle known - * broken APM (Alan Cox ). - * 1.14ac: If the BIOS says "I slowed the CPU down" then don't spin - * calling it - instead idle. (Alan Cox ) - * If an APM idle fails log it and idle sensibly - * 1.15: Don't queue events to clients who open the device O_WRONLY. - * Don't expect replies from clients who open the device O_RDONLY. - * (Idea from Thomas Hood) - * Minor waitqueue cleanups. (John Fremlin ) - * 1.16: Fix idle calling. (Andreas Steinmetz et al.) - * Notify listeners of standby or suspend events before notifying - * drivers. Return EBUSY to ioctl() if suspend is rejected. - * (Russell King and Thomas Hood) - * Ignore first resume after we generate our own resume event - * after a suspend (Thomas Hood) - * Daemonize now gets rid of our controlling terminal (sfr). - * CONFIG_APM_CPU_IDLE now just affects the default value of - * idle_threshold (sfr). - * Change name of kernel apm daemon (as it no longer idles) (sfr). - * 1.16ac: Fix up SMP support somewhat. You can now force SMP on and we - * make _all_ APM calls on the CPU#0. Fix unsafe sign bug. - * TODO: determine if its "boot CPU" or "CPU0" we want to lock to. - * - * APM 1.1 Reference: - * - * Intel Corporation, Microsoft Corporation. Advanced Power Management - * (APM) BIOS Interface Specification, Revision 1.1, September 1993. - * Intel Order Number 241704-001. Microsoft Part Number 781-110-X01. - * - * [This document is available free from Intel by calling 800.628.8686 (fax - * 916.356.6100) or 800.548.4725; or via anonymous ftp from - * ftp://ftp.intel.com/pub/IAL/software_specs/apmv11.doc. It is also - * available from Microsoft by calling 206.882.8080.] - * - * APM 1.2 Reference: - * Intel Corporation, Microsoft Corporation. Advanced Power Management - * (APM) BIOS Interface Specification, Revision 1.2, February 1996. - * - * [This document is available from Microsoft at: - * http://www.microsoft.com/whdc/archive/amp_12.mspx] - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "io_ports.h" - -#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) -extern int (*console_blank_hook)(int); -#endif - -/* - * The apm_bios device is one of the misc char devices. - * This is its minor number. - */ -#define APM_MINOR_DEV 134 - -/* - * See Documentation/Config.help for the configuration options. - * - * Various options can be changed at boot time as follows: - * (We allow underscores for compatibility with the modules code) - * apm=on/off enable/disable APM - * [no-]allow[-_]ints allow interrupts during BIOS calls - * [no-]broken[-_]psr BIOS has a broken GetPowerStatus call - * [no-]realmode[-_]power[-_]off switch to real mode before - * powering off - * [no-]debug log some debugging messages - * [no-]power[-_]off power off on shutdown - * [no-]smp Use apm even on an SMP box - * bounce[-_]interval= number of ticks to ignore suspend - * bounces - * idle[-_]threshold= System idle percentage above which to - * make APM BIOS idle calls. Set it to - * 100 to disable. - * idle[-_]period= Period (in 1/100s of a second) over - * which the idle percentage is - * calculated. - */ - -/* KNOWN PROBLEM MACHINES: - * - * U: TI 4000M TravelMate: BIOS is *NOT* APM compliant - * [Confirmed by TI representative] - * ?: ACER 486DX4/75: uses dseg 0040, in violation of APM specification - * [Confirmed by BIOS disassembly] - * [This may work now ...] - * P: Toshiba 1950S: battery life information only gets updated after resume - * P: Midwest Micro Soundbook Elite DX2/66 monochrome: screen blanking - * broken in BIOS [Reported by Garst R. Reese ] - * ?: AcerNote-950: oops on reading /proc/apm - workaround is a WIP - * Neale Banks December 2000 - * - * Legend: U = unusable with APM patches - * P = partially usable with APM patches - */ - -/* - * Define as 1 to make the driver always call the APM BIOS busy - * routine even if the clock was not reported as slowed by the - * idle routine. Otherwise, define as 0. - */ -#define ALWAYS_CALL_BUSY 1 - -/* - * Define to make the APM BIOS calls zero all data segment registers (so - * that an incorrect BIOS implementation will cause a kernel panic if it - * tries to write to arbitrary memory). - */ -#define APM_ZERO_SEGS - -#include "apm.h" - -/* - * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend. - * This patched by Chad Miller , original code by - * David Chen - */ -#undef INIT_TIMER_AFTER_SUSPEND - -#ifdef INIT_TIMER_AFTER_SUSPEND -#include -#include -#include -#endif - -/* - * Need to poll the APM BIOS every second - */ -#define APM_CHECK_TIMEOUT (HZ) - -/* - * Ignore suspend events for this amount of time after a resume - */ -#define DEFAULT_BOUNCE_INTERVAL (3 * HZ) - -/* - * Maximum number of events stored - */ -#define APM_MAX_EVENTS 20 - -/* - * The per-file APM data - */ -struct apm_user { - int magic; - struct apm_user * next; - unsigned int suser: 1; - unsigned int writer: 1; - unsigned int reader: 1; - unsigned int suspend_wait: 1; - int suspend_result; - int suspends_pending; - int standbys_pending; - int suspends_read; - int standbys_read; - int event_head; - int event_tail; - apm_event_t events[APM_MAX_EVENTS]; -}; - -/* - * The magic number in apm_user - */ -#define APM_BIOS_MAGIC 0x4101 - -/* - * idle percentage above which bios idle calls are done - */ -#ifdef CONFIG_APM_CPU_IDLE -#define DEFAULT_IDLE_THRESHOLD 95 -#else -#define DEFAULT_IDLE_THRESHOLD 100 -#endif -#define DEFAULT_IDLE_PERIOD (100 / 3) - -/* - * Local variables - */ -static struct { - unsigned long offset; - unsigned short segment; -} apm_bios_entry; -static int clock_slowed; -static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD; -static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD; -static int set_pm_idle; -static int suspends_pending; -static int standbys_pending; -static int ignore_sys_suspend; -static int ignore_normal_resume; -static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL; - -static int debug __read_mostly; -static int smp __read_mostly; -static int apm_disabled = -1; -#ifdef CONFIG_SMP -static int power_off; -#else -static int power_off = 1; -#endif -#ifdef CONFIG_APM_REAL_MODE_POWER_OFF -static int realmode_power_off = 1; -#else -static int realmode_power_off; -#endif -#ifdef CONFIG_APM_ALLOW_INTS -static int allow_ints = 1; -#else -static int allow_ints; -#endif -static int broken_psr; - -static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); -static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); -static struct apm_user * user_list; -static DEFINE_SPINLOCK(user_list_lock); -static const struct desc_struct bad_bios_desc = { 0, 0x00409200 }; - -static const char driver_version[] = "1.16ac"; /* no spaces */ - -static struct task_struct *kapmd_task; - -/* - * APM event names taken from the APM 1.2 specification. These are - * the message codes that the BIOS uses to tell us about events - */ -static const char * const apm_event_name[] = { - "system standby", - "system suspend", - "normal resume", - "critical resume", - "low battery", - "power status change", - "update time", - "critical suspend", - "user standby", - "user suspend", - "system standby resume", - "capabilities change" -}; -#define NR_APM_EVENT_NAME ARRAY_SIZE(apm_event_name) - -typedef struct lookup_t { - int key; - char * msg; -} lookup_t; - -/* - * The BIOS returns a set of standard error codes in AX when the - * carry flag is set. - */ - -static const lookup_t error_table[] = { -/* N/A { APM_SUCCESS, "Operation succeeded" }, */ - { APM_DISABLED, "Power management disabled" }, - { APM_CONNECTED, "Real mode interface already connected" }, - { APM_NOT_CONNECTED, "Interface not connected" }, - { APM_16_CONNECTED, "16 bit interface already connected" }, -/* N/A { APM_16_UNSUPPORTED, "16 bit interface not supported" }, */ - { APM_32_CONNECTED, "32 bit interface already connected" }, - { APM_32_UNSUPPORTED, "32 bit interface not supported" }, - { APM_BAD_DEVICE, "Unrecognized device ID" }, - { APM_BAD_PARAM, "Parameter out of range" }, - { APM_NOT_ENGAGED, "Interface not engaged" }, - { APM_BAD_FUNCTION, "Function not supported" }, - { APM_RESUME_DISABLED, "Resume timer disabled" }, - { APM_BAD_STATE, "Unable to enter requested state" }, -/* N/A { APM_NO_EVENTS, "No events pending" }, */ - { APM_NO_ERROR, "BIOS did not set a return code" }, - { APM_NOT_PRESENT, "No APM present" } -}; -#define ERROR_COUNT ARRAY_SIZE(error_table) - -/** - * apm_error - display an APM error - * @str: information string - * @err: APM BIOS return code - * - * Write a meaningful log entry to the kernel log in the event of - * an APM error. - */ - -static void apm_error(char *str, int err) -{ - int i; - - for (i = 0; i < ERROR_COUNT; i++) - if (error_table[i].key == err) break; - if (i < ERROR_COUNT) - printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg); - else - printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n", - str, err); -} - -/* - * Lock APM functionality to physical CPU 0 - */ - -#ifdef CONFIG_SMP - -static cpumask_t apm_save_cpus(void) -{ - cpumask_t x = current->cpus_allowed; - /* Some bioses don't like being called from CPU != 0 */ - set_cpus_allowed(current, cpumask_of_cpu(0)); - BUG_ON(smp_processor_id() != 0); - return x; -} - -static inline void apm_restore_cpus(cpumask_t mask) -{ - set_cpus_allowed(current, mask); -} - -#else - -/* - * No CPU lockdown needed on a uniprocessor - */ - -#define apm_save_cpus() (current->cpus_allowed) -#define apm_restore_cpus(x) (void)(x) - -#endif - -/* - * These are the actual BIOS calls. Depending on APM_ZERO_SEGS and - * apm_info.allow_ints, we are being really paranoid here! Not only - * are interrupts disabled, but all the segment registers (except SS) - * are saved and zeroed this means that if the BIOS tries to reference - * any data without explicitly loading the segment registers, the kernel - * will fault immediately rather than have some unforeseen circumstances - * for the rest of the kernel. And it will be very obvious! :-) Doing - * this depends on CS referring to the same physical memory as DS so that - * DS can be zeroed before the call. Unfortunately, we can't do anything - * about the stack segment/pointer. Also, we tell the compiler that - * everything could change. - * - * Also, we KNOW that for the non error case of apm_bios_call, there - * is no useful data returned in the low order 8 bits of eax. - */ - -static inline unsigned long __apm_irq_save(void) -{ - unsigned long flags; - local_save_flags(flags); - if (apm_info.allow_ints) { - if (irqs_disabled_flags(flags)) - local_irq_enable(); - } else - local_irq_disable(); - - return flags; -} - -#define apm_irq_save(flags) \ - do { flags = __apm_irq_save(); } while (0) - -static inline void apm_irq_restore(unsigned long flags) -{ - if (irqs_disabled_flags(flags)) - local_irq_disable(); - else if (irqs_disabled()) - local_irq_enable(); -} - -#ifdef APM_ZERO_SEGS -# define APM_DECL_SEGS \ - unsigned int saved_fs; unsigned int saved_gs; -# define APM_DO_SAVE_SEGS \ - savesegment(fs, saved_fs); savesegment(gs, saved_gs) -# define APM_DO_RESTORE_SEGS \ - loadsegment(fs, saved_fs); loadsegment(gs, saved_gs) -#else -# define APM_DECL_SEGS -# define APM_DO_SAVE_SEGS -# define APM_DO_RESTORE_SEGS -#endif - -/** - * apm_bios_call - Make an APM BIOS 32bit call - * @func: APM function to execute - * @ebx_in: EBX register for call entry - * @ecx_in: ECX register for call entry - * @eax: EAX register return - * @ebx: EBX register return - * @ecx: ECX register return - * @edx: EDX register return - * @esi: ESI register return - * - * Make an APM call using the 32bit protected mode interface. The - * caller is responsible for knowing if APM BIOS is configured and - * enabled. This call can disable interrupts for a long period of - * time on some laptops. The return value is in AH and the carry - * flag is loaded into AL. If there is an error, then the error - * code is returned in AH (bits 8-15 of eax) and this function - * returns non-zero. - */ - -static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in, - u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, u32 *esi) -{ - APM_DECL_SEGS - unsigned long flags; - cpumask_t cpus; - int cpu; - struct desc_struct save_desc_40; - struct desc_struct *gdt; - - cpus = apm_save_cpus(); - - cpu = get_cpu(); - gdt = get_cpu_gdt_table(cpu); - save_desc_40 = gdt[0x40 / 8]; - gdt[0x40 / 8] = bad_bios_desc; - - apm_irq_save(flags); - APM_DO_SAVE_SEGS; - apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi); - APM_DO_RESTORE_SEGS; - apm_irq_restore(flags); - gdt[0x40 / 8] = save_desc_40; - put_cpu(); - apm_restore_cpus(cpus); - - return *eax & 0xff; -} - -/** - * apm_bios_call_simple - make a simple APM BIOS 32bit call - * @func: APM function to invoke - * @ebx_in: EBX register value for BIOS call - * @ecx_in: ECX register value for BIOS call - * @eax: EAX register on return from the BIOS call - * - * Make a BIOS call that returns one value only, or just status. - * If there is an error, then the error code is returned in AH - * (bits 8-15 of eax) and this function returns non-zero. This is - * used for simpler BIOS operations. This call may hold interrupts - * off for a long time on some laptops. - */ - -static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax) -{ - u8 error; - APM_DECL_SEGS - unsigned long flags; - cpumask_t cpus; - int cpu; - struct desc_struct save_desc_40; - struct desc_struct *gdt; - - cpus = apm_save_cpus(); - - cpu = get_cpu(); - gdt = get_cpu_gdt_table(cpu); - save_desc_40 = gdt[0x40 / 8]; - gdt[0x40 / 8] = bad_bios_desc; - - apm_irq_save(flags); - APM_DO_SAVE_SEGS; - error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax); - APM_DO_RESTORE_SEGS; - apm_irq_restore(flags); - gdt[0x40 / 8] = save_desc_40; - put_cpu(); - apm_restore_cpus(cpus); - return error; -} - -/** - * apm_driver_version - APM driver version - * @val: loaded with the APM version on return - * - * Retrieve the APM version supported by the BIOS. This is only - * supported for APM 1.1 or higher. An error indicates APM 1.0 is - * probably present. - * - * On entry val should point to a value indicating the APM driver - * version with the high byte being the major and the low byte the - * minor number both in BCD - * - * On return it will hold the BIOS revision supported in the - * same format. - */ - -static int apm_driver_version(u_short *val) -{ - u32 eax; - - if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax)) - return (eax >> 8) & 0xff; - *val = eax; - return APM_SUCCESS; -} - -/** - * apm_get_event - get an APM event from the BIOS - * @event: pointer to the event - * @info: point to the event information - * - * The APM BIOS provides a polled information for event - * reporting. The BIOS expects to be polled at least every second - * when events are pending. When a message is found the caller should - * poll until no more messages are present. However, this causes - * problems on some laptops where a suspend event notification is - * not cleared until it is acknowledged. - * - * Additional information is returned in the info pointer, providing - * that APM 1.2 is in use. If no messges are pending the value 0x80 - * is returned (No power management events pending). - */ - -static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info) -{ - u32 eax; - u32 ebx; - u32 ecx; - u32 dummy; - - if (apm_bios_call(APM_FUNC_GET_EVENT, 0, 0, &eax, &ebx, &ecx, - &dummy, &dummy)) - return (eax >> 8) & 0xff; - *event = ebx; - if (apm_info.connection_version < 0x0102) - *info = ~0; /* indicate info not valid */ - else - *info = ecx; - return APM_SUCCESS; -} - -/** - * set_power_state - set the power management state - * @what: which items to transition - * @state: state to transition to - * - * Request an APM change of state for one or more system devices. The - * processor state must be transitioned last of all. what holds the - * class of device in the upper byte and the device number (0xFF for - * all) for the object to be transitioned. - * - * The state holds the state to transition to, which may in fact - * be an acceptance of a BIOS requested state change. - */ - -static int set_power_state(u_short what, u_short state) -{ - u32 eax; - - if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax)) - return (eax >> 8) & 0xff; - return APM_SUCCESS; -} - -/** - * set_system_power_state - set system wide power state - * @state: which state to enter - * - * Transition the entire system into a new APM power state. - */ - -static int set_system_power_state(u_short state) -{ - return set_power_state(APM_DEVICE_ALL, state); -} - -/** - * apm_do_idle - perform power saving - * - * This function notifies the BIOS that the processor is (in the view - * of the OS) idle. It returns -1 in the event that the BIOS refuses - * to handle the idle request. On a success the function returns 1 - * if the BIOS did clock slowing or 0 otherwise. - */ - -static int apm_do_idle(void) -{ - u32 eax; - u8 ret = 0; - int idled = 0; - int polling; - - polling = !!(current_thread_info()->status & TS_POLLING); - if (polling) { - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we - * test NEED_RESCHED: - */ - smp_mb(); - } - if (!need_resched()) { - idled = 1; - ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax); - } - if (polling) - current_thread_info()->status |= TS_POLLING; - - if (!idled) - return 0; - - if (ret) { - static unsigned long t; - - /* This always fails on some SMP boards running UP kernels. - * Only report the failure the first 5 times. - */ - if (++t < 5) - { - printk(KERN_DEBUG "apm_do_idle failed (%d)\n", - (eax >> 8) & 0xff); - t = jiffies; - } - return -1; - } - clock_slowed = (apm_info.bios.flags & APM_IDLE_SLOWS_CLOCK) != 0; - return clock_slowed; -} - -/** - * apm_do_busy - inform the BIOS the CPU is busy - * - * Request that the BIOS brings the CPU back to full performance. - */ - -static void apm_do_busy(void) -{ - u32 dummy; - - if (clock_slowed || ALWAYS_CALL_BUSY) { - (void) apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy); - clock_slowed = 0; - } -} - -/* - * If no process has really been interested in - * the CPU for some time, we want to call BIOS - * power management - we probably want - * to conserve power. - */ -#define IDLE_CALC_LIMIT (HZ * 100) -#define IDLE_LEAKY_MAX 16 - -static void (*original_pm_idle)(void) __read_mostly; - -/** - * apm_cpu_idle - cpu idling for APM capable Linux - * - * This is the idling function the kernel executes when APM is available. It - * tries to do BIOS powermanagement based on the average system idle time. - * Furthermore it calls the system default idle routine. - */ - -static void apm_cpu_idle(void) -{ - static int use_apm_idle; /* = 0 */ - static unsigned int last_jiffies; /* = 0 */ - static unsigned int last_stime; /* = 0 */ - - int apm_idle_done = 0; - unsigned int jiffies_since_last_check = jiffies - last_jiffies; - unsigned int bucket; - -recalc: - if (jiffies_since_last_check > IDLE_CALC_LIMIT) { - use_apm_idle = 0; - last_jiffies = jiffies; - last_stime = current->stime; - } else if (jiffies_since_last_check > idle_period) { - unsigned int idle_percentage; - - idle_percentage = current->stime - last_stime; - idle_percentage *= 100; - idle_percentage /= jiffies_since_last_check; - use_apm_idle = (idle_percentage > idle_threshold); - if (apm_info.forbid_idle) - use_apm_idle = 0; - last_jiffies = jiffies; - last_stime = current->stime; - } - - bucket = IDLE_LEAKY_MAX; - - while (!need_resched()) { - if (use_apm_idle) { - unsigned int t; - - t = jiffies; - switch (apm_do_idle()) { - case 0: apm_idle_done = 1; - if (t != jiffies) { - if (bucket) { - bucket = IDLE_LEAKY_MAX; - continue; - } - } else if (bucket) { - bucket--; - continue; - } - break; - case 1: apm_idle_done = 1; - break; - default: /* BIOS refused */ - break; - } - } - if (original_pm_idle) - original_pm_idle(); - else - default_idle(); - jiffies_since_last_check = jiffies - last_jiffies; - if (jiffies_since_last_check > idle_period) - goto recalc; - } - - if (apm_idle_done) - apm_do_busy(); -} - -/** - * apm_power_off - ask the BIOS to power off - * - * Handle the power off sequence. This is the one piece of code we - * will execute even on SMP machines. In order to deal with BIOS - * bugs we support real mode APM BIOS power off calls. We also make - * the SMP call on CPU0 as some systems will only honour this call - * on their first cpu. - */ - -static void apm_power_off(void) -{ - unsigned char po_bios_call[] = { - 0xb8, 0x00, 0x10, /* movw $0x1000,ax */ - 0x8e, 0xd0, /* movw ax,ss */ - 0xbc, 0x00, 0xf0, /* movw $0xf000,sp */ - 0xb8, 0x07, 0x53, /* movw $0x5307,ax */ - 0xbb, 0x01, 0x00, /* movw $0x0001,bx */ - 0xb9, 0x03, 0x00, /* movw $0x0003,cx */ - 0xcd, 0x15 /* int $0x15 */ - }; - - /* Some bioses don't like being called from CPU != 0 */ - if (apm_info.realmode_power_off) - { - (void)apm_save_cpus(); - machine_real_restart(po_bios_call, sizeof(po_bios_call)); - } - else - (void) set_system_power_state(APM_STATE_OFF); -} - -#ifdef CONFIG_APM_DO_ENABLE - -/** - * apm_enable_power_management - enable BIOS APM power management - * @enable: enable yes/no - * - * Enable or disable the APM BIOS power services. - */ - -static int apm_enable_power_management(int enable) -{ - u32 eax; - - if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED)) - return APM_NOT_ENGAGED; - if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL, - enable, &eax)) - return (eax >> 8) & 0xff; - if (enable) - apm_info.bios.flags &= ~APM_BIOS_DISABLED; - else - apm_info.bios.flags |= APM_BIOS_DISABLED; - return APM_SUCCESS; -} -#endif - -/** - * apm_get_power_status - get current power state - * @status: returned status - * @bat: battery info - * @life: estimated life - * - * Obtain the current power status from the APM BIOS. We return a - * status which gives the rough battery status, and current power - * source. The bat value returned give an estimate as a percentage - * of life and a status value for the battery. The estimated life - * if reported is a lifetime in secodnds/minutes at current powwer - * consumption. - */ - -static int apm_get_power_status(u_short *status, u_short *bat, u_short *life) -{ - u32 eax; - u32 ebx; - u32 ecx; - u32 edx; - u32 dummy; - - if (apm_info.get_power_status_broken) - return APM_32_UNSUPPORTED; - if (apm_bios_call(APM_FUNC_GET_STATUS, APM_DEVICE_ALL, 0, - &eax, &ebx, &ecx, &edx, &dummy)) - return (eax >> 8) & 0xff; - *status = ebx; - *bat = ecx; - if (apm_info.get_power_status_swabinminutes) { - *life = swab16((u16)edx); - *life |= 0x8000; - } else - *life = edx; - return APM_SUCCESS; -} - -#if 0 -static int apm_get_battery_status(u_short which, u_short *status, - u_short *bat, u_short *life, u_short *nbat) -{ - u32 eax; - u32 ebx; - u32 ecx; - u32 edx; - u32 esi; - - if (apm_info.connection_version < 0x0102) { - /* pretend we only have one battery. */ - if (which != 1) - return APM_BAD_DEVICE; - *nbat = 1; - return apm_get_power_status(status, bat, life); - } - - if (apm_bios_call(APM_FUNC_GET_STATUS, (0x8000 | (which)), 0, &eax, - &ebx, &ecx, &edx, &esi)) - return (eax >> 8) & 0xff; - *status = ebx; - *bat = ecx; - *life = edx; - *nbat = esi; - return APM_SUCCESS; -} -#endif - -/** - * apm_engage_power_management - enable PM on a device - * @device: identity of device - * @enable: on/off - * - * Activate or deactive power management on either a specific device - * or the entire system (%APM_DEVICE_ALL). - */ - -static int apm_engage_power_management(u_short device, int enable) -{ - u32 eax; - - if ((enable == 0) && (device == APM_DEVICE_ALL) - && (apm_info.bios.flags & APM_BIOS_DISABLED)) - return APM_DISABLED; - if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable, &eax)) - return (eax >> 8) & 0xff; - if (device == APM_DEVICE_ALL) { - if (enable) - apm_info.bios.flags &= ~APM_BIOS_DISENGAGED; - else - apm_info.bios.flags |= APM_BIOS_DISENGAGED; - } - return APM_SUCCESS; -} - -#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) - -/** - * apm_console_blank - blank the display - * @blank: on/off - * - * Attempt to blank the console, firstly by blanking just video device - * zero, and if that fails (some BIOSes don't support it) then it blanks - * all video devices. Typically the BIOS will do laptop backlight and - * monitor powerdown for us. - */ - -static int apm_console_blank(int blank) -{ - int error = APM_NOT_ENGAGED; /* silence gcc */ - int i; - u_short state; - static const u_short dev[3] = { 0x100, 0x1FF, 0x101 }; - - state = blank ? APM_STATE_STANDBY : APM_STATE_READY; - - for (i = 0; i < ARRAY_SIZE(dev); i++) { - error = set_power_state(dev[i], state); - - if ((error == APM_SUCCESS) || (error == APM_NO_ERROR)) - return 1; - - if (error == APM_NOT_ENGAGED) - break; - } - - if (error == APM_NOT_ENGAGED) { - static int tried; - int eng_error; - if (tried++ == 0) { - eng_error = apm_engage_power_management(APM_DEVICE_ALL, 1); - if (eng_error) { - apm_error("set display", error); - apm_error("engage interface", eng_error); - return 0; - } else - return apm_console_blank(blank); - } - } - apm_error("set display", error); - return 0; -} -#endif - -static int queue_empty(struct apm_user *as) -{ - return as->event_head == as->event_tail; -} - -static apm_event_t get_queued_event(struct apm_user *as) -{ - if (++as->event_tail >= APM_MAX_EVENTS) - as->event_tail = 0; - return as->events[as->event_tail]; -} - -static void queue_event(apm_event_t event, struct apm_user *sender) -{ - struct apm_user * as; - - spin_lock(&user_list_lock); - if (user_list == NULL) - goto out; - for (as = user_list; as != NULL; as = as->next) { - if ((as == sender) || (!as->reader)) - continue; - if (++as->event_head >= APM_MAX_EVENTS) - as->event_head = 0; - - if (as->event_head == as->event_tail) { - static int notified; - - if (notified++ == 0) - printk(KERN_ERR "apm: an event queue overflowed\n"); - if (++as->event_tail >= APM_MAX_EVENTS) - as->event_tail = 0; - } - as->events[as->event_head] = event; - if ((!as->suser) || (!as->writer)) - continue; - switch (event) { - case APM_SYS_SUSPEND: - case APM_USER_SUSPEND: - as->suspends_pending++; - suspends_pending++; - break; - - case APM_SYS_STANDBY: - case APM_USER_STANDBY: - as->standbys_pending++; - standbys_pending++; - break; - } - } - wake_up_interruptible(&apm_waitqueue); -out: - spin_unlock(&user_list_lock); -} - -static void reinit_timer(void) -{ -#ifdef INIT_TIMER_AFTER_SUSPEND - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); - /* set the clock to HZ */ - outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ - udelay(10); - outb_p(LATCH & 0xff, PIT_CH0); /* LSB */ - udelay(10); - outb(LATCH >> 8, PIT_CH0); /* MSB */ - udelay(10); - spin_unlock_irqrestore(&i8253_lock, flags); -#endif -} - -static int suspend(int vetoable) -{ - int err; - struct apm_user *as; - - if (pm_send_all(PM_SUSPEND, (void *)3)) { - /* Vetoed */ - if (vetoable) { - if (apm_info.connection_version > 0x100) - set_system_power_state(APM_STATE_REJECT); - err = -EBUSY; - ignore_sys_suspend = 0; - printk(KERN_WARNING "apm: suspend was vetoed.\n"); - goto out; - } - printk(KERN_CRIT "apm: suspend was vetoed, but suspending anyway.\n"); - } - - device_suspend(PMSG_SUSPEND); - local_irq_disable(); - device_power_down(PMSG_SUSPEND); - - local_irq_enable(); - - save_processor_state(); - err = set_system_power_state(APM_STATE_SUSPEND); - ignore_normal_resume = 1; - restore_processor_state(); - - local_irq_disable(); - reinit_timer(); - - if (err == APM_NO_ERROR) - err = APM_SUCCESS; - if (err != APM_SUCCESS) - apm_error("suspend", err); - err = (err == APM_SUCCESS) ? 0 : -EIO; - device_power_up(); - local_irq_enable(); - device_resume(); - pm_send_all(PM_RESUME, (void *)0); - queue_event(APM_NORMAL_RESUME, NULL); - out: - spin_lock(&user_list_lock); - for (as = user_list; as != NULL; as = as->next) { - as->suspend_wait = 0; - as->suspend_result = err; - } - spin_unlock(&user_list_lock); - wake_up_interruptible(&apm_suspend_waitqueue); - return err; -} - -static void standby(void) -{ - int err; - - local_irq_disable(); - device_power_down(PMSG_SUSPEND); - local_irq_enable(); - - err = set_system_power_state(APM_STATE_STANDBY); - if ((err != APM_SUCCESS) && (err != APM_NO_ERROR)) - apm_error("standby", err); - - local_irq_disable(); - device_power_up(); - local_irq_enable(); -} - -static apm_event_t get_event(void) -{ - int error; - apm_event_t event = APM_NO_EVENTS; /* silence gcc */ - apm_eventinfo_t info; - - static int notified; - - /* we don't use the eventinfo */ - error = apm_get_event(&event, &info); - if (error == APM_SUCCESS) - return event; - - if ((error != APM_NO_EVENTS) && (notified++ == 0)) - apm_error("get_event", error); - - return 0; -} - -static void check_events(void) -{ - apm_event_t event; - static unsigned long last_resume; - static int ignore_bounce; - - while ((event = get_event()) != 0) { - if (debug) { - if (event <= NR_APM_EVENT_NAME) - printk(KERN_DEBUG "apm: received %s notify\n", - apm_event_name[event - 1]); - else - printk(KERN_DEBUG "apm: received unknown " - "event 0x%02x\n", event); - } - if (ignore_bounce - && ((jiffies - last_resume) > bounce_interval)) - ignore_bounce = 0; - - switch (event) { - case APM_SYS_STANDBY: - case APM_USER_STANDBY: - queue_event(event, NULL); - if (standbys_pending <= 0) - standby(); - break; - - case APM_USER_SUSPEND: -#ifdef CONFIG_APM_IGNORE_USER_SUSPEND - if (apm_info.connection_version > 0x100) - set_system_power_state(APM_STATE_REJECT); - break; -#endif - case APM_SYS_SUSPEND: - if (ignore_bounce) { - if (apm_info.connection_version > 0x100) - set_system_power_state(APM_STATE_REJECT); - break; - } - /* - * If we are already processing a SUSPEND, - * then further SUSPEND events from the BIOS - * will be ignored. We also return here to - * cope with the fact that the Thinkpads keep - * sending a SUSPEND event until something else - * happens! - */ - if (ignore_sys_suspend) - return; - ignore_sys_suspend = 1; - queue_event(event, NULL); - if (suspends_pending <= 0) - (void) suspend(1); - break; - - case APM_NORMAL_RESUME: - case APM_CRITICAL_RESUME: - case APM_STANDBY_RESUME: - ignore_sys_suspend = 0; - last_resume = jiffies; - ignore_bounce = 1; - if ((event != APM_NORMAL_RESUME) - || (ignore_normal_resume == 0)) { - device_resume(); - pm_send_all(PM_RESUME, (void *)0); - queue_event(event, NULL); - } - ignore_normal_resume = 0; - break; - - case APM_CAPABILITY_CHANGE: - case APM_LOW_BATTERY: - case APM_POWER_STATUS_CHANGE: - queue_event(event, NULL); - /* If needed, notify drivers here */ - break; - - case APM_UPDATE_TIME: - break; - - case APM_CRITICAL_SUSPEND: - /* - * We are not allowed to reject a critical suspend. - */ - (void) suspend(0); - break; - } - } -} - -static void apm_event_handler(void) -{ - static int pending_count = 4; - int err; - - if ((standbys_pending > 0) || (suspends_pending > 0)) { - if ((apm_info.connection_version > 0x100) && - (pending_count-- <= 0)) { - pending_count = 4; - if (debug) - printk(KERN_DEBUG "apm: setting state busy\n"); - err = set_system_power_state(APM_STATE_BUSY); - if (err) - apm_error("busy", err); - } - } else - pending_count = 4; - check_events(); -} - -/* - * This is the APM thread main loop. - */ - -static void apm_mainloop(void) -{ - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue(&apm_waitqueue, &wait); - set_current_state(TASK_INTERRUPTIBLE); - for (;;) { - schedule_timeout(APM_CHECK_TIMEOUT); - if (kthread_should_stop()) - break; - /* - * Ok, check all events, check for idle (and mark us sleeping - * so as not to count towards the load average).. - */ - set_current_state(TASK_INTERRUPTIBLE); - apm_event_handler(); - } - remove_wait_queue(&apm_waitqueue, &wait); -} - -static int check_apm_user(struct apm_user *as, const char *func) -{ - if ((as == NULL) || (as->magic != APM_BIOS_MAGIC)) { - printk(KERN_ERR "apm: %s passed bad filp\n", func); - return 1; - } - return 0; -} - -static ssize_t do_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos) -{ - struct apm_user * as; - int i; - apm_event_t event; - - as = fp->private_data; - if (check_apm_user(as, "read")) - return -EIO; - if ((int)count < sizeof(apm_event_t)) - return -EINVAL; - if ((queue_empty(as)) && (fp->f_flags & O_NONBLOCK)) - return -EAGAIN; - wait_event_interruptible(apm_waitqueue, !queue_empty(as)); - i = count; - while ((i >= sizeof(event)) && !queue_empty(as)) { - event = get_queued_event(as); - if (copy_to_user(buf, &event, sizeof(event))) { - if (i < count) - break; - return -EFAULT; - } - switch (event) { - case APM_SYS_SUSPEND: - case APM_USER_SUSPEND: - as->suspends_read++; - break; - - case APM_SYS_STANDBY: - case APM_USER_STANDBY: - as->standbys_read++; - break; - } - buf += sizeof(event); - i -= sizeof(event); - } - if (i < count) - return count - i; - if (signal_pending(current)) - return -ERESTARTSYS; - return 0; -} - -static unsigned int do_poll(struct file *fp, poll_table * wait) -{ - struct apm_user * as; - - as = fp->private_data; - if (check_apm_user(as, "poll")) - return 0; - poll_wait(fp, &apm_waitqueue, wait); - if (!queue_empty(as)) - return POLLIN | POLLRDNORM; - return 0; -} - -static int do_ioctl(struct inode * inode, struct file *filp, - u_int cmd, u_long arg) -{ - struct apm_user * as; - - as = filp->private_data; - if (check_apm_user(as, "ioctl")) - return -EIO; - if ((!as->suser) || (!as->writer)) - return -EPERM; - switch (cmd) { - case APM_IOC_STANDBY: - if (as->standbys_read > 0) { - as->standbys_read--; - as->standbys_pending--; - standbys_pending--; - } else - queue_event(APM_USER_STANDBY, as); - if (standbys_pending <= 0) - standby(); - break; - case APM_IOC_SUSPEND: - if (as->suspends_read > 0) { - as->suspends_read--; - as->suspends_pending--; - suspends_pending--; - } else - queue_event(APM_USER_SUSPEND, as); - if (suspends_pending <= 0) { - return suspend(1); - } else { - as->suspend_wait = 1; - wait_event_interruptible(apm_suspend_waitqueue, - as->suspend_wait == 0); - return as->suspend_result; - } - break; - default: - return -EINVAL; - } - return 0; -} - -static int do_release(struct inode * inode, struct file * filp) -{ - struct apm_user * as; - - as = filp->private_data; - if (check_apm_user(as, "release")) - return 0; - filp->private_data = NULL; - if (as->standbys_pending > 0) { - standbys_pending -= as->standbys_pending; - if (standbys_pending <= 0) - standby(); - } - if (as->suspends_pending > 0) { - suspends_pending -= as->suspends_pending; - if (suspends_pending <= 0) - (void) suspend(1); - } - spin_lock(&user_list_lock); - if (user_list == as) - user_list = as->next; - else { - struct apm_user * as1; - - for (as1 = user_list; - (as1 != NULL) && (as1->next != as); - as1 = as1->next) - ; - if (as1 == NULL) - printk(KERN_ERR "apm: filp not in user list\n"); - else - as1->next = as->next; - } - spin_unlock(&user_list_lock); - kfree(as); - return 0; -} - -static int do_open(struct inode * inode, struct file * filp) -{ - struct apm_user * as; - - as = kmalloc(sizeof(*as), GFP_KERNEL); - if (as == NULL) { - printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", - sizeof(*as)); - return -ENOMEM; - } - as->magic = APM_BIOS_MAGIC; - as->event_tail = as->event_head = 0; - as->suspends_pending = as->standbys_pending = 0; - as->suspends_read = as->standbys_read = 0; - /* - * XXX - this is a tiny bit broken, when we consider BSD - * process accounting. If the device is opened by root, we - * instantly flag that we used superuser privs. Who knows, - * we might close the device immediately without doing a - * privileged operation -- cevans - */ - as->suser = capable(CAP_SYS_ADMIN); - as->writer = (filp->f_mode & FMODE_WRITE) == FMODE_WRITE; - as->reader = (filp->f_mode & FMODE_READ) == FMODE_READ; - spin_lock(&user_list_lock); - as->next = user_list; - user_list = as; - spin_unlock(&user_list_lock); - filp->private_data = as; - return 0; -} - -static int proc_apm_show(struct seq_file *m, void *v) -{ - unsigned short bx; - unsigned short cx; - unsigned short dx; - int error; - unsigned short ac_line_status = 0xff; - unsigned short battery_status = 0xff; - unsigned short battery_flag = 0xff; - int percentage = -1; - int time_units = -1; - char *units = "?"; - - if ((num_online_cpus() == 1) && - !(error = apm_get_power_status(&bx, &cx, &dx))) { - ac_line_status = (bx >> 8) & 0xff; - battery_status = bx & 0xff; - if ((cx & 0xff) != 0xff) - percentage = cx & 0xff; - - if (apm_info.connection_version > 0x100) { - battery_flag = (cx >> 8) & 0xff; - if (dx != 0xffff) { - units = (dx & 0x8000) ? "min" : "sec"; - time_units = dx & 0x7fff; - } - } - } - /* Arguments, with symbols from linux/apm_bios.h. Information is - from the Get Power Status (0x0a) call unless otherwise noted. - - 0) Linux driver version (this will change if format changes) - 1) APM BIOS Version. Usually 1.0, 1.1 or 1.2. - 2) APM flags from APM Installation Check (0x00): - bit 0: APM_16_BIT_SUPPORT - bit 1: APM_32_BIT_SUPPORT - bit 2: APM_IDLE_SLOWS_CLOCK - bit 3: APM_BIOS_DISABLED - bit 4: APM_BIOS_DISENGAGED - 3) AC line status - 0x00: Off-line - 0x01: On-line - 0x02: On backup power (BIOS >= 1.1 only) - 0xff: Unknown - 4) Battery status - 0x00: High - 0x01: Low - 0x02: Critical - 0x03: Charging - 0x04: Selected battery not present (BIOS >= 1.2 only) - 0xff: Unknown - 5) Battery flag - bit 0: High - bit 1: Low - bit 2: Critical - bit 3: Charging - bit 7: No system battery - 0xff: Unknown - 6) Remaining battery life (percentage of charge): - 0-100: valid - -1: Unknown - 7) Remaining battery life (time units): - Number of remaining minutes or seconds - -1: Unknown - 8) min = minutes; sec = seconds */ - - seq_printf(m, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n", - driver_version, - (apm_info.bios.version >> 8) & 0xff, - apm_info.bios.version & 0xff, - apm_info.bios.flags, - ac_line_status, - battery_status, - battery_flag, - percentage, - time_units, - units); - return 0; -} - -static int proc_apm_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_apm_show, NULL); -} - -static const struct file_operations apm_file_ops = { - .owner = THIS_MODULE, - .open = proc_apm_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int apm(void *unused) -{ - unsigned short bx; - unsigned short cx; - unsigned short dx; - int error; - char * power_stat; - char * bat_stat; - -#ifdef CONFIG_SMP - /* 2002/08/01 - WT - * This is to avoid random crashes at boot time during initialization - * on SMP systems in case of "apm=power-off" mode. Seen on ASUS A7M266D. - * Some bioses don't like being called from CPU != 0. - * Method suggested by Ingo Molnar. - */ - set_cpus_allowed(current, cpumask_of_cpu(0)); - BUG_ON(smp_processor_id() != 0); -#endif - - if (apm_info.connection_version == 0) { - apm_info.connection_version = apm_info.bios.version; - if (apm_info.connection_version > 0x100) { - /* - * We only support BIOSs up to version 1.2 - */ - if (apm_info.connection_version > 0x0102) - apm_info.connection_version = 0x0102; - error = apm_driver_version(&apm_info.connection_version); - if (error != APM_SUCCESS) { - apm_error("driver version", error); - /* Fall back to an APM 1.0 connection. */ - apm_info.connection_version = 0x100; - } - } - } - - if (debug) - printk(KERN_INFO "apm: Connection version %d.%d\n", - (apm_info.connection_version >> 8) & 0xff, - apm_info.connection_version & 0xff); - -#ifdef CONFIG_APM_DO_ENABLE - if (apm_info.bios.flags & APM_BIOS_DISABLED) { - /* - * This call causes my NEC UltraLite Versa 33/C to hang if it - * is booted with PM disabled but not in the docking station. - * Unfortunate ... - */ - error = apm_enable_power_management(1); - if (error) { - apm_error("enable power management", error); - return -1; - } - } -#endif - - if ((apm_info.bios.flags & APM_BIOS_DISENGAGED) - && (apm_info.connection_version > 0x0100)) { - error = apm_engage_power_management(APM_DEVICE_ALL, 1); - if (error) { - apm_error("engage power management", error); - return -1; - } - } - - if (debug && (num_online_cpus() == 1 || smp )) { - error = apm_get_power_status(&bx, &cx, &dx); - if (error) - printk(KERN_INFO "apm: power status not available\n"); - else { - switch ((bx >> 8) & 0xff) { - case 0: power_stat = "off line"; break; - case 1: power_stat = "on line"; break; - case 2: power_stat = "on backup power"; break; - default: power_stat = "unknown"; break; - } - switch (bx & 0xff) { - case 0: bat_stat = "high"; break; - case 1: bat_stat = "low"; break; - case 2: bat_stat = "critical"; break; - case 3: bat_stat = "charging"; break; - default: bat_stat = "unknown"; break; - } - printk(KERN_INFO - "apm: AC %s, battery status %s, battery life ", - power_stat, bat_stat); - if ((cx & 0xff) == 0xff) - printk("unknown\n"); - else - printk("%d%%\n", cx & 0xff); - if (apm_info.connection_version > 0x100) { - printk(KERN_INFO - "apm: battery flag 0x%02x, battery life ", - (cx >> 8) & 0xff); - if (dx == 0xffff) - printk("unknown\n"); - else - printk("%d %s\n", dx & 0x7fff, - (dx & 0x8000) ? - "minutes" : "seconds"); - } - } - } - - /* Install our power off handler.. */ - if (power_off) - pm_power_off = apm_power_off; - - if (num_online_cpus() == 1 || smp) { -#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) - console_blank_hook = apm_console_blank; -#endif - apm_mainloop(); -#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) - console_blank_hook = NULL; -#endif - } - - return 0; -} - -#ifndef MODULE -static int __init apm_setup(char *str) -{ - int invert; - - while ((str != NULL) && (*str != '\0')) { - if (strncmp(str, "off", 3) == 0) - apm_disabled = 1; - if (strncmp(str, "on", 2) == 0) - apm_disabled = 0; - if ((strncmp(str, "bounce-interval=", 16) == 0) || - (strncmp(str, "bounce_interval=", 16) == 0)) - bounce_interval = simple_strtol(str + 16, NULL, 0); - if ((strncmp(str, "idle-threshold=", 15) == 0) || - (strncmp(str, "idle_threshold=", 15) == 0)) - idle_threshold = simple_strtol(str + 15, NULL, 0); - if ((strncmp(str, "idle-period=", 12) == 0) || - (strncmp(str, "idle_period=", 12) == 0)) - idle_period = simple_strtol(str + 12, NULL, 0); - invert = (strncmp(str, "no-", 3) == 0) || - (strncmp(str, "no_", 3) == 0); - if (invert) - str += 3; - if (strncmp(str, "debug", 5) == 0) - debug = !invert; - if ((strncmp(str, "power-off", 9) == 0) || - (strncmp(str, "power_off", 9) == 0)) - power_off = !invert; - if (strncmp(str, "smp", 3) == 0) - { - smp = !invert; - idle_threshold = 100; - } - if ((strncmp(str, "allow-ints", 10) == 0) || - (strncmp(str, "allow_ints", 10) == 0)) - apm_info.allow_ints = !invert; - if ((strncmp(str, "broken-psr", 10) == 0) || - (strncmp(str, "broken_psr", 10) == 0)) - apm_info.get_power_status_broken = !invert; - if ((strncmp(str, "realmode-power-off", 18) == 0) || - (strncmp(str, "realmode_power_off", 18) == 0)) - apm_info.realmode_power_off = !invert; - str = strchr(str, ','); - if (str != NULL) - str += strspn(str, ", \t"); - } - return 1; -} - -__setup("apm=", apm_setup); -#endif - -static const struct file_operations apm_bios_fops = { - .owner = THIS_MODULE, - .read = do_read, - .poll = do_poll, - .ioctl = do_ioctl, - .open = do_open, - .release = do_release, -}; - -static struct miscdevice apm_device = { - APM_MINOR_DEV, - "apm_bios", - &apm_bios_fops -}; - - -/* Simple "print if true" callback */ -static int __init print_if_true(struct dmi_system_id *d) -{ - printk("%s\n", d->ident); - return 0; -} - -/* - * Some Bioses enable the PS/2 mouse (touchpad) at resume, even if it was - * disabled before the suspend. Linux used to get terribly confused by that. - */ -static int __init broken_ps2_resume(struct dmi_system_id *d) -{ - printk(KERN_INFO "%s machine detected. Mousepad Resume Bug workaround hopefully not needed.\n", d->ident); - return 0; -} - -/* Some bioses have a broken protected mode poweroff and need to use realmode */ -static int __init set_realmode_power_off(struct dmi_system_id *d) -{ - if (apm_info.realmode_power_off == 0) { - apm_info.realmode_power_off = 1; - printk(KERN_INFO "%s bios detected. Using realmode poweroff only.\n", d->ident); - } - return 0; -} - -/* Some laptops require interrupts to be enabled during APM calls */ -static int __init set_apm_ints(struct dmi_system_id *d) -{ - if (apm_info.allow_ints == 0) { - apm_info.allow_ints = 1; - printk(KERN_INFO "%s machine detected. Enabling interrupts during APM calls.\n", d->ident); - } - return 0; -} - -/* Some APM bioses corrupt memory or just plain do not work */ -static int __init apm_is_horked(struct dmi_system_id *d) -{ - if (apm_info.disabled == 0) { - apm_info.disabled = 1; - printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident); - } - return 0; -} - -static int __init apm_is_horked_d850md(struct dmi_system_id *d) -{ - if (apm_info.disabled == 0) { - apm_info.disabled = 1; - printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident); - printk(KERN_INFO "This bug is fixed in bios P15 which is available for \n"); - printk(KERN_INFO "download from support.intel.com \n"); - } - return 0; -} - -/* Some APM bioses hang on APM idle calls */ -static int __init apm_likes_to_melt(struct dmi_system_id *d) -{ - if (apm_info.forbid_idle == 0) { - apm_info.forbid_idle = 1; - printk(KERN_INFO "%s machine detected. Disabling APM idle calls.\n", d->ident); - } - return 0; -} - -/* - * Check for clue free BIOS implementations who use - * the following QA technique - * - * [ Write BIOS Code ]<------ - * | ^ - * < Does it Compile >----N-- - * |Y ^ - * < Does it Boot Win98 >-N-- - * |Y - * [Ship It] - * - * Phoenix A04 08/24/2000 is known bad (Dell Inspiron 5000e) - * Phoenix A07 09/29/2000 is known good (Dell Inspiron 5000) - */ -static int __init broken_apm_power(struct dmi_system_id *d) -{ - apm_info.get_power_status_broken = 1; - printk(KERN_WARNING "BIOS strings suggest APM bugs, disabling power status reporting.\n"); - return 0; -} - -/* - * This bios swaps the APM minute reporting bytes over (Many sony laptops - * have this problem). - */ -static int __init swab_apm_power_in_minutes(struct dmi_system_id *d) -{ - apm_info.get_power_status_swabinminutes = 1; - printk(KERN_WARNING "BIOS strings suggest APM reports battery life in minutes and wrong byte order.\n"); - return 0; -} - -static struct dmi_system_id __initdata apm_dmi_table[] = { - { - print_if_true, - KERN_WARNING "IBM T23 - BIOS 1.03b+ and controller firmware 1.02+ may be needed for Linux APM.", - { DMI_MATCH(DMI_SYS_VENDOR, "IBM"), - DMI_MATCH(DMI_BIOS_VERSION, "1AET38WW (1.01b)"), }, - }, - { /* Handle problems with APM on the C600 */ - broken_ps2_resume, "Dell Latitude C600", - { DMI_MATCH(DMI_SYS_VENDOR, "Dell"), - DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C600"), }, - }, - { /* Allow interrupts during suspend on Dell Latitude laptops*/ - set_apm_ints, "Dell Latitude", - { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C510"), } - }, - { /* APM crashes */ - apm_is_horked, "Dell Inspiron 2500", - { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"), - DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION,"A11"), }, - }, - { /* Allow interrupts during suspend on Dell Inspiron laptops*/ - set_apm_ints, "Dell Inspiron", { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 4000"), }, - }, - { /* Handle problems with APM on Inspiron 5000e */ - broken_apm_power, "Dell Inspiron 5000e", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "A04"), - DMI_MATCH(DMI_BIOS_DATE, "08/24/2000"), }, - }, - { /* Handle problems with APM on Inspiron 2500 */ - broken_apm_power, "Dell Inspiron 2500", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "A12"), - DMI_MATCH(DMI_BIOS_DATE, "02/04/2002"), }, - }, - { /* APM crashes */ - apm_is_horked, "Dell Dimension 4100", - { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"), - DMI_MATCH(DMI_BIOS_VENDOR,"Intel Corp."), - DMI_MATCH(DMI_BIOS_VERSION,"A11"), }, - }, - { /* Allow interrupts during suspend on Compaq Laptops*/ - set_apm_ints, "Compaq 12XL125", - { DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), - DMI_MATCH(DMI_PRODUCT_NAME, "Compaq PC"), - DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION,"4.06"), }, - }, - { /* Allow interrupts during APM or the clock goes slow */ - set_apm_ints, "ASUSTeK", - { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "L8400K series Notebook PC"), }, - }, - { /* APM blows on shutdown */ - apm_is_horked, "ABIT KX7-333[R]", - { DMI_MATCH(DMI_BOARD_VENDOR, "ABIT"), - DMI_MATCH(DMI_BOARD_NAME, "VT8367-8233A (KX7-333[R])"), }, - }, - { /* APM crashes */ - apm_is_horked, "Trigem Delhi3", - { DMI_MATCH(DMI_SYS_VENDOR, "TriGem Computer, Inc"), - DMI_MATCH(DMI_PRODUCT_NAME, "Delhi3"), }, - }, - { /* APM crashes */ - apm_is_horked, "Fujitsu-Siemens", - { DMI_MATCH(DMI_BIOS_VENDOR, "hoenix/FUJITSU SIEMENS"), - DMI_MATCH(DMI_BIOS_VERSION, "Version1.01"), }, - }, - { /* APM crashes */ - apm_is_horked_d850md, "Intel D850MD", - { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."), - DMI_MATCH(DMI_BIOS_VERSION, "MV85010A.86A.0016.P07.0201251536"), }, - }, - { /* APM crashes */ - apm_is_horked, "Intel D810EMO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."), - DMI_MATCH(DMI_BIOS_VERSION, "MO81010A.86A.0008.P04.0004170800"), }, - }, - { /* APM crashes */ - apm_is_horked, "Dell XPS-Z", - { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."), - DMI_MATCH(DMI_BIOS_VERSION, "A11"), - DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"), }, - }, - { /* APM crashes */ - apm_is_horked, "Sharp PC-PJ/AX", - { DMI_MATCH(DMI_SYS_VENDOR, "SHARP"), - DMI_MATCH(DMI_PRODUCT_NAME, "PC-PJ/AX"), - DMI_MATCH(DMI_BIOS_VENDOR,"SystemSoft"), - DMI_MATCH(DMI_BIOS_VERSION,"Version R2.08"), }, - }, - { /* APM crashes */ - apm_is_horked, "Dell Inspiron 2500", - { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"), - DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION,"A11"), }, - }, - { /* APM idle hangs */ - apm_likes_to_melt, "Jabil AMD", - { DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), - DMI_MATCH(DMI_BIOS_VERSION, "0AASNP06"), }, - }, - { /* APM idle hangs */ - apm_likes_to_melt, "AMI Bios", - { DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), - DMI_MATCH(DMI_BIOS_VERSION, "0AASNP05"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-N505X(DE) */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0206H"), - DMI_MATCH(DMI_BIOS_DATE, "08/23/99"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-N505VX */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "W2K06H0"), - DMI_MATCH(DMI_BIOS_DATE, "02/03/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-XG29 */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0117A0"), - DMI_MATCH(DMI_BIOS_DATE, "04/25/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-Z600NE */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0121Z1"), - DMI_MATCH(DMI_BIOS_DATE, "05/11/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-Z600NE */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "WME01Z1"), - DMI_MATCH(DMI_BIOS_DATE, "08/11/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-Z600LEK(DE) */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0206Z3"), - DMI_MATCH(DMI_BIOS_DATE, "12/25/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-Z505LS */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0203D0"), - DMI_MATCH(DMI_BIOS_DATE, "05/12/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-Z505LS */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0203Z3"), - DMI_MATCH(DMI_BIOS_DATE, "08/25/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-Z505LS (with updated BIOS) */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0209Z3"), - DMI_MATCH(DMI_BIOS_DATE, "05/12/01"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-F104K */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0204K2"), - DMI_MATCH(DMI_BIOS_DATE, "08/28/00"), }, - }, - - { /* Handle problems with APM on Sony Vaio PCG-C1VN/C1VE */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0208P1"), - DMI_MATCH(DMI_BIOS_DATE, "11/09/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-C1VE */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "R0204P1"), - DMI_MATCH(DMI_BIOS_DATE, "09/12/00"), }, - }, - { /* Handle problems with APM on Sony Vaio PCG-C1VE */ - swab_apm_power_in_minutes, "Sony VAIO", - { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION, "WXPO1Z3"), - DMI_MATCH(DMI_BIOS_DATE, "10/26/01"), }, - }, - { /* broken PM poweroff bios */ - set_realmode_power_off, "Award Software v4.60 PGMA", - { DMI_MATCH(DMI_BIOS_VENDOR, "Award Software International, Inc."), - DMI_MATCH(DMI_BIOS_VERSION, "4.60 PGMA"), - DMI_MATCH(DMI_BIOS_DATE, "134526184"), }, - }, - - /* Generic per vendor APM settings */ - - { /* Allow interrupts during suspend on IBM laptops */ - set_apm_ints, "IBM", - { DMI_MATCH(DMI_SYS_VENDOR, "IBM"), }, - }, - - { } -}; - -/* - * Just start the APM thread. We do NOT want to do APM BIOS - * calls from anything but the APM thread, if for no other reason - * than the fact that we don't trust the APM BIOS. This way, - * most common APM BIOS problems that lead to protection errors - * etc will have at least some level of being contained... - * - * In short, if something bad happens, at least we have a choice - * of just killing the apm thread.. - */ -static int __init apm_init(void) -{ - struct proc_dir_entry *apm_proc; - struct desc_struct *gdt; - int err; - - dmi_check_system(apm_dmi_table); - - if (apm_info.bios.version == 0 || paravirt_enabled()) { - printk(KERN_INFO "apm: BIOS not found.\n"); - return -ENODEV; - } - printk(KERN_INFO - "apm: BIOS version %d.%d Flags 0x%02x (Driver version %s)\n", - ((apm_info.bios.version >> 8) & 0xff), - (apm_info.bios.version & 0xff), - apm_info.bios.flags, - driver_version); - if ((apm_info.bios.flags & APM_32_BIT_SUPPORT) == 0) { - printk(KERN_INFO "apm: no 32 bit BIOS support\n"); - return -ENODEV; - } - - if (allow_ints) - apm_info.allow_ints = 1; - if (broken_psr) - apm_info.get_power_status_broken = 1; - if (realmode_power_off) - apm_info.realmode_power_off = 1; - /* User can override, but default is to trust DMI */ - if (apm_disabled != -1) - apm_info.disabled = apm_disabled; - - /* - * Fix for the Compaq Contura 3/25c which reports BIOS version 0.1 - * but is reportedly a 1.0 BIOS. - */ - if (apm_info.bios.version == 0x001) - apm_info.bios.version = 0x100; - - /* BIOS < 1.2 doesn't set cseg_16_len */ - if (apm_info.bios.version < 0x102) - apm_info.bios.cseg_16_len = 0; /* 64k */ - - if (debug) { - printk(KERN_INFO "apm: entry %x:%x cseg16 %x dseg %x", - apm_info.bios.cseg, apm_info.bios.offset, - apm_info.bios.cseg_16, apm_info.bios.dseg); - if (apm_info.bios.version > 0x100) - printk(" cseg len %x, dseg len %x", - apm_info.bios.cseg_len, - apm_info.bios.dseg_len); - if (apm_info.bios.version > 0x101) - printk(" cseg16 len %x", apm_info.bios.cseg_16_len); - printk("\n"); - } - - if (apm_info.disabled) { - printk(KERN_NOTICE "apm: disabled on user request.\n"); - return -ENODEV; - } - if ((num_online_cpus() > 1) && !power_off && !smp) { - printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n"); - apm_info.disabled = 1; - return -ENODEV; - } - if (PM_IS_ACTIVE()) { - printk(KERN_NOTICE "apm: overridden by ACPI.\n"); - apm_info.disabled = 1; - return -ENODEV; - } -#ifdef CONFIG_PM_LEGACY - pm_active = 1; -#endif - - /* - * Set up a segment that references the real mode segment 0x40 - * that extends up to the end of page zero (that we have reserved). - * This is for buggy BIOS's that refer to (real mode) segment 0x40 - * even though they are called in protected mode. - */ - set_base(bad_bios_desc, __va((unsigned long)0x40 << 4)); - _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4)); - - /* - * Set up the long jump entry point to the APM BIOS, which is called - * from inline assembly. - */ - apm_bios_entry.offset = apm_info.bios.offset; - apm_bios_entry.segment = APM_CS; - - /* - * The APM 1.1 BIOS is supposed to provide limit information that it - * recognizes. Many machines do this correctly, but many others do - * not restrict themselves to their claimed limit. When this happens, - * they will cause a segmentation violation in the kernel at boot time. - * Most BIOS's, however, will respect a 64k limit, so we use that. - * - * Note we only set APM segments on CPU zero, since we pin the APM - * code to that CPU. - */ - gdt = get_cpu_gdt_table(0); - set_base(gdt[APM_CS >> 3], - __va((unsigned long)apm_info.bios.cseg << 4)); - set_base(gdt[APM_CS_16 >> 3], - __va((unsigned long)apm_info.bios.cseg_16 << 4)); - set_base(gdt[APM_DS >> 3], - __va((unsigned long)apm_info.bios.dseg << 4)); - - apm_proc = create_proc_entry("apm", 0, NULL); - if (apm_proc) - apm_proc->proc_fops = &apm_file_ops; - - kapmd_task = kthread_create(apm, NULL, "kapmd"); - if (IS_ERR(kapmd_task)) { - printk(KERN_ERR "apm: disabled - Unable to start kernel " - "thread.\n"); - err = PTR_ERR(kapmd_task); - kapmd_task = NULL; - remove_proc_entry("apm", NULL); - return err; - } - wake_up_process(kapmd_task); - - if (num_online_cpus() > 1 && !smp ) { - printk(KERN_NOTICE - "apm: disabled - APM is not SMP safe (power off active).\n"); - return 0; - } - - /* - * Note we don't actually care if the misc_device cannot be registered. - * this driver can do its job without it, even if userspace can't - * control it. just log the error - */ - if (misc_register(&apm_device)) - printk(KERN_WARNING "apm: Could not register misc device.\n"); - - if (HZ != 100) - idle_period = (idle_period * HZ) / 100; - if (idle_threshold < 100) { - original_pm_idle = pm_idle; - pm_idle = apm_cpu_idle; - set_pm_idle = 1; - } - - return 0; -} - -static void __exit apm_exit(void) -{ - int error; - - if (set_pm_idle) { - pm_idle = original_pm_idle; - /* - * We are about to unload the current idle thread pm callback - * (pm_idle), Wait for all processors to update cached/local - * copies of pm_idle before proceeding. - */ - cpu_idle_wait(); - } - if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0) - && (apm_info.connection_version > 0x0100)) { - error = apm_engage_power_management(APM_DEVICE_ALL, 0); - if (error) - apm_error("disengage power management", error); - } - misc_deregister(&apm_device); - remove_proc_entry("apm", NULL); - if (power_off) - pm_power_off = NULL; - if (kapmd_task) { - kthread_stop(kapmd_task); - kapmd_task = NULL; - } -#ifdef CONFIG_PM_LEGACY - pm_active = 0; -#endif -} - -module_init(apm_init); -module_exit(apm_exit); - -MODULE_AUTHOR("Stephen Rothwell"); -MODULE_DESCRIPTION("Advanced Power Management"); -MODULE_LICENSE("GPL"); -module_param(debug, bool, 0644); -MODULE_PARM_DESC(debug, "Enable debug mode"); -module_param(power_off, bool, 0444); -MODULE_PARM_DESC(power_off, "Enable power off"); -module_param(bounce_interval, int, 0444); -MODULE_PARM_DESC(bounce_interval, - "Set the number of ticks to ignore suspend bounces"); -module_param(allow_ints, bool, 0444); -MODULE_PARM_DESC(allow_ints, "Allow interrupts during BIOS calls"); -module_param(broken_psr, bool, 0444); -MODULE_PARM_DESC(broken_psr, "BIOS has a broken GetPowerStatus call"); -module_param(realmode_power_off, bool, 0444); -MODULE_PARM_DESC(realmode_power_off, - "Switch to real mode before powering off"); -module_param(idle_threshold, int, 0444); -MODULE_PARM_DESC(idle_threshold, - "System idle percentage above which to make APM BIOS idle calls"); -module_param(idle_period, int, 0444); -MODULE_PARM_DESC(idle_period, - "Period (in sec/100) over which to caculate the idle percentage"); -module_param(smp, bool, 0444); -MODULE_PARM_DESC(smp, - "Set this to enable APM use on an SMP platform. Use with caution on older systems"); -MODULE_ALIAS_MISCDEV(APM_MINOR_DEV); diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c deleted file mode 100644 index cfa82c899f4..00000000000 --- a/arch/i386/kernel/asm-offsets.c +++ /dev/null @@ -1,5 +0,0 @@ -#ifdef CONFIG_X86_32 -# include "asm-offsets_32.c" -#else -# include "asm-offsets_64.c" -#endif diff --git a/arch/i386/kernel/asm-offsets_32.c b/arch/i386/kernel/asm-offsets_32.c deleted file mode 100644 index 8029742c0fc..00000000000 --- a/arch/i386/kernel/asm-offsets_32.c +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Generate definitions needed by assembly language modules. - * This code generates raw asm output which is post-processed - * to extract and format the required data. - */ - -#include -#include -#include -#include -#include -#include -#include "sigframe_32.h" -#include -#include -#include -#include -#include - -#include - -#ifdef CONFIG_LGUEST_GUEST -#include -#include "../../../drivers/lguest/lg.h" -#endif - -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - -#define OFFSET(sym, str, mem) \ - DEFINE(sym, offsetof(struct str, mem)); - -/* workaround for a warning with -Wmissing-prototypes */ -void foo(void); - -void foo(void) -{ - OFFSET(SIGCONTEXT_eax, sigcontext, eax); - OFFSET(SIGCONTEXT_ebx, sigcontext, ebx); - OFFSET(SIGCONTEXT_ecx, sigcontext, ecx); - OFFSET(SIGCONTEXT_edx, sigcontext, edx); - OFFSET(SIGCONTEXT_esi, sigcontext, esi); - OFFSET(SIGCONTEXT_edi, sigcontext, edi); - OFFSET(SIGCONTEXT_ebp, sigcontext, ebp); - OFFSET(SIGCONTEXT_esp, sigcontext, esp); - OFFSET(SIGCONTEXT_eip, sigcontext, eip); - BLANK(); - - OFFSET(CPUINFO_x86, cpuinfo_x86, x86); - OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); - OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); - OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask); - OFFSET(CPUINFO_hard_math, cpuinfo_x86, hard_math); - OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level); - OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability); - OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); - BLANK(); - - OFFSET(TI_task, thread_info, task); - OFFSET(TI_exec_domain, thread_info, exec_domain); - OFFSET(TI_flags, thread_info, flags); - OFFSET(TI_status, thread_info, status); - OFFSET(TI_preempt_count, thread_info, preempt_count); - OFFSET(TI_addr_limit, thread_info, addr_limit); - OFFSET(TI_restart_block, thread_info, restart_block); - OFFSET(TI_sysenter_return, thread_info, sysenter_return); - OFFSET(TI_cpu, thread_info, cpu); - BLANK(); - - OFFSET(GDS_size, Xgt_desc_struct, size); - OFFSET(GDS_address, Xgt_desc_struct, address); - OFFSET(GDS_pad, Xgt_desc_struct, pad); - BLANK(); - - OFFSET(PT_EBX, pt_regs, ebx); - OFFSET(PT_ECX, pt_regs, ecx); - OFFSET(PT_EDX, pt_regs, edx); - OFFSET(PT_ESI, pt_regs, esi); - OFFSET(PT_EDI, pt_regs, edi); - OFFSET(PT_EBP, pt_regs, ebp); - OFFSET(PT_EAX, pt_regs, eax); - OFFSET(PT_DS, pt_regs, xds); - OFFSET(PT_ES, pt_regs, xes); - OFFSET(PT_FS, pt_regs, xfs); - OFFSET(PT_ORIG_EAX, pt_regs, orig_eax); - OFFSET(PT_EIP, pt_regs, eip); - OFFSET(PT_CS, pt_regs, xcs); - OFFSET(PT_EFLAGS, pt_regs, eflags); - OFFSET(PT_OLDESP, pt_regs, esp); - OFFSET(PT_OLDSS, pt_regs, xss); - BLANK(); - - OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); - OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); - BLANK(); - - OFFSET(pbe_address, pbe, address); - OFFSET(pbe_orig_address, pbe, orig_address); - OFFSET(pbe_next, pbe, next); - - /* Offset from the sysenter stack to tss.esp0 */ - DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, x86_tss.esp0) - - sizeof(struct tss_struct)); - - DEFINE(PAGE_SIZE_asm, PAGE_SIZE); - DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT); - DEFINE(PTRS_PER_PTE, PTRS_PER_PTE); - DEFINE(PTRS_PER_PMD, PTRS_PER_PMD); - DEFINE(PTRS_PER_PGD, PTRS_PER_PGD); - - DEFINE(VDSO_PRELINK_asm, VDSO_PRELINK); - - OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); - -#ifdef CONFIG_PARAVIRT - BLANK(); - OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled); - OFFSET(PARAVIRT_irq_disable, paravirt_ops, irq_disable); - OFFSET(PARAVIRT_irq_enable, paravirt_ops, irq_enable); - OFFSET(PARAVIRT_irq_enable_sysexit, paravirt_ops, irq_enable_sysexit); - OFFSET(PARAVIRT_iret, paravirt_ops, iret); - OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0); -#endif - -#ifdef CONFIG_XEN - BLANK(); - OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); - OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); -#endif - -#ifdef CONFIG_LGUEST_GUEST - BLANK(); - OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); - OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc); - OFFSET(LGUEST_PAGES_host_idt_desc, lguest_pages, state.host_idt_desc); - OFFSET(LGUEST_PAGES_host_cr3, lguest_pages, state.host_cr3); - OFFSET(LGUEST_PAGES_host_sp, lguest_pages, state.host_sp); - OFFSET(LGUEST_PAGES_guest_gdt_desc, lguest_pages,state.guest_gdt_desc); - OFFSET(LGUEST_PAGES_guest_idt_desc, lguest_pages,state.guest_idt_desc); - OFFSET(LGUEST_PAGES_guest_gdt, lguest_pages, state.guest_gdt); - OFFSET(LGUEST_PAGES_regs_trapnum, lguest_pages, regs.trapnum); - OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); - OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); -#endif -} diff --git a/arch/i386/kernel/bootflag.c b/arch/i386/kernel/bootflag.c deleted file mode 100644 index 0b9860530a6..00000000000 --- a/arch/i386/kernel/bootflag.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Implement 'Simple Boot Flag Specification 2.0' - */ - - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - - -#define SBF_RESERVED (0x78) -#define SBF_PNPOS (1<<0) -#define SBF_BOOTING (1<<1) -#define SBF_DIAG (1<<2) -#define SBF_PARITY (1<<7) - - -int sbf_port __initdata = -1; /* set via acpi_boot_init() */ - - -static int __init parity(u8 v) -{ - int x = 0; - int i; - - for(i=0;i<8;i++) - { - x^=(v&1); - v>>=1; - } - return x; -} - -static void __init sbf_write(u8 v) -{ - unsigned long flags; - if(sbf_port != -1) - { - v &= ~SBF_PARITY; - if(!parity(v)) - v|=SBF_PARITY; - - printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n", sbf_port, v); - - spin_lock_irqsave(&rtc_lock, flags); - CMOS_WRITE(v, sbf_port); - spin_unlock_irqrestore(&rtc_lock, flags); - } -} - -static u8 __init sbf_read(void) -{ - u8 v; - unsigned long flags; - if(sbf_port == -1) - return 0; - spin_lock_irqsave(&rtc_lock, flags); - v = CMOS_READ(sbf_port); - spin_unlock_irqrestore(&rtc_lock, flags); - return v; -} - -static int __init sbf_value_valid(u8 v) -{ - if(v&SBF_RESERVED) /* Reserved bits */ - return 0; - if(!parity(v)) - return 0; - return 1; -} - -static int __init sbf_init(void) -{ - u8 v; - if(sbf_port == -1) - return 0; - v = sbf_read(); - if(!sbf_value_valid(v)) - printk(KERN_WARNING "Simple Boot Flag value 0x%x read from CMOS RAM was invalid\n",v); - - v &= ~SBF_RESERVED; - v &= ~SBF_BOOTING; - v &= ~SBF_DIAG; -#if defined(CONFIG_ISAPNP) - v |= SBF_PNPOS; -#endif - sbf_write(v); - return 0; -} - -module_init(sbf_init); diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c deleted file mode 100644 index 5c2faa10e9f..00000000000 --- a/arch/i386/kernel/cpuid.c +++ /dev/null @@ -1,242 +0,0 @@ -/* ----------------------------------------------------------------------- * - * - * Copyright 2000 H. Peter Anvin - All Rights Reserved - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, - * USA; either version 2 of the License, or (at your option) any later - * version; incorporated herein by reference. - * - * ----------------------------------------------------------------------- */ - -/* - * cpuid.c - * - * x86 CPUID access device - * - * This device is accessed by lseek() to the appropriate CPUID level - * and then read in chunks of 16 bytes. A larger size means multiple - * reads of consecutive levels. - * - * This driver uses /dev/cpu/%d/cpuid where %d is the minor number, and on - * an SMP box will direct the access to CPU %d. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -static struct class *cpuid_class; - -#ifdef CONFIG_SMP - -struct cpuid_command { - u32 reg; - u32 *data; -}; - -static void cpuid_smp_cpuid(void *cmd_block) -{ - struct cpuid_command *cmd = (struct cpuid_command *)cmd_block; - - cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2], - &cmd->data[3]); -} - -static inline void do_cpuid(int cpu, u32 reg, u32 * data) -{ - struct cpuid_command cmd; - - preempt_disable(); - if (cpu == smp_processor_id()) { - cpuid(reg, &data[0], &data[1], &data[2], &data[3]); - } else { - cmd.reg = reg; - cmd.data = data; - - smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1); - } - preempt_enable(); -} -#else /* ! CONFIG_SMP */ - -static inline void do_cpuid(int cpu, u32 reg, u32 * data) -{ - cpuid(reg, &data[0], &data[1], &data[2], &data[3]); -} - -#endif /* ! CONFIG_SMP */ - -static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) -{ - loff_t ret; - - lock_kernel(); - - switch (orig) { - case 0: - file->f_pos = offset; - ret = file->f_pos; - break; - case 1: - file->f_pos += offset; - ret = file->f_pos; - break; - default: - ret = -EINVAL; - } - - unlock_kernel(); - return ret; -} - -static ssize_t cpuid_read(struct file *file, char __user *buf, - size_t count, loff_t * ppos) -{ - char __user *tmp = buf; - u32 data[4]; - u32 reg = *ppos; - int cpu = iminor(file->f_path.dentry->d_inode); - - if (count % 16) - return -EINVAL; /* Invalid chunk size */ - - for (; count; count -= 16) { - do_cpuid(cpu, reg, data); - if (copy_to_user(tmp, &data, 16)) - return -EFAULT; - tmp += 16; - *ppos = reg++; - } - - return tmp - buf; -} - -static int cpuid_open(struct inode *inode, struct file *file) -{ - unsigned int cpu = iminor(file->f_path.dentry->d_inode); - struct cpuinfo_x86 *c = &(cpu_data)[cpu]; - - if (cpu >= NR_CPUS || !cpu_online(cpu)) - return -ENXIO; /* No such CPU */ - if (c->cpuid_level < 0) - return -EIO; /* CPUID not supported */ - - return 0; -} - -/* - * File operations we support - */ -static const struct file_operations cpuid_fops = { - .owner = THIS_MODULE, - .llseek = cpuid_seek, - .read = cpuid_read, - .open = cpuid_open, -}; - -static int cpuid_device_create(int i) -{ - int err = 0; - struct device *dev; - - dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, i), "cpu%d",i); - if (IS_ERR(dev)) - err = PTR_ERR(dev); - return err; -} - -static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - cpuid_device_create(cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __cpuinitdata cpuid_class_cpu_notifier = -{ - .notifier_call = cpuid_class_cpu_callback, -}; - -static int __init cpuid_init(void) -{ - int i, err = 0; - i = 0; - - if (register_chrdev(CPUID_MAJOR, "cpu/cpuid", &cpuid_fops)) { - printk(KERN_ERR "cpuid: unable to get major %d for cpuid\n", - CPUID_MAJOR); - err = -EBUSY; - goto out; - } - cpuid_class = class_create(THIS_MODULE, "cpuid"); - if (IS_ERR(cpuid_class)) { - err = PTR_ERR(cpuid_class); - goto out_chrdev; - } - for_each_online_cpu(i) { - err = cpuid_device_create(i); - if (err != 0) - goto out_class; - } - register_hotcpu_notifier(&cpuid_class_cpu_notifier); - - err = 0; - goto out; - -out_class: - i = 0; - for_each_online_cpu(i) { - device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, i)); - } - class_destroy(cpuid_class); -out_chrdev: - unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); -out: - return err; -} - -static void __exit cpuid_exit(void) -{ - int cpu = 0; - - for_each_online_cpu(cpu) - device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); - class_destroy(cpuid_class); - unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); - unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); -} - -module_init(cpuid_init); -module_exit(cpuid_exit); - -MODULE_AUTHOR("H. Peter Anvin "); -MODULE_DESCRIPTION("x86 generic CPUID driver"); -MODULE_LICENSE("GPL"); diff --git a/arch/i386/kernel/crash_32.c b/arch/i386/kernel/crash_32.c deleted file mode 100644 index 53589d1b1a0..00000000000 --- a/arch/i386/kernel/crash_32.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Architecture specific (i386) functions for kexec based crash dumps. - * - * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) - * - * Copyright (C) IBM Corporation, 2004. All rights reserved. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - - -/* This keeps a track of which one is crashing cpu. */ -static int crashing_cpu; - -#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) -static atomic_t waiting_for_crash_ipi; - -static int crash_nmi_callback(struct notifier_block *self, - unsigned long val, void *data) -{ - struct pt_regs *regs; - struct pt_regs fixed_regs; - int cpu; - - if (val != DIE_NMI_IPI) - return NOTIFY_OK; - - regs = ((struct die_args *)data)->regs; - cpu = raw_smp_processor_id(); - - /* Don't do anything if this handler is invoked on crashing cpu. - * Otherwise, system will completely hang. Crashing cpu can get - * an NMI if system was initially booted with nmi_watchdog parameter. - */ - if (cpu == crashing_cpu) - return NOTIFY_STOP; - local_irq_disable(); - - if (!user_mode_vm(regs)) { - crash_fixup_ss_esp(&fixed_regs, regs); - regs = &fixed_regs; - } - crash_save_cpu(regs, cpu); - disable_local_APIC(); - atomic_dec(&waiting_for_crash_ipi); - /* Assume hlt works */ - halt(); - for (;;) - cpu_relax(); - - return 1; -} - -static void smp_send_nmi_allbutself(void) -{ - cpumask_t mask = cpu_online_map; - cpu_clear(safe_smp_processor_id(), mask); - if (!cpus_empty(mask)) - send_IPI_mask(mask, NMI_VECTOR); -} - -static struct notifier_block crash_nmi_nb = { - .notifier_call = crash_nmi_callback, -}; - -static void nmi_shootdown_cpus(void) -{ - unsigned long msecs; - - atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); - /* Would it be better to replace the trap vector here? */ - if (register_die_notifier(&crash_nmi_nb)) - return; /* return what? */ - /* Ensure the new callback function is set before sending - * out the NMI - */ - wmb(); - - smp_send_nmi_allbutself(); - - msecs = 1000; /* Wait at most a second for the other cpus to stop */ - while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { - mdelay(1); - msecs--; - } - - /* Leave the nmi callback set */ - disable_local_APIC(); -} -#else -static void nmi_shootdown_cpus(void) -{ - /* There are no cpus to shootdown */ -} -#endif - -void machine_crash_shutdown(struct pt_regs *regs) -{ - /* This function is only called after the system - * has panicked or is otherwise in a critical state. - * The minimum amount of code to allow a kexec'd kernel - * to run successfully needs to happen here. - * - * In practice this means shooting down the other cpus in - * an SMP system. - */ - /* The kernel is broken so disable interrupts */ - local_irq_disable(); - - /* Make a note of crashing cpu. Will be used in NMI callback.*/ - crashing_cpu = safe_smp_processor_id(); - nmi_shootdown_cpus(); - lapic_shutdown(); -#if defined(CONFIG_X86_IO_APIC) - disable_IO_APIC(); -#endif - crash_save_cpu(regs, safe_smp_processor_id()); -} diff --git a/arch/i386/kernel/crash_dump_32.c b/arch/i386/kernel/crash_dump_32.c deleted file mode 100644 index 3f532df488b..00000000000 --- a/arch/i386/kernel/crash_dump_32.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * kernel/crash_dump.c - Memory preserving reboot related code. - * - * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) - * Copyright (C) IBM Corporation, 2004. All rights reserved - */ - -#include -#include -#include - -#include - -static void *kdump_buf_page; - -/** - * copy_oldmem_page - copy one page from "oldmem" - * @pfn: page frame number to be copied - * @buf: target memory address for the copy; this can be in kernel address - * space or user address space (see @userbuf) - * @csize: number of bytes to copy - * @offset: offset in bytes into the page (based on pfn) to begin the copy - * @userbuf: if set, @buf is in user address space, use copy_to_user(), - * otherwise @buf is in kernel address space, use memcpy(). - * - * Copy a page from "oldmem". For this page, there is no pte mapped - * in the current kernel. We stitch up a pte, similar to kmap_atomic. - * - * Calling copy_to_user() in atomic context is not desirable. Hence first - * copying the data to a pre-allocated kernel page and then copying to user - * space in non-atomic context. - */ -ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, int userbuf) -{ - void *vaddr; - - if (!csize) - return 0; - - vaddr = kmap_atomic_pfn(pfn, KM_PTE0); - - if (!userbuf) { - memcpy(buf, (vaddr + offset), csize); - kunmap_atomic(vaddr, KM_PTE0); - } else { - if (!kdump_buf_page) { - printk(KERN_WARNING "Kdump: Kdump buffer page not" - " allocated\n"); - return -EFAULT; - } - copy_page(kdump_buf_page, vaddr); - kunmap_atomic(vaddr, KM_PTE0); - if (copy_to_user(buf, (kdump_buf_page + offset), csize)) - return -EFAULT; - } - - return csize; -} - -static int __init kdump_buf_page_init(void) -{ - int ret = 0; - - kdump_buf_page = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!kdump_buf_page) { - printk(KERN_WARNING "Kdump: Failed to allocate kdump buffer" - " page\n"); - ret = -ENOMEM; - } - - return ret; -} -arch_initcall(kdump_buf_page_init); diff --git a/arch/i386/kernel/doublefault_32.c b/arch/i386/kernel/doublefault_32.c deleted file mode 100644 index 40978af630e..00000000000 --- a/arch/i386/kernel/doublefault_32.c +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#define DOUBLEFAULT_STACKSIZE (1024) -static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; -#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) - -#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM) - -static void doublefault_fn(void) -{ - struct Xgt_desc_struct gdt_desc = {0, 0}; - unsigned long gdt, tss; - - store_gdt(&gdt_desc); - gdt = gdt_desc.address; - - printk(KERN_EMERG "PANIC: double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size); - - if (ptr_ok(gdt)) { - gdt += GDT_ENTRY_TSS << 3; - tss = *(u16 *)(gdt+2); - tss += *(u8 *)(gdt+4) << 16; - tss += *(u8 *)(gdt+7) << 24; - printk(KERN_EMERG "double fault, tss at %08lx\n", tss); - - if (ptr_ok(tss)) { - struct i386_hw_tss *t = (struct i386_hw_tss *)tss; - - printk(KERN_EMERG "eip = %08lx, esp = %08lx\n", t->eip, t->esp); - - printk(KERN_EMERG "eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n", - t->eax, t->ebx, t->ecx, t->edx); - printk(KERN_EMERG "esi = %08lx, edi = %08lx\n", - t->esi, t->edi); - } - } - - for (;;) - cpu_relax(); -} - -struct tss_struct doublefault_tss __cacheline_aligned = { - .x86_tss = { - .esp0 = STACK_START, - .ss0 = __KERNEL_DS, - .ldt = 0, - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, - - .eip = (unsigned long) doublefault_fn, - /* 0x2 bit is always set */ - .eflags = X86_EFLAGS_SF | 0x2, - .esp = STACK_START, - .es = __USER_DS, - .cs = __KERNEL_CS, - .ss = __KERNEL_DS, - .ds = __USER_DS, - .fs = __KERNEL_PERCPU, - - .__cr3 = __pa(swapper_pg_dir) - } -}; diff --git a/arch/i386/kernel/e820_32.c b/arch/i386/kernel/e820_32.c deleted file mode 100644 index 3c86b979a40..00000000000 --- a/arch/i386/kernel/e820_32.c +++ /dev/null @@ -1,944 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#ifdef CONFIG_EFI -int efi_enabled = 0; -EXPORT_SYMBOL(efi_enabled); -#endif - -struct e820map e820; -struct change_member { - struct e820entry *pbios; /* pointer to original bios entry */ - unsigned long long addr; /* address for this change point */ -}; -static struct change_member change_point_list[2*E820MAX] __initdata; -static struct change_member *change_point[2*E820MAX] __initdata; -static struct e820entry *overlap_list[E820MAX] __initdata; -static struct e820entry new_bios[E820MAX] __initdata; -/* For PCI or other memory-mapped resources */ -unsigned long pci_mem_start = 0x10000000; -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pci_mem_start); -#endif -extern int user_defined_memmap; -struct resource data_resource = { - .name = "Kernel data", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -struct resource code_resource = { - .name = "Kernel code", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource system_rom_resource = { - .name = "System ROM", - .start = 0xf0000, - .end = 0xfffff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource extension_rom_resource = { - .name = "Extension ROM", - .start = 0xe0000, - .end = 0xeffff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource adapter_rom_resources[] = { { - .name = "Adapter ROM", - .start = 0xc8000, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -} }; - -static struct resource video_rom_resource = { - .name = "Video ROM", - .start = 0xc0000, - .end = 0xc7fff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource video_ram_resource = { - .name = "Video RAM area", - .start = 0xa0000, - .end = 0xbffff, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource standard_io_resources[] = { { - .name = "dma1", - .start = 0x0000, - .end = 0x001f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic1", - .start = 0x0020, - .end = 0x0021, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "timer0", - .start = 0x0040, - .end = 0x0043, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "timer1", - .start = 0x0050, - .end = 0x0053, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "keyboard", - .start = 0x0060, - .end = 0x006f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma page reg", - .start = 0x0080, - .end = 0x008f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic2", - .start = 0x00a0, - .end = 0x00a1, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma2", - .start = 0x00c0, - .end = 0x00df, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "fpu", - .start = 0x00f0, - .end = 0x00ff, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -} }; - -#define ROMSIGNATURE 0xaa55 - -static int __init romsignature(const unsigned char *rom) -{ - const unsigned short * const ptr = (const unsigned short *)rom; - unsigned short sig; - - return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE; -} - -static int __init romchecksum(const unsigned char *rom, unsigned long length) -{ - unsigned char sum, c; - - for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--) - sum += c; - return !length && !sum; -} - -static void __init probe_roms(void) -{ - const unsigned char *rom; - unsigned long start, length, upper; - unsigned char c; - int i; - - /* video rom */ - upper = adapter_rom_resources[0].start; - for (start = video_rom_resource.start; start < upper; start += 2048) { - rom = isa_bus_to_virt(start); - if (!romsignature(rom)) - continue; - - video_rom_resource.start = start; - - if (probe_kernel_address(rom + 2, c) != 0) - continue; - - /* 0 < length <= 0x7f * 512, historically */ - length = c * 512; - - /* if checksum okay, trust length byte */ - if (length && romchecksum(rom, length)) - video_rom_resource.end = start + length - 1; - - request_resource(&iomem_resource, &video_rom_resource); - break; - } - - start = (video_rom_resource.end + 1 + 2047) & ~2047UL; - if (start < upper) - start = upper; - - /* system rom */ - request_resource(&iomem_resource, &system_rom_resource); - upper = system_rom_resource.start; - - /* check for extension rom (ignore length byte!) */ - rom = isa_bus_to_virt(extension_rom_resource.start); - if (romsignature(rom)) { - length = extension_rom_resource.end - extension_rom_resource.start + 1; - if (romchecksum(rom, length)) { - request_resource(&iomem_resource, &extension_rom_resource); - upper = extension_rom_resource.start; - } - } - - /* check for adapter roms on 2k boundaries */ - for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { - rom = isa_bus_to_virt(start); - if (!romsignature(rom)) - continue; - - if (probe_kernel_address(rom + 2, c) != 0) - continue; - - /* 0 < length <= 0x7f * 512, historically */ - length = c * 512; - - /* but accept any length that fits if checksum okay */ - if (!length || start + length > upper || !romchecksum(rom, length)) - continue; - - adapter_rom_resources[i].start = start; - adapter_rom_resources[i].end = start + length - 1; - request_resource(&iomem_resource, &adapter_rom_resources[i]); - - start = adapter_rom_resources[i++].end & ~2047UL; - } -} - -/* - * Request address space for all standard RAM and ROM resources - * and also for regions reported as reserved by the e820. - */ -static void __init -legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource) -{ - int i; - - probe_roms(); - for (i = 0; i < e820.nr_map; i++) { - struct resource *res; -#ifndef CONFIG_RESOURCES_64BIT - if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) - continue; -#endif - res = kzalloc(sizeof(struct resource), GFP_ATOMIC); - switch (e820.map[i].type) { - case E820_RAM: res->name = "System RAM"; break; - case E820_ACPI: res->name = "ACPI Tables"; break; - case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; - default: res->name = "reserved"; - } - res->start = e820.map[i].addr; - res->end = res->start + e820.map[i].size - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - if (request_resource(&iomem_resource, res)) { - kfree(res); - continue; - } - if (e820.map[i].type == E820_RAM) { - /* - * We don't know which RAM region contains kernel data, - * so we try it repeatedly and let the resource manager - * test it. - */ - request_resource(res, code_resource); - request_resource(res, data_resource); -#ifdef CONFIG_KEXEC - request_resource(res, &crashk_res); -#endif - } - } -} - -/* - * Request address space for all standard resources - * - * This is called just before pcibios_init(), which is also a - * subsys_initcall, but is linked in later (in arch/i386/pci/common.c). - */ -static int __init request_standard_resources(void) -{ - int i; - - printk("Setting up standard PCI resources\n"); - if (efi_enabled) - efi_initialize_iomem_resources(&code_resource, &data_resource); - else - legacy_init_iomem_resources(&code_resource, &data_resource); - - /* EFI systems may still have VGA */ - request_resource(&iomem_resource, &video_ram_resource); - - /* request I/O space for devices used on all i[345]86 PCs */ - for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) - request_resource(&ioport_resource, &standard_io_resources[i]); - return 0; -} - -subsys_initcall(request_standard_resources); - -#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION) -/** - * e820_mark_nosave_regions - Find the ranges of physical addresses that do not - * correspond to e820 RAM areas and mark the corresponding pages as nosave for - * hibernation. - * - * This function requires the e820 map to be sorted and without any - * overlapping entries and assumes the first e820 area to be RAM. - */ -void __init e820_mark_nosave_regions(void) -{ - int i; - unsigned long pfn; - - pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); - for (i = 1; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - - if (pfn < PFN_UP(ei->addr)) - register_nosave_region(pfn, PFN_UP(ei->addr)); - - pfn = PFN_DOWN(ei->addr + ei->size); - if (ei->type != E820_RAM) - register_nosave_region(PFN_UP(ei->addr), pfn); - - if (pfn >= max_low_pfn) - break; - } -} -#endif - -void __init add_memory_region(unsigned long long start, - unsigned long long size, int type) -{ - int x; - - if (!efi_enabled) { - x = e820.nr_map; - - if (x == E820MAX) { - printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); - return; - } - - e820.map[x].addr = start; - e820.map[x].size = size; - e820.map[x].type = type; - e820.nr_map++; - } -} /* add_memory_region */ - -/* - * Sanitize the BIOS e820 map. - * - * Some e820 responses include overlapping entries. The following - * replaces the original e820 map with a new one, removing overlaps. - * - */ -int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) -{ - struct change_member *change_tmp; - unsigned long current_type, last_type; - unsigned long long last_addr; - int chgidx, still_changing; - int overlap_entries; - int new_bios_entry; - int old_nr, new_nr, chg_nr; - int i; - - /* - Visually we're performing the following (1,2,3,4 = memory types)... - - Sample memory map (w/overlaps): - ____22__________________ - ______________________4_ - ____1111________________ - _44_____________________ - 11111111________________ - ____________________33__ - ___________44___________ - __________33333_________ - ______________22________ - ___________________2222_ - _________111111111______ - _____________________11_ - _________________4______ - - Sanitized equivalent (no overlap): - 1_______________________ - _44_____________________ - ___1____________________ - ____22__________________ - ______11________________ - _________1______________ - __________3_____________ - ___________44___________ - _____________33_________ - _______________2________ - ________________1_______ - _________________4______ - ___________________2____ - ____________________33__ - ______________________4_ - */ - /* if there's only one memory region, don't bother */ - if (*pnr_map < 2) { - return -1; - } - - old_nr = *pnr_map; - - /* bail out if we find any unreasonable addresses in bios map */ - for (i=0; iaddr = biosmap[i].addr; - change_point[chgidx++]->pbios = &biosmap[i]; - change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size; - change_point[chgidx++]->pbios = &biosmap[i]; - } - } - chg_nr = chgidx; /* true number of change-points */ - - /* sort change-point list by memory addresses (low -> high) */ - still_changing = 1; - while (still_changing) { - still_changing = 0; - for (i=1; i < chg_nr; i++) { - /* if > , swap */ - /* or, if current= & last=, swap */ - if ((change_point[i]->addr < change_point[i-1]->addr) || - ((change_point[i]->addr == change_point[i-1]->addr) && - (change_point[i]->addr == change_point[i]->pbios->addr) && - (change_point[i-1]->addr != change_point[i-1]->pbios->addr)) - ) - { - change_tmp = change_point[i]; - change_point[i] = change_point[i-1]; - change_point[i-1] = change_tmp; - still_changing=1; - } - } - } - - /* create a new bios memory map, removing overlaps */ - overlap_entries=0; /* number of entries in the overlap table */ - new_bios_entry=0; /* index for creating new bios map entries */ - last_type = 0; /* start with undefined memory type */ - last_addr = 0; /* start with 0 as last starting address */ - /* loop through change-points, determining affect on the new bios map */ - for (chgidx=0; chgidx < chg_nr; chgidx++) - { - /* keep track of all overlapping bios entries */ - if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) - { - /* add map entry to overlap list (> 1 entry implies an overlap) */ - overlap_list[overlap_entries++]=change_point[chgidx]->pbios; - } - else - { - /* remove entry from list (order independent, so swap with last) */ - for (i=0; ipbios) - overlap_list[i] = overlap_list[overlap_entries-1]; - } - overlap_entries--; - } - /* if there are overlapping entries, decide which "type" to use */ - /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */ - current_type = 0; - for (i=0; itype > current_type) - current_type = overlap_list[i]->type; - /* continue building up new bios map based on this information */ - if (current_type != last_type) { - if (last_type != 0) { - new_bios[new_bios_entry].size = - change_point[chgidx]->addr - last_addr; - /* move forward only if the new size was non-zero */ - if (new_bios[new_bios_entry].size != 0) - if (++new_bios_entry >= E820MAX) - break; /* no more space left for new bios entries */ - } - if (current_type != 0) { - new_bios[new_bios_entry].addr = change_point[chgidx]->addr; - new_bios[new_bios_entry].type = current_type; - last_addr=change_point[chgidx]->addr; - } - last_type = current_type; - } - } - new_nr = new_bios_entry; /* retain count for new bios entries */ - - /* copy new bios mapping into original location */ - memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); - *pnr_map = new_nr; - - return 0; -} - -/* - * Copy the BIOS e820 map into a safe place. - * - * Sanity-check it while we're at it.. - * - * If we're lucky and live on a modern system, the setup code - * will have given us a memory map that we can use to properly - * set up memory. If we aren't, we'll fake a memory map. - * - * We check to see that the memory map contains at least 2 elements - * before we'll use it, because the detection code in setup.S may - * not be perfect and most every PC known to man has two memory - * regions: one from 0 to 640k, and one from 1mb up. (The IBM - * thinkpad 560x, for example, does not cooperate with the memory - * detection code.) - */ -int __init copy_e820_map(struct e820entry * biosmap, int nr_map) -{ - /* Only one memory region (or negative)? Ignore it */ - if (nr_map < 2) - return -1; - - do { - unsigned long long start = biosmap->addr; - unsigned long long size = biosmap->size; - unsigned long long end = start + size; - unsigned long type = biosmap->type; - - /* Overflow in 64 bits? Ignore the memory map. */ - if (start > end) - return -1; - - /* - * Some BIOSes claim RAM in the 640k - 1M region. - * Not right. Fix it up. - */ - if (type == E820_RAM) { - if (start < 0x100000ULL && end > 0xA0000ULL) { - if (start < 0xA0000ULL) - add_memory_region(start, 0xA0000ULL-start, type); - if (end <= 0x100000ULL) - continue; - start = 0x100000ULL; - size = end - start; - } - } - add_memory_region(start, size, type); - } while (biosmap++,--nr_map); - return 0; -} - -/* - * Callback for efi_memory_walk. - */ -static int __init -efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) -{ - unsigned long *max_pfn = arg, pfn; - - if (start < end) { - pfn = PFN_UP(end -1); - if (pfn > *max_pfn) - *max_pfn = pfn; - } - return 0; -} - -static int __init -efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) -{ - memory_present(0, PFN_UP(start), PFN_DOWN(end)); - return 0; -} - -/* - * Find the highest page frame number we have available - */ -void __init find_max_pfn(void) -{ - int i; - - max_pfn = 0; - if (efi_enabled) { - efi_memmap_walk(efi_find_max_pfn, &max_pfn); - efi_memmap_walk(efi_memory_present_wrapper, NULL); - return; - } - - for (i = 0; i < e820.nr_map; i++) { - unsigned long start, end; - /* RAM? */ - if (e820.map[i].type != E820_RAM) - continue; - start = PFN_UP(e820.map[i].addr); - end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); - if (start >= end) - continue; - if (end > max_pfn) - max_pfn = end; - memory_present(0, start, end); - } -} - -/* - * Free all available memory for boot time allocation. Used - * as a callback function by efi_memory_walk() - */ - -static int __init -free_available_memory(unsigned long start, unsigned long end, void *arg) -{ - /* check max_low_pfn */ - if (start >= (max_low_pfn << PAGE_SHIFT)) - return 0; - if (end >= (max_low_pfn << PAGE_SHIFT)) - end = max_low_pfn << PAGE_SHIFT; - if (start < end) - free_bootmem(start, end - start); - - return 0; -} -/* - * Register fully available low RAM pages with the bootmem allocator. - */ -void __init register_bootmem_low_pages(unsigned long max_low_pfn) -{ - int i; - - if (efi_enabled) { - efi_memmap_walk(free_available_memory, NULL); - return; - } - for (i = 0; i < e820.nr_map; i++) { - unsigned long curr_pfn, last_pfn, size; - /* - * Reserve usable low memory - */ - if (e820.map[i].type != E820_RAM) - continue; - /* - * We are rounding up the start address of usable memory: - */ - curr_pfn = PFN_UP(e820.map[i].addr); - if (curr_pfn >= max_low_pfn) - continue; - /* - * ... and at the end of the usable range downwards: - */ - last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); - - if (last_pfn > max_low_pfn) - last_pfn = max_low_pfn; - - /* - * .. finally, did all the rounding and playing - * around just make the area go away? - */ - if (last_pfn <= curr_pfn) - continue; - - size = last_pfn - curr_pfn; - free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); - } -} - -void __init e820_register_memory(void) -{ - unsigned long gapstart, gapsize, round; - unsigned long long last; - int i; - - /* - * Search for the bigest gap in the low 32 bits of the e820 - * memory space. - */ - last = 0x100000000ull; - gapstart = 0x10000000; - gapsize = 0x400000; - i = e820.nr_map; - while (--i >= 0) { - unsigned long long start = e820.map[i].addr; - unsigned long long end = start + e820.map[i].size; - - /* - * Since "last" is at most 4GB, we know we'll - * fit in 32 bits if this condition is true - */ - if (last > end) { - unsigned long gap = last - end; - - if (gap > gapsize) { - gapsize = gap; - gapstart = end; - } - } - if (start < last) - last = start; - } - - /* - * See how much we want to round up: start off with - * rounding to the next 1MB area. - */ - round = 0x100000; - while ((gapsize >> 4) > round) - round += round; - /* Fun with two's complement */ - pci_mem_start = (gapstart + round) & -round; - - printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", - pci_mem_start, gapstart, gapsize); -} - -void __init print_memory_map(char *who) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - printk(" %s: %016Lx - %016Lx ", who, - e820.map[i].addr, - e820.map[i].addr + e820.map[i].size); - switch (e820.map[i].type) { - case E820_RAM: printk("(usable)\n"); - break; - case E820_RESERVED: - printk("(reserved)\n"); - break; - case E820_ACPI: - printk("(ACPI data)\n"); - break; - case E820_NVS: - printk("(ACPI NVS)\n"); - break; - default: printk("type %u\n", e820.map[i].type); - break; - } - } -} - -static __init __always_inline void efi_limit_regions(unsigned long long size) -{ - unsigned long long current_addr = 0; - efi_memory_desc_t *md, *next_md; - void *p, *p1; - int i, j; - - j = 0; - p1 = memmap.map; - for (p = p1, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) { - md = p; - next_md = p1; - current_addr = md->phys_addr + - PFN_PHYS(md->num_pages); - if (is_available_memory(md)) { - if (md->phys_addr >= size) continue; - memcpy(next_md, md, memmap.desc_size); - if (current_addr >= size) { - next_md->num_pages -= - PFN_UP(current_addr-size); - } - p1 += memmap.desc_size; - next_md = p1; - j++; - } else if ((md->attribute & EFI_MEMORY_RUNTIME) == - EFI_MEMORY_RUNTIME) { - /* In order to make runtime services - * available we have to include runtime - * memory regions in memory map */ - memcpy(next_md, md, memmap.desc_size); - p1 += memmap.desc_size; - next_md = p1; - j++; - } - } - memmap.nr_map = j; - memmap.map_end = memmap.map + - (memmap.nr_map * memmap.desc_size); -} - -void __init limit_regions(unsigned long long size) -{ - unsigned long long current_addr; - int i; - - print_memory_map("limit_regions start"); - if (efi_enabled) { - efi_limit_regions(size); - return; - } - for (i = 0; i < e820.nr_map; i++) { - current_addr = e820.map[i].addr + e820.map[i].size; - if (current_addr < size) - continue; - - if (e820.map[i].type != E820_RAM) - continue; - - if (e820.map[i].addr >= size) { - /* - * This region starts past the end of the - * requested size, skip it completely. - */ - e820.nr_map = i; - } else { - e820.nr_map = i + 1; - e820.map[i].size -= current_addr - size; - } - print_memory_map("limit_regions endfor"); - return; - } - print_memory_map("limit_regions endfunc"); -} - -/* - * This function checks if any part of the range is mapped - * with type. - */ -int -e820_any_mapped(u64 start, u64 end, unsigned type) -{ - int i; - for (i = 0; i < e820.nr_map; i++) { - const struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) - continue; - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - return 1; - } - return 0; -} -EXPORT_SYMBOL_GPL(e820_any_mapped); - - /* - * This function checks if the entire range is mapped with type. - * - * Note: this function only works correct if the e820 table is sorted and - * not-overlapping, which is the case - */ -int __init -e820_all_mapped(unsigned long s, unsigned long e, unsigned type) -{ - u64 start = s; - u64 end = e; - int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) - continue; - /* is the region (part) in overlap with the current region ?*/ - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - /* if the region is at the beginning of we move - * start to the end of the region since it's ok until there - */ - if (ei->addr <= start) - start = ei->addr + ei->size; - /* if start is now at or beyond end, we're done, full - * coverage */ - if (start >= end) - return 1; /* we're done */ - } - return 0; -} - -static int __init parse_memmap(char *arg) -{ - if (!arg) - return -EINVAL; - - if (strcmp(arg, "exactmap") == 0) { -#ifdef CONFIG_CRASH_DUMP - /* If we are doing a crash dump, we - * still need to know the real mem - * size before original memory map is - * reset. - */ - find_max_pfn(); - saved_max_pfn = max_pfn; -#endif - e820.nr_map = 0; - user_defined_memmap = 1; - } else { - /* If the user specifies memory size, we - * limit the BIOS-provided memory map to - * that size. exactmap can be used to specify - * the exact map. mem=number can be used to - * trim the existing memory map. - */ - unsigned long long start_at, mem_size; - - mem_size = memparse(arg, &arg); - if (*arg == '@') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_RAM); - } else if (*arg == '#') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_ACPI); - } else if (*arg == '$') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_RESERVED); - } else { - limit_regions(mem_size); - user_defined_memmap = 1; - } - } - return 0; -} -early_param("memmap", parse_memmap); diff --git a/arch/i386/kernel/early_printk.c b/arch/i386/kernel/early_printk.c deleted file mode 100644 index 92f812ba275..00000000000 --- a/arch/i386/kernel/early_printk.c +++ /dev/null @@ -1,2 +0,0 @@ - -#include "../../x86_64/kernel/early_printk.c" diff --git a/arch/i386/kernel/efi_32.c b/arch/i386/kernel/efi_32.c deleted file mode 100644 index 2452c6fbe99..00000000000 --- a/arch/i386/kernel/efi_32.c +++ /dev/null @@ -1,712 +0,0 @@ -/* - * Extensible Firmware Interface - * - * Based on Extensible Firmware Interface Specification version 1.0 - * - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999 Walt Drummond - * Copyright (C) 1999-2002 Hewlett-Packard Co. - * David Mosberger-Tang - * Stephane Eranian - * - * All EFI Runtime Services are not implemented yet as EFI only - * supports physical mode addressing on SoftSDV. This is to be fixed - * in a future version. --drummond 1999-07-20 - * - * Implemented EFI runtime services and virtual mode calls. --davidm - * - * Goutham Rao: - * Skip non-WB memory and ignore empty memory ranges. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#define EFI_DEBUG 0 -#define PFX "EFI: " - -extern efi_status_t asmlinkage efi_call_phys(void *, ...); - -struct efi efi; -EXPORT_SYMBOL(efi); -static struct efi efi_phys; -struct efi_memory_map memmap; - -/* - * We require an early boot_ioremap mapping mechanism initially - */ -extern void * boot_ioremap(unsigned long, unsigned long); - -/* - * To make EFI call EFI runtime service in physical addressing mode we need - * prelog/epilog before/after the invocation to disable interrupt, to - * claim EFI runtime service handler exclusively and to duplicate a memory in - * low memory space say 0 - 3G. - */ - -static unsigned long efi_rt_eflags; -static DEFINE_SPINLOCK(efi_rt_lock); -static pgd_t efi_bak_pg_dir_pointer[2]; - -static void efi_call_phys_prelog(void) __acquires(efi_rt_lock) -{ - unsigned long cr4; - unsigned long temp; - struct Xgt_desc_struct gdt_descr; - - spin_lock(&efi_rt_lock); - local_irq_save(efi_rt_eflags); - - /* - * If I don't have PSE, I should just duplicate two entries in page - * directory. If I have PSE, I just need to duplicate one entry in - * page directory. - */ - cr4 = read_cr4(); - - if (cr4 & X86_CR4_PSE) { - efi_bak_pg_dir_pointer[0].pgd = - swapper_pg_dir[pgd_index(0)].pgd; - swapper_pg_dir[0].pgd = - swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; - } else { - efi_bak_pg_dir_pointer[0].pgd = - swapper_pg_dir[pgd_index(0)].pgd; - efi_bak_pg_dir_pointer[1].pgd = - swapper_pg_dir[pgd_index(0x400000)].pgd; - swapper_pg_dir[pgd_index(0)].pgd = - swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; - temp = PAGE_OFFSET + 0x400000; - swapper_pg_dir[pgd_index(0x400000)].pgd = - swapper_pg_dir[pgd_index(temp)].pgd; - } - - /* - * After the lock is released, the original page table is restored. - */ - local_flush_tlb(); - - gdt_descr.address = __pa(get_cpu_gdt_table(0)); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); -} - -static void efi_call_phys_epilog(void) __releases(efi_rt_lock) -{ - unsigned long cr4; - struct Xgt_desc_struct gdt_descr; - - gdt_descr.address = (unsigned long)get_cpu_gdt_table(0); - gdt_descr.size = GDT_SIZE - 1; - load_gdt(&gdt_descr); - - cr4 = read_cr4(); - - if (cr4 & X86_CR4_PSE) { - swapper_pg_dir[pgd_index(0)].pgd = - efi_bak_pg_dir_pointer[0].pgd; - } else { - swapper_pg_dir[pgd_index(0)].pgd = - efi_bak_pg_dir_pointer[0].pgd; - swapper_pg_dir[pgd_index(0x400000)].pgd = - efi_bak_pg_dir_pointer[1].pgd; - } - - /* - * After the lock is released, the original page table is restored. - */ - local_flush_tlb(); - - local_irq_restore(efi_rt_eflags); - spin_unlock(&efi_rt_lock); -} - -static efi_status_t -phys_efi_set_virtual_address_map(unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) -{ - efi_status_t status; - - efi_call_phys_prelog(); - status = efi_call_phys(efi_phys.set_virtual_address_map, - memory_map_size, descriptor_size, - descriptor_version, virtual_map); - efi_call_phys_epilog(); - return status; -} - -static efi_status_t -phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) -{ - efi_status_t status; - - efi_call_phys_prelog(); - status = efi_call_phys(efi_phys.get_time, tm, tc); - efi_call_phys_epilog(); - return status; -} - -inline int efi_set_rtc_mmss(unsigned long nowtime) -{ - int real_seconds, real_minutes; - efi_status_t status; - efi_time_t eft; - efi_time_cap_t cap; - - spin_lock(&efi_rt_lock); - status = efi.get_time(&eft, &cap); - spin_unlock(&efi_rt_lock); - if (status != EFI_SUCCESS) - panic("Ooops, efitime: can't read time!\n"); - real_seconds = nowtime % 60; - real_minutes = nowtime / 60; - - if (((abs(real_minutes - eft.minute) + 15)/30) & 1) - real_minutes += 30; - real_minutes %= 60; - - eft.minute = real_minutes; - eft.second = real_seconds; - - if (status != EFI_SUCCESS) { - printk("Ooops: efitime: can't read time!\n"); - return -1; - } - return 0; -} -/* - * This is used during kernel init before runtime - * services have been remapped and also during suspend, therefore, - * we'll need to call both in physical and virtual modes. - */ -inline unsigned long efi_get_time(void) -{ - efi_status_t status; - efi_time_t eft; - efi_time_cap_t cap; - - if (efi.get_time) { - /* if we are in virtual mode use remapped function */ - status = efi.get_time(&eft, &cap); - } else { - /* we are in physical mode */ - status = phys_efi_get_time(&eft, &cap); - } - - if (status != EFI_SUCCESS) - printk("Oops: efitime: can't read time status: 0x%lx\n",status); - - return mktime(eft.year, eft.month, eft.day, eft.hour, - eft.minute, eft.second); -} - -int is_available_memory(efi_memory_desc_t * md) -{ - if (!(md->attribute & EFI_MEMORY_WB)) - return 0; - - switch (md->type) { - case EFI_LOADER_CODE: - case EFI_LOADER_DATA: - case EFI_BOOT_SERVICES_CODE: - case EFI_BOOT_SERVICES_DATA: - case EFI_CONVENTIONAL_MEMORY: - return 1; - } - return 0; -} - -/* - * We need to map the EFI memory map again after paging_init(). - */ -void __init efi_map_memmap(void) -{ - memmap.map = NULL; - - memmap.map = bt_ioremap((unsigned long) memmap.phys_map, - (memmap.nr_map * memmap.desc_size)); - if (memmap.map == NULL) - printk(KERN_ERR PFX "Could not remap the EFI memmap!\n"); - - memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); -} - -#if EFI_DEBUG -static void __init print_efi_memmap(void) -{ - efi_memory_desc_t *md; - void *p; - int i; - - for (p = memmap.map, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) { - md = p; - printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, " - "range=[0x%016llx-0x%016llx) (%lluMB)\n", - i, md->type, md->attribute, md->phys_addr, - md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), - (md->num_pages >> (20 - EFI_PAGE_SHIFT))); - } -} -#endif /* EFI_DEBUG */ - -/* - * Walks the EFI memory map and calls CALLBACK once for each EFI - * memory descriptor that has memory that is available for kernel use. - */ -void efi_memmap_walk(efi_freemem_callback_t callback, void *arg) -{ - int prev_valid = 0; - struct range { - unsigned long start; - unsigned long end; - } uninitialized_var(prev), curr; - efi_memory_desc_t *md; - unsigned long start, end; - void *p; - - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - - if ((md->num_pages == 0) || (!is_available_memory(md))) - continue; - - curr.start = md->phys_addr; - curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT); - - if (!prev_valid) { - prev = curr; - prev_valid = 1; - } else { - if (curr.start < prev.start) - printk(KERN_INFO PFX "Unordered memory map\n"); - if (prev.end == curr.start) - prev.end = curr.end; - else { - start = - (unsigned long) (PAGE_ALIGN(prev.start)); - end = (unsigned long) (prev.end & PAGE_MASK); - if ((end > start) - && (*callback) (start, end, arg) < 0) - return; - prev = curr; - } - } - } - if (prev_valid) { - start = (unsigned long) PAGE_ALIGN(prev.start); - end = (unsigned long) (prev.end & PAGE_MASK); - if (end > start) - (*callback) (start, end, arg); - } -} - -void __init efi_init(void) -{ - efi_config_table_t *config_tables; - efi_runtime_services_t *runtime; - efi_char16_t *c16; - char vendor[100] = "unknown"; - unsigned long num_config_tables; - int i = 0; - - memset(&efi, 0, sizeof(efi) ); - memset(&efi_phys, 0, sizeof(efi_phys)); - - efi_phys.systab = EFI_SYSTAB; - memmap.phys_map = EFI_MEMMAP; - memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE; - memmap.desc_version = EFI_MEMDESC_VERSION; - memmap.desc_size = EFI_MEMDESC_SIZE; - - efi.systab = (efi_system_table_t *) - boot_ioremap((unsigned long) efi_phys.systab, - sizeof(efi_system_table_t)); - /* - * Verify the EFI Table - */ - if (efi.systab == NULL) - printk(KERN_ERR PFX "Woah! Couldn't map the EFI system table.\n"); - if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) - printk(KERN_ERR PFX "Woah! EFI system table signature incorrect\n"); - if ((efi.systab->hdr.revision >> 16) == 0) - printk(KERN_ERR PFX "Warning: EFI system table version " - "%d.%02d, expected 1.00 or greater\n", - efi.systab->hdr.revision >> 16, - efi.systab->hdr.revision & 0xffff); - - /* - * Grab some details from the system table - */ - num_config_tables = efi.systab->nr_tables; - config_tables = (efi_config_table_t *)efi.systab->tables; - runtime = efi.systab->runtime; - - /* - * Show what we know for posterity - */ - c16 = (efi_char16_t *) boot_ioremap(efi.systab->fw_vendor, 2); - if (c16) { - for (i = 0; i < (sizeof(vendor) - 1) && *c16; ++i) - vendor[i] = *c16++; - vendor[i] = '\0'; - } else - printk(KERN_ERR PFX "Could not map the firmware vendor!\n"); - - printk(KERN_INFO PFX "EFI v%u.%.02u by %s \n", - efi.systab->hdr.revision >> 16, - efi.systab->hdr.revision & 0xffff, vendor); - - /* - * Let's see what config tables the firmware passed to us. - */ - config_tables = (efi_config_table_t *) - boot_ioremap((unsigned long) config_tables, - num_config_tables * sizeof(efi_config_table_t)); - - if (config_tables == NULL) - printk(KERN_ERR PFX "Could not map EFI Configuration Table!\n"); - - efi.mps = EFI_INVALID_TABLE_ADDR; - efi.acpi = EFI_INVALID_TABLE_ADDR; - efi.acpi20 = EFI_INVALID_TABLE_ADDR; - efi.smbios = EFI_INVALID_TABLE_ADDR; - efi.sal_systab = EFI_INVALID_TABLE_ADDR; - efi.boot_info = EFI_INVALID_TABLE_ADDR; - efi.hcdp = EFI_INVALID_TABLE_ADDR; - efi.uga = EFI_INVALID_TABLE_ADDR; - - for (i = 0; i < num_config_tables; i++) { - if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { - efi.mps = config_tables[i].table; - printk(KERN_INFO " MPS=0x%lx ", config_tables[i].table); - } else - if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) { - efi.acpi20 = config_tables[i].table; - printk(KERN_INFO " ACPI 2.0=0x%lx ", config_tables[i].table); - } else - if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { - efi.acpi = config_tables[i].table; - printk(KERN_INFO " ACPI=0x%lx ", config_tables[i].table); - } else - if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) { - efi.smbios = config_tables[i].table; - printk(KERN_INFO " SMBIOS=0x%lx ", config_tables[i].table); - } else - if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { - efi.hcdp = config_tables[i].table; - printk(KERN_INFO " HCDP=0x%lx ", config_tables[i].table); - } else - if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) { - efi.uga = config_tables[i].table; - printk(KERN_INFO " UGA=0x%lx ", config_tables[i].table); - } - } - printk("\n"); - - /* - * Check out the runtime services table. We need to map - * the runtime services table so that we can grab the physical - * address of several of the EFI runtime functions, needed to - * set the firmware into virtual mode. - */ - - runtime = (efi_runtime_services_t *) boot_ioremap((unsigned long) - runtime, - sizeof(efi_runtime_services_t)); - if (runtime != NULL) { - /* - * We will only need *early* access to the following - * two EFI runtime services before set_virtual_address_map - * is invoked. - */ - efi_phys.get_time = (efi_get_time_t *) runtime->get_time; - efi_phys.set_virtual_address_map = - (efi_set_virtual_address_map_t *) - runtime->set_virtual_address_map; - } else - printk(KERN_ERR PFX "Could not map the runtime service table!\n"); - - /* Map the EFI memory map for use until paging_init() */ - memmap.map = boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE); - if (memmap.map == NULL) - printk(KERN_ERR PFX "Could not map the EFI memory map!\n"); - - memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); - -#if EFI_DEBUG - print_efi_memmap(); -#endif -} - -static inline void __init check_range_for_systab(efi_memory_desc_t *md) -{ - if (((unsigned long)md->phys_addr <= (unsigned long)efi_phys.systab) && - ((unsigned long)efi_phys.systab < md->phys_addr + - ((unsigned long)md->num_pages << EFI_PAGE_SHIFT))) { - unsigned long addr; - - addr = md->virt_addr - md->phys_addr + - (unsigned long)efi_phys.systab; - efi.systab = (efi_system_table_t *)addr; - } -} - -/* - * Wrap all the virtual calls in a way that forces the parameters on the stack. - */ - -#define efi_call_virt(f, args...) \ - ((efi_##f##_t __attribute__((regparm(0)))*)efi.systab->runtime->f)(args) - -static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) -{ - return efi_call_virt(get_time, tm, tc); -} - -static efi_status_t virt_efi_set_time (efi_time_t *tm) -{ - return efi_call_virt(set_time, tm); -} - -static efi_status_t virt_efi_get_wakeup_time (efi_bool_t *enabled, - efi_bool_t *pending, - efi_time_t *tm) -{ - return efi_call_virt(get_wakeup_time, enabled, pending, tm); -} - -static efi_status_t virt_efi_set_wakeup_time (efi_bool_t enabled, - efi_time_t *tm) -{ - return efi_call_virt(set_wakeup_time, enabled, tm); -} - -static efi_status_t virt_efi_get_variable (efi_char16_t *name, - efi_guid_t *vendor, u32 *attr, - unsigned long *data_size, void *data) -{ - return efi_call_virt(get_variable, name, vendor, attr, data_size, data); -} - -static efi_status_t virt_efi_get_next_variable (unsigned long *name_size, - efi_char16_t *name, - efi_guid_t *vendor) -{ - return efi_call_virt(get_next_variable, name_size, name, vendor); -} - -static efi_status_t virt_efi_set_variable (efi_char16_t *name, - efi_guid_t *vendor, - unsigned long attr, - unsigned long data_size, void *data) -{ - return efi_call_virt(set_variable, name, vendor, attr, data_size, data); -} - -static efi_status_t virt_efi_get_next_high_mono_count (u32 *count) -{ - return efi_call_virt(get_next_high_mono_count, count); -} - -static void virt_efi_reset_system (int reset_type, efi_status_t status, - unsigned long data_size, - efi_char16_t *data) -{ - efi_call_virt(reset_system, reset_type, status, data_size, data); -} - -/* - * This function will switch the EFI runtime services to virtual mode. - * Essentially, look through the EFI memmap and map every region that - * has the runtime attribute bit set in its memory descriptor and update - * that memory descriptor with the virtual address obtained from ioremap(). - * This enables the runtime services to be called without having to - * thunk back into physical mode for every invocation. - */ - -void __init efi_enter_virtual_mode(void) -{ - efi_memory_desc_t *md; - efi_status_t status; - void *p; - - efi.systab = NULL; - - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - - if (!(md->attribute & EFI_MEMORY_RUNTIME)) - continue; - - md->virt_addr = (unsigned long)ioremap(md->phys_addr, - md->num_pages << EFI_PAGE_SHIFT); - if (!(unsigned long)md->virt_addr) { - printk(KERN_ERR PFX "ioremap of 0x%lX failed\n", - (unsigned long)md->phys_addr); - } - /* update the virtual address of the EFI system table */ - check_range_for_systab(md); - } - - BUG_ON(!efi.systab); - - status = phys_efi_set_virtual_address_map( - memmap.desc_size * memmap.nr_map, - memmap.desc_size, - memmap.desc_version, - memmap.phys_map); - - if (status != EFI_SUCCESS) { - printk (KERN_ALERT "You are screwed! " - "Unable to switch EFI into virtual mode " - "(status=%lx)\n", status); - panic("EFI call to SetVirtualAddressMap() failed!"); - } - - /* - * Now that EFI is in virtual mode, update the function - * pointers in the runtime service table to the new virtual addresses. - */ - - efi.get_time = virt_efi_get_time; - efi.set_time = virt_efi_set_time; - efi.get_wakeup_time = virt_efi_get_wakeup_time; - efi.set_wakeup_time = virt_efi_set_wakeup_time; - efi.get_variable = virt_efi_get_variable; - efi.get_next_variable = virt_efi_get_next_variable; - efi.set_variable = virt_efi_set_variable; - efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; - efi.reset_system = virt_efi_reset_system; -} - -void __init -efi_initialize_iomem_resources(struct resource *code_resource, - struct resource *data_resource) -{ - struct resource *res; - efi_memory_desc_t *md; - void *p; - - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - - if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) > - 0x100000000ULL) - continue; - res = kzalloc(sizeof(struct resource), GFP_ATOMIC); - switch (md->type) { - case EFI_RESERVED_TYPE: - res->name = "Reserved Memory"; - break; - case EFI_LOADER_CODE: - res->name = "Loader Code"; - break; - case EFI_LOADER_DATA: - res->name = "Loader Data"; - break; - case EFI_BOOT_SERVICES_DATA: - res->name = "BootServices Data"; - break; - case EFI_BOOT_SERVICES_CODE: - res->name = "BootServices Code"; - break; - case EFI_RUNTIME_SERVICES_CODE: - res->name = "Runtime Service Code"; - break; - case EFI_RUNTIME_SERVICES_DATA: - res->name = "Runtime Service Data"; - break; - case EFI_CONVENTIONAL_MEMORY: - res->name = "Conventional Memory"; - break; - case EFI_UNUSABLE_MEMORY: - res->name = "Unusable Memory"; - break; - case EFI_ACPI_RECLAIM_MEMORY: - res->name = "ACPI Reclaim"; - break; - case EFI_ACPI_MEMORY_NVS: - res->name = "ACPI NVS"; - break; - case EFI_MEMORY_MAPPED_IO: - res->name = "Memory Mapped IO"; - break; - case EFI_MEMORY_MAPPED_IO_PORT_SPACE: - res->name = "Memory Mapped IO Port Space"; - break; - default: - res->name = "Reserved"; - break; - } - res->start = md->phys_addr; - res->end = res->start + ((md->num_pages << EFI_PAGE_SHIFT) - 1); - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - if (request_resource(&iomem_resource, res) < 0) - printk(KERN_ERR PFX "Failed to allocate res %s : " - "0x%llx-0x%llx\n", res->name, - (unsigned long long)res->start, - (unsigned long long)res->end); - /* - * We don't know which region contains kernel data so we try - * it repeatedly and let the resource manager test it. - */ - if (md->type == EFI_CONVENTIONAL_MEMORY) { - request_resource(res, code_resource); - request_resource(res, data_resource); -#ifdef CONFIG_KEXEC - request_resource(res, &crashk_res); -#endif - } - } -} - -/* - * Convenience functions to obtain memory types and attributes - */ - -u32 efi_mem_type(unsigned long phys_addr) -{ - efi_memory_desc_t *md; - void *p; - - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - if ((md->phys_addr <= phys_addr) && (phys_addr < - (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) - return md->type; - } - return 0; -} - -u64 efi_mem_attributes(unsigned long phys_addr) -{ - efi_memory_desc_t *md; - void *p; - - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - if ((md->phys_addr <= phys_addr) && (phys_addr < - (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) - return md->attribute; - } - return 0; -} diff --git a/arch/i386/kernel/efi_stub_32.S b/arch/i386/kernel/efi_stub_32.S deleted file mode 100644 index ef00bb77d7e..00000000000 --- a/arch/i386/kernel/efi_stub_32.S +++ /dev/null @@ -1,122 +0,0 @@ -/* - * EFI call stub for IA32. - * - * This stub allows us to make EFI calls in physical mode with interrupts - * turned off. - */ - -#include -#include - -/* - * efi_call_phys(void *, ...) is a function with variable parameters. - * All the callers of this function assure that all the parameters are 4-bytes. - */ - -/* - * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. - * So we'd better save all of them at the beginning of this function and restore - * at the end no matter how many we use, because we can not assure EFI runtime - * service functions will comply with gcc calling convention, too. - */ - -.text -ENTRY(efi_call_phys) - /* - * 0. The function can only be called in Linux kernel. So CS has been - * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found - * the values of these registers are the same. And, the corresponding - * GDT entries are identical. So I will do nothing about segment reg - * and GDT, but change GDT base register in prelog and epilog. - */ - - /* - * 1. Now I am running with EIP = + PAGE_OFFSET. - * But to make it smoothly switch from virtual mode to flat mode. - * The mapping of lower virtual memory has been created in prelog and - * epilog. - */ - movl $1f, %edx - subl $__PAGE_OFFSET, %edx - jmp *%edx -1: - - /* - * 2. Now on the top of stack is the return - * address in the caller of efi_call_phys(), then parameter 1, - * parameter 2, ..., param n. To make things easy, we save the return - * address of efi_call_phys in a global variable. - */ - popl %edx - movl %edx, saved_return_addr - /* get the function pointer into ECX*/ - popl %ecx - movl %ecx, efi_rt_function_ptr - movl $2f, %edx - subl $__PAGE_OFFSET, %edx - pushl %edx - - /* - * 3. Clear PG bit in %CR0. - */ - movl %cr0, %edx - andl $0x7fffffff, %edx - movl %edx, %cr0 - jmp 1f -1: - - /* - * 4. Adjust stack pointer. - */ - subl $__PAGE_OFFSET, %esp - - /* - * 5. Call the physical function. - */ - jmp *%ecx - -2: - /* - * 6. After EFI runtime service returns, control will return to - * following instruction. We'd better readjust stack pointer first. - */ - addl $__PAGE_OFFSET, %esp - - /* - * 7. Restore PG bit - */ - movl %cr0, %edx - orl $0x80000000, %edx - movl %edx, %cr0 - jmp 1f -1: - /* - * 8. Now restore the virtual mode from flat mode by - * adding EIP with PAGE_OFFSET. - */ - movl $1f, %edx - jmp *%edx -1: - - /* - * 9. Balance the stack. And because EAX contain the return value, - * we'd better not clobber it. - */ - leal efi_rt_function_ptr, %edx - movl (%edx), %ecx - pushl %ecx - - /* - * 10. Push the saved return address onto the stack and return. - */ - leal saved_return_addr, %edx - movl (%edx), %ecx - pushl %ecx - ret -.previous - -.data -saved_return_addr: - .long 0 -efi_rt_function_ptr: - .long 0 diff --git a/arch/i386/kernel/entry_32.S b/arch/i386/kernel/entry_32.S deleted file mode 100644 index 290b7bc82da..00000000000 --- a/arch/i386/kernel/entry_32.S +++ /dev/null @@ -1,1112 +0,0 @@ -/* - * linux/arch/i386/entry.S - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -/* - * entry.S contains the system-call and fault low-level handling routines. - * This also contains the timer-interrupt handler, as well as all interrupts - * and faults that can result in a task-switch. - * - * NOTE: This code handles signal-recognition, which happens every time - * after a timer-interrupt and after each system call. - * - * I changed all the .align's to 4 (16 byte alignment), as that's faster - * on a 486. - * - * Stack layout in 'syscall_exit': - * ptrace needs to have all regs on the stack. - * if the order here is changed, it needs to be - * updated in fork.c:copy_process, signal.c:do_signal, - * ptrace.c and ptrace.h - * - * 0(%esp) - %ebx - * 4(%esp) - %ecx - * 8(%esp) - %edx - * C(%esp) - %esi - * 10(%esp) - %edi - * 14(%esp) - %ebp - * 18(%esp) - %eax - * 1C(%esp) - %ds - * 20(%esp) - %es - * 24(%esp) - %fs - * 28(%esp) - orig_eax - * 2C(%esp) - %eip - * 30(%esp) - %cs - * 34(%esp) - %eflags - * 38(%esp) - %oldesp - * 3C(%esp) - %oldss - * - * "current" is in register %ebx during any slow entries. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "irq_vectors.h" - -/* - * We use macros for low-level operations which need to be overridden - * for paravirtualization. The following will never clobber any registers: - * INTERRUPT_RETURN (aka. "iret") - * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") - * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). - * - * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must - * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). - * Allowing a register to be clobbered can shrink the paravirt replacement - * enough to patch inline, increasing performance. - */ - -#define nr_syscalls ((syscall_table_size)/4) - -CF_MASK = 0x00000001 -TF_MASK = 0x00000100 -IF_MASK = 0x00000200 -DF_MASK = 0x00000400 -NT_MASK = 0x00004000 -VM_MASK = 0x00020000 - -#ifdef CONFIG_PREEMPT -#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF -#else -#define preempt_stop(clobbers) -#define resume_kernel restore_nocheck -#endif - -.macro TRACE_IRQS_IRET -#ifdef CONFIG_TRACE_IRQFLAGS - testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off? - jz 1f - TRACE_IRQS_ON -1: -#endif -.endm - -#ifdef CONFIG_VM86 -#define resume_userspace_sig check_userspace -#else -#define resume_userspace_sig resume_userspace -#endif - -#define SAVE_ALL \ - cld; \ - pushl %fs; \ - CFI_ADJUST_CFA_OFFSET 4;\ - /*CFI_REL_OFFSET fs, 0;*/\ - pushl %es; \ - CFI_ADJUST_CFA_OFFSET 4;\ - /*CFI_REL_OFFSET es, 0;*/\ - pushl %ds; \ - CFI_ADJUST_CFA_OFFSET 4;\ - /*CFI_REL_OFFSET ds, 0;*/\ - pushl %eax; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET eax, 0;\ - pushl %ebp; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET ebp, 0;\ - pushl %edi; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET edi, 0;\ - pushl %esi; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET esi, 0;\ - pushl %edx; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET edx, 0;\ - pushl %ecx; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET ecx, 0;\ - pushl %ebx; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET ebx, 0;\ - movl $(__USER_DS), %edx; \ - movl %edx, %ds; \ - movl %edx, %es; \ - movl $(__KERNEL_PERCPU), %edx; \ - movl %edx, %fs - -#define RESTORE_INT_REGS \ - popl %ebx; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE ebx;\ - popl %ecx; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE ecx;\ - popl %edx; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE edx;\ - popl %esi; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE esi;\ - popl %edi; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE edi;\ - popl %ebp; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE ebp;\ - popl %eax; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE eax - -#define RESTORE_REGS \ - RESTORE_INT_REGS; \ -1: popl %ds; \ - CFI_ADJUST_CFA_OFFSET -4;\ - /*CFI_RESTORE ds;*/\ -2: popl %es; \ - CFI_ADJUST_CFA_OFFSET -4;\ - /*CFI_RESTORE es;*/\ -3: popl %fs; \ - CFI_ADJUST_CFA_OFFSET -4;\ - /*CFI_RESTORE fs;*/\ -.pushsection .fixup,"ax"; \ -4: movl $0,(%esp); \ - jmp 1b; \ -5: movl $0,(%esp); \ - jmp 2b; \ -6: movl $0,(%esp); \ - jmp 3b; \ -.section __ex_table,"a";\ - .align 4; \ - .long 1b,4b; \ - .long 2b,5b; \ - .long 3b,6b; \ -.popsection - -#define RING0_INT_FRAME \ - CFI_STARTPROC simple;\ - CFI_SIGNAL_FRAME;\ - CFI_DEF_CFA esp, 3*4;\ - /*CFI_OFFSET cs, -2*4;*/\ - CFI_OFFSET eip, -3*4 - -#define RING0_EC_FRAME \ - CFI_STARTPROC simple;\ - CFI_SIGNAL_FRAME;\ - CFI_DEF_CFA esp, 4*4;\ - /*CFI_OFFSET cs, -2*4;*/\ - CFI_OFFSET eip, -3*4 - -#define RING0_PTREGS_FRAME \ - CFI_STARTPROC simple;\ - CFI_SIGNAL_FRAME;\ - CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ - /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ - CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ - /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ - /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ - CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ - CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ - CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ - CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ - CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ - CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ - CFI_OFFSET ebx, PT_EBX-PT_OLDESP - -ENTRY(ret_from_fork) - CFI_STARTPROC - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - call schedule_tail - GET_THREAD_INFO(%ebp) - popl %eax - CFI_ADJUST_CFA_OFFSET -4 - pushl $0x0202 # Reset kernel eflags - CFI_ADJUST_CFA_OFFSET 4 - popfl - CFI_ADJUST_CFA_OFFSET -4 - jmp syscall_exit - CFI_ENDPROC -END(ret_from_fork) - -/* - * Return to user mode is not as complex as all this looks, - * but we want the default path for a system call return to - * go as quickly as possible which is why some of this is - * less clear than it otherwise should be. - */ - - # userspace resumption stub bypassing syscall exit tracing - ALIGN - RING0_PTREGS_FRAME -ret_from_exception: - preempt_stop(CLBR_ANY) -ret_from_intr: - GET_THREAD_INFO(%ebp) -check_userspace: - movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS - movb PT_CS(%esp), %al - andl $(VM_MASK | SEGMENT_RPL_MASK), %eax - cmpl $USER_RPL, %eax - jb resume_kernel # not returning to v8086 or userspace - -ENTRY(resume_userspace) - DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret - movl TI_flags(%ebp), %ecx - andl $_TIF_WORK_MASK, %ecx # is there any work to be done on - # int/exception return? - jne work_pending - jmp restore_all -END(ret_from_exception) - -#ifdef CONFIG_PREEMPT -ENTRY(resume_kernel) - DISABLE_INTERRUPTS(CLBR_ANY) - cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? - jnz restore_nocheck -need_resched: - movl TI_flags(%ebp), %ecx # need_resched set ? - testb $_TIF_NEED_RESCHED, %cl - jz restore_all - testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ? - jz restore_all - call preempt_schedule_irq - jmp need_resched -END(resume_kernel) -#endif - CFI_ENDPROC - -/* SYSENTER_RETURN points to after the "sysenter" instruction in - the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ - - # sysenter call handler stub -ENTRY(sysenter_entry) - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA esp, 0 - CFI_REGISTER esp, ebp - movl TSS_sysenter_esp0(%esp),%esp -sysenter_past_esp: - /* - * No need to follow this irqs on/off section: the syscall - * disabled irqs and here we enable it straight after entry: - */ - ENABLE_INTERRUPTS(CLBR_NONE) - pushl $(__USER_DS) - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET ss, 0*/ - pushl %ebp - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET esp, 0 - pushfl - CFI_ADJUST_CFA_OFFSET 4 - pushl $(__USER_CS) - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET cs, 0*/ - /* - * Push current_thread_info()->sysenter_return to the stack. - * A tiny bit of offset fixup is necessary - 4*4 means the 4 words - * pushed above; +8 corresponds to copy_thread's esp0 setting. - */ - pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET eip, 0 - -/* - * Load the potential sixth argument from user stack. - * Careful about security. - */ - cmpl $__PAGE_OFFSET-3,%ebp - jae syscall_fault -1: movl (%ebp),%ebp -.section __ex_table,"a" - .align 4 - .long 1b,syscall_fault -.previous - - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - GET_THREAD_INFO(%ebp) - - /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) - jnz syscall_trace_entry - cmpl $(nr_syscalls), %eax - jae syscall_badsys - call *sys_call_table(,%eax,4) - movl %eax,PT_EAX(%esp) - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - testw $_TIF_ALLWORK_MASK, %cx - jne syscall_exit_work -/* if something modifies registers it must also disable sysexit */ - movl PT_EIP(%esp), %edx - movl PT_OLDESP(%esp), %ecx - xorl %ebp,%ebp - TRACE_IRQS_ON -1: mov PT_FS(%esp), %fs - ENABLE_INTERRUPTS_SYSEXIT - CFI_ENDPROC -.pushsection .fixup,"ax" -2: movl $0,PT_FS(%esp) - jmp 1b -.section __ex_table,"a" - .align 4 - .long 1b,2b -.popsection -ENDPROC(sysenter_entry) - - # system call handler stub -ENTRY(system_call) - RING0_INT_FRAME # can't unwind into user space anyway - pushl %eax # save orig_eax - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - GET_THREAD_INFO(%ebp) - # system call tracing in operation / emulation - /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) - jnz syscall_trace_entry - cmpl $(nr_syscalls), %eax - jae syscall_badsys -syscall_call: - call *sys_call_table(,%eax,4) - movl %eax,PT_EAX(%esp) # store the return value -syscall_exit: - DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret - TRACE_IRQS_OFF - testl $TF_MASK,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit - jz no_singlestep - orl $_TIF_SINGLESTEP,TI_flags(%ebp) -no_singlestep: - movl TI_flags(%ebp), %ecx - testw $_TIF_ALLWORK_MASK, %cx # current->work - jne syscall_exit_work - -restore_all: - movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS - # Warning: PT_OLDSS(%esp) contains the wrong/random values if we - # are returning to the kernel. - # See comments in process.c:copy_thread() for details. - movb PT_OLDSS(%esp), %ah - movb PT_CS(%esp), %al - andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax - cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax - CFI_REMEMBER_STATE - je ldt_ss # returning to user-space with LDT SS -restore_nocheck: - TRACE_IRQS_IRET -restore_nocheck_notrace: - RESTORE_REGS - addl $4, %esp # skip orig_eax/error_code - CFI_ADJUST_CFA_OFFSET -4 -1: INTERRUPT_RETURN -.section .fixup,"ax" -iret_exc: - pushl $0 # no error code - pushl $do_iret_error - jmp error_code -.previous -.section __ex_table,"a" - .align 4 - .long 1b,iret_exc -.previous - - CFI_RESTORE_STATE -ldt_ss: - larl PT_OLDSS(%esp), %eax - jnz restore_nocheck - testl $0x00400000, %eax # returning to 32bit stack? - jnz restore_nocheck # allright, normal return - -#ifdef CONFIG_PARAVIRT - /* - * The kernel can't run on a non-flat stack if paravirt mode - * is active. Rather than try to fixup the high bits of - * ESP, bypass this code entirely. This may break DOSemu - * and/or Wine support in a paravirt VM, although the option - * is still available to implement the setting of the high - * 16-bits in the INTERRUPT_RETURN paravirt-op. - */ - cmpl $0, paravirt_ops+PARAVIRT_enabled - jne restore_nocheck -#endif - - /* If returning to userspace with 16bit stack, - * try to fix the higher word of ESP, as the CPU - * won't restore it. - * This is an "official" bug of all the x86-compatible - * CPUs, which we can try to work around to make - * dosemu and wine happy. */ - movl PT_OLDESP(%esp), %eax - movl %esp, %edx - call patch_espfix_desc - pushl $__ESPFIX_SS - CFI_ADJUST_CFA_OFFSET 4 - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - DISABLE_INTERRUPTS(CLBR_EAX) - TRACE_IRQS_OFF - lss (%esp), %esp - CFI_ADJUST_CFA_OFFSET -8 - jmp restore_nocheck - CFI_ENDPROC -ENDPROC(system_call) - - # perform work that needs to be done immediately before resumption - ALIGN - RING0_PTREGS_FRAME # can't unwind into user space anyway -work_pending: - testb $_TIF_NEED_RESCHED, %cl - jz work_notifysig -work_resched: - call schedule - DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt - # setting need_resched or sigpending - # between sampling and the iret - TRACE_IRQS_OFF - movl TI_flags(%ebp), %ecx - andl $_TIF_WORK_MASK, %ecx # is there any work to be done other - # than syscall tracing? - jz restore_all - testb $_TIF_NEED_RESCHED, %cl - jnz work_resched - -work_notifysig: # deal with pending signals and - # notify-resume requests -#ifdef CONFIG_VM86 - testl $VM_MASK, PT_EFLAGS(%esp) - movl %esp, %eax - jne work_notifysig_v86 # returning to kernel-space or - # vm86-space - xorl %edx, %edx - call do_notify_resume - jmp resume_userspace_sig - - ALIGN -work_notifysig_v86: - pushl %ecx # save ti_flags for do_notify_resume - CFI_ADJUST_CFA_OFFSET 4 - call save_v86_state # %eax contains pt_regs pointer - popl %ecx - CFI_ADJUST_CFA_OFFSET -4 - movl %eax, %esp -#else - movl %esp, %eax -#endif - xorl %edx, %edx - call do_notify_resume - jmp resume_userspace_sig -END(work_pending) - - # perform syscall exit tracing - ALIGN -syscall_trace_entry: - movl $-ENOSYS,PT_EAX(%esp) - movl %esp, %eax - xorl %edx,%edx - call do_syscall_trace - cmpl $0, %eax - jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, - # so must skip actual syscall - movl PT_ORIG_EAX(%esp), %eax - cmpl $(nr_syscalls), %eax - jnae syscall_call - jmp syscall_exit -END(syscall_trace_entry) - - # perform syscall exit tracing - ALIGN -syscall_exit_work: - testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl - jz work_pending - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call - # schedule() instead - movl %esp, %eax - movl $1, %edx - call do_syscall_trace - jmp resume_userspace -END(syscall_exit_work) - CFI_ENDPROC - - RING0_INT_FRAME # can't unwind into user space anyway -syscall_fault: - pushl %eax # save orig_eax - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - GET_THREAD_INFO(%ebp) - movl $-EFAULT,PT_EAX(%esp) - jmp resume_userspace -END(syscall_fault) - -syscall_badsys: - movl $-ENOSYS,PT_EAX(%esp) - jmp resume_userspace -END(syscall_badsys) - CFI_ENDPROC - -#define FIXUP_ESPFIX_STACK \ - /* since we are on a wrong stack, we cant make it a C code :( */ \ - PER_CPU(gdt_page, %ebx); \ - GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ - addl %esp, %eax; \ - pushl $__KERNEL_DS; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl %eax; \ - CFI_ADJUST_CFA_OFFSET 4; \ - lss (%esp), %esp; \ - CFI_ADJUST_CFA_OFFSET -8; -#define UNWIND_ESPFIX_STACK \ - movl %ss, %eax; \ - /* see if on espfix stack */ \ - cmpw $__ESPFIX_SS, %ax; \ - jne 27f; \ - movl $__KERNEL_DS, %eax; \ - movl %eax, %ds; \ - movl %eax, %es; \ - /* switch to normal stack */ \ - FIXUP_ESPFIX_STACK; \ -27:; - -/* - * Build the entry stubs and pointer table with - * some assembler magic. - */ -.data -ENTRY(interrupt) -.text - -ENTRY(irq_entries_start) - RING0_INT_FRAME -vector=0 -.rept NR_IRQS - ALIGN - .if vector - CFI_ADJUST_CFA_OFFSET -4 - .endif -1: pushl $~(vector) - CFI_ADJUST_CFA_OFFSET 4 - jmp common_interrupt - .previous - .long 1b - .text -vector=vector+1 -.endr -END(irq_entries_start) - -.previous -END(interrupt) -.previous - -/* - * the CPU automatically disables interrupts when executing an IRQ vector, - * so IRQ-flags tracing has to follow that: - */ - ALIGN -common_interrupt: - SAVE_ALL - TRACE_IRQS_OFF - movl %esp,%eax - call do_IRQ - jmp ret_from_intr -ENDPROC(common_interrupt) - CFI_ENDPROC - -#define BUILD_INTERRUPT(name, nr) \ -ENTRY(name) \ - RING0_INT_FRAME; \ - pushl $~(nr); \ - CFI_ADJUST_CFA_OFFSET 4; \ - SAVE_ALL; \ - TRACE_IRQS_OFF \ - movl %esp,%eax; \ - call smp_##name; \ - jmp ret_from_intr; \ - CFI_ENDPROC; \ -ENDPROC(name) - -/* The include is where all of the SMP etc. interrupts come from */ -#include "entry_arch.h" - -KPROBE_ENTRY(page_fault) - RING0_EC_FRAME - pushl $do_page_fault - CFI_ADJUST_CFA_OFFSET 4 - ALIGN -error_code: - /* the function address is in %fs's slot on the stack */ - pushl %es - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET es, 0*/ - pushl %ds - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET ds, 0*/ - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET eax, 0 - pushl %ebp - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ebp, 0 - pushl %edi - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET edi, 0 - pushl %esi - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET esi, 0 - pushl %edx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET edx, 0 - pushl %ecx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ecx, 0 - pushl %ebx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ebx, 0 - cld - pushl %fs - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET fs, 0*/ - movl $(__KERNEL_PERCPU), %ecx - movl %ecx, %fs - UNWIND_ESPFIX_STACK - popl %ecx - CFI_ADJUST_CFA_OFFSET -4 - /*CFI_REGISTER es, ecx*/ - movl PT_FS(%esp), %edi # get the function address - movl PT_ORIG_EAX(%esp), %edx # get the error code - movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart - mov %ecx, PT_FS(%esp) - /*CFI_REL_OFFSET fs, ES*/ - movl $(__USER_DS), %ecx - movl %ecx, %ds - movl %ecx, %es - movl %esp,%eax # pt_regs pointer - call *%edi - jmp ret_from_exception - CFI_ENDPROC -KPROBE_END(page_fault) - -ENTRY(coprocessor_error) - RING0_INT_FRAME - pushl $0 - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_coprocessor_error - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(coprocessor_error) - -ENTRY(simd_coprocessor_error) - RING0_INT_FRAME - pushl $0 - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_simd_coprocessor_error - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(simd_coprocessor_error) - -ENTRY(device_not_available) - RING0_INT_FRAME - pushl $-1 # mark this as an int - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - GET_CR0_INTO_EAX - testl $0x4, %eax # EM (math emulation bit) - jne device_not_available_emulate - preempt_stop(CLBR_ANY) - call math_state_restore - jmp ret_from_exception -device_not_available_emulate: - pushl $0 # temporary storage for ORIG_EIP - CFI_ADJUST_CFA_OFFSET 4 - call math_emulate - addl $4, %esp - CFI_ADJUST_CFA_OFFSET -4 - jmp ret_from_exception - CFI_ENDPROC -END(device_not_available) - -/* - * Debug traps and NMI can happen at the one SYSENTER instruction - * that sets up the real kernel stack. Check here, since we can't - * allow the wrong stack to be used. - * - * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have - * already pushed 3 words if it hits on the sysenter instruction: - * eflags, cs and eip. - * - * We just load the right stack, and push the three (known) values - * by hand onto the new stack - while updating the return eip past - * the instruction that would have done it for sysenter. - */ -#define FIX_STACK(offset, ok, label) \ - cmpw $__KERNEL_CS,4(%esp); \ - jne ok; \ -label: \ - movl TSS_sysenter_esp0+offset(%esp),%esp; \ - CFI_DEF_CFA esp, 0; \ - CFI_UNDEFINED eip; \ - pushfl; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl $__KERNEL_CS; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl $sysenter_past_esp; \ - CFI_ADJUST_CFA_OFFSET 4; \ - CFI_REL_OFFSET eip, 0 - -KPROBE_ENTRY(debug) - RING0_INT_FRAME - cmpl $sysenter_entry,(%esp) - jne debug_stack_correct - FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) -debug_stack_correct: - pushl $-1 # mark this as an int - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - xorl %edx,%edx # error code 0 - movl %esp,%eax # pt_regs pointer - call do_debug - jmp ret_from_exception - CFI_ENDPROC -KPROBE_END(debug) - -/* - * NMI is doubly nasty. It can happen _while_ we're handling - * a debug fault, and the debug fault hasn't yet been able to - * clear up the stack. So we first check whether we got an - * NMI on the sysenter entry path, but after that we need to - * check whether we got an NMI on the debug path where the debug - * fault happened on the sysenter path. - */ -KPROBE_ENTRY(nmi) - RING0_INT_FRAME - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - movl %ss, %eax - cmpw $__ESPFIX_SS, %ax - popl %eax - CFI_ADJUST_CFA_OFFSET -4 - je nmi_espfix_stack - cmpl $sysenter_entry,(%esp) - je nmi_stack_fixup - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - movl %esp,%eax - /* Do not access memory above the end of our stack page, - * it might not exist. - */ - andl $(THREAD_SIZE-1),%eax - cmpl $(THREAD_SIZE-20),%eax - popl %eax - CFI_ADJUST_CFA_OFFSET -4 - jae nmi_stack_correct - cmpl $sysenter_entry,12(%esp) - je nmi_debug_stack_check -nmi_stack_correct: - /* We have a RING0_INT_FRAME here */ - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - xorl %edx,%edx # zero error code - movl %esp,%eax # pt_regs pointer - call do_nmi - jmp restore_nocheck_notrace - CFI_ENDPROC - -nmi_stack_fixup: - RING0_INT_FRAME - FIX_STACK(12,nmi_stack_correct, 1) - jmp nmi_stack_correct - -nmi_debug_stack_check: - /* We have a RING0_INT_FRAME here */ - cmpw $__KERNEL_CS,16(%esp) - jne nmi_stack_correct - cmpl $debug,(%esp) - jb nmi_stack_correct - cmpl $debug_esp_fix_insn,(%esp) - ja nmi_stack_correct - FIX_STACK(24,nmi_stack_correct, 1) - jmp nmi_stack_correct - -nmi_espfix_stack: - /* We have a RING0_INT_FRAME here. - * - * create the pointer to lss back - */ - pushl %ss - CFI_ADJUST_CFA_OFFSET 4 - pushl %esp - CFI_ADJUST_CFA_OFFSET 4 - addw $4, (%esp) - /* copy the iret frame of 12 bytes */ - .rept 3 - pushl 16(%esp) - CFI_ADJUST_CFA_OFFSET 4 - .endr - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - FIXUP_ESPFIX_STACK # %eax == %esp - xorl %edx,%edx # zero error code - call do_nmi - RESTORE_REGS - lss 12+4(%esp), %esp # back to espfix stack - CFI_ADJUST_CFA_OFFSET -24 -1: INTERRUPT_RETURN - CFI_ENDPROC -.section __ex_table,"a" - .align 4 - .long 1b,iret_exc -.previous -KPROBE_END(nmi) - -#ifdef CONFIG_PARAVIRT -ENTRY(native_iret) -1: iret -.section __ex_table,"a" - .align 4 - .long 1b,iret_exc -.previous -END(native_iret) - -ENTRY(native_irq_enable_sysexit) - sti - sysexit -END(native_irq_enable_sysexit) -#endif - -KPROBE_ENTRY(int3) - RING0_INT_FRAME - pushl $-1 # mark this as an int - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - xorl %edx,%edx # zero error code - movl %esp,%eax # pt_regs pointer - call do_int3 - jmp ret_from_exception - CFI_ENDPROC -KPROBE_END(int3) - -ENTRY(overflow) - RING0_INT_FRAME - pushl $0 - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_overflow - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(overflow) - -ENTRY(bounds) - RING0_INT_FRAME - pushl $0 - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_bounds - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(bounds) - -ENTRY(invalid_op) - RING0_INT_FRAME - pushl $0 - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_invalid_op - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(invalid_op) - -ENTRY(coprocessor_segment_overrun) - RING0_INT_FRAME - pushl $0 - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_coprocessor_segment_overrun - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(coprocessor_segment_overrun) - -ENTRY(invalid_TSS) - RING0_EC_FRAME - pushl $do_invalid_TSS - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(invalid_TSS) - -ENTRY(segment_not_present) - RING0_EC_FRAME - pushl $do_segment_not_present - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(segment_not_present) - -ENTRY(stack_segment) - RING0_EC_FRAME - pushl $do_stack_segment - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(stack_segment) - -KPROBE_ENTRY(general_protection) - RING0_EC_FRAME - pushl $do_general_protection - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -KPROBE_END(general_protection) - -ENTRY(alignment_check) - RING0_EC_FRAME - pushl $do_alignment_check - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(alignment_check) - -ENTRY(divide_error) - RING0_INT_FRAME - pushl $0 # no error code - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_divide_error - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(divide_error) - -#ifdef CONFIG_X86_MCE -ENTRY(machine_check) - RING0_INT_FRAME - pushl $0 - CFI_ADJUST_CFA_OFFSET 4 - pushl machine_check_vector - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(machine_check) -#endif - -ENTRY(spurious_interrupt_bug) - RING0_INT_FRAME - pushl $0 - CFI_ADJUST_CFA_OFFSET 4 - pushl $do_spurious_interrupt_bug - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -END(spurious_interrupt_bug) - -ENTRY(kernel_thread_helper) - pushl $0 # fake return address for unwinder - CFI_STARTPROC - movl %edx,%eax - push %edx - CFI_ADJUST_CFA_OFFSET 4 - call *%ebx - push %eax - CFI_ADJUST_CFA_OFFSET 4 - call do_exit - CFI_ENDPROC -ENDPROC(kernel_thread_helper) - -#ifdef CONFIG_XEN -ENTRY(xen_hypervisor_callback) - CFI_STARTPROC - pushl $0 - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - TRACE_IRQS_OFF - - /* Check to see if we got the event in the critical - region in xen_iret_direct, after we've reenabled - events and checked for pending events. This simulates - iret instruction's behaviour where it delivers a - pending interrupt when enabling interrupts. */ - movl PT_EIP(%esp),%eax - cmpl $xen_iret_start_crit,%eax - jb 1f - cmpl $xen_iret_end_crit,%eax - jae 1f - - call xen_iret_crit_fixup - -1: mov %esp, %eax - call xen_evtchn_do_upcall - jmp ret_from_intr - CFI_ENDPROC -ENDPROC(xen_hypervisor_callback) - -# Hypervisor uses this for application faults while it executes. -# We get here for two reasons: -# 1. Fault while reloading DS, ES, FS or GS -# 2. Fault while executing IRET -# Category 1 we fix up by reattempting the load, and zeroing the segment -# register if the load fails. -# Category 2 we fix up by jumping to do_iret_error. We cannot use the -# normal Linux return path in this case because if we use the IRET hypercall -# to pop the stack frame we end up in an infinite loop of failsafe callbacks. -# We distinguish between categories by maintaining a status value in EAX. -ENTRY(xen_failsafe_callback) - CFI_STARTPROC - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - movl $1,%eax -1: mov 4(%esp),%ds -2: mov 8(%esp),%es -3: mov 12(%esp),%fs -4: mov 16(%esp),%gs - testl %eax,%eax - popl %eax - CFI_ADJUST_CFA_OFFSET -4 - lea 16(%esp),%esp - CFI_ADJUST_CFA_OFFSET -16 - jz 5f - addl $16,%esp - jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) -5: pushl $0 # EAX == 0 => Category 1 (Bad segment) - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - jmp ret_from_exception - CFI_ENDPROC - -.section .fixup,"ax" -6: xorl %eax,%eax - movl %eax,4(%esp) - jmp 1b -7: xorl %eax,%eax - movl %eax,8(%esp) - jmp 2b -8: xorl %eax,%eax - movl %eax,12(%esp) - jmp 3b -9: xorl %eax,%eax - movl %eax,16(%esp) - jmp 4b -.previous -.section __ex_table,"a" - .align 4 - .long 1b,6b - .long 2b,7b - .long 3b,8b - .long 4b,9b -.previous -ENDPROC(xen_failsafe_callback) - -#endif /* CONFIG_XEN */ - -.section .rodata,"a" -#include "syscall_table_32.S" - -syscall_table_size=(.-sys_call_table) diff --git a/arch/i386/kernel/geode_32.c b/arch/i386/kernel/geode_32.c deleted file mode 100644 index 41e8aec4c61..00000000000 --- a/arch/i386/kernel/geode_32.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * AMD Geode southbridge support code - * Copyright (C) 2006, Advanced Micro Devices, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public License - * as published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include - -static struct { - char *name; - u32 msr; - int size; - u32 base; -} lbars[] = { - { "geode-pms", MSR_LBAR_PMS, LBAR_PMS_SIZE, 0 }, - { "geode-acpi", MSR_LBAR_ACPI, LBAR_ACPI_SIZE, 0 }, - { "geode-gpio", MSR_LBAR_GPIO, LBAR_GPIO_SIZE, 0 }, - { "geode-mfgpt", MSR_LBAR_MFGPT, LBAR_MFGPT_SIZE, 0 } -}; - -static void __init init_lbars(void) -{ - u32 lo, hi; - int i; - - for (i = 0; i < ARRAY_SIZE(lbars); i++) { - rdmsr(lbars[i].msr, lo, hi); - if (hi & 0x01) - lbars[i].base = lo & 0x0000ffff; - - if (lbars[i].base == 0) - printk(KERN_ERR "geode: Couldn't initialize '%s'\n", - lbars[i].name); - } -} - -int geode_get_dev_base(unsigned int dev) -{ - BUG_ON(dev >= ARRAY_SIZE(lbars)); - return lbars[dev].base; -} -EXPORT_SYMBOL_GPL(geode_get_dev_base); - -/* === GPIO API === */ - -void geode_gpio_set(unsigned int gpio, unsigned int reg) -{ - u32 base = geode_get_dev_base(GEODE_DEV_GPIO); - - if (!base) - return; - - if (gpio < 16) - outl(1 << gpio, base + reg); - else - outl(1 << (gpio - 16), base + 0x80 + reg); -} -EXPORT_SYMBOL_GPL(geode_gpio_set); - -void geode_gpio_clear(unsigned int gpio, unsigned int reg) -{ - u32 base = geode_get_dev_base(GEODE_DEV_GPIO); - - if (!base) - return; - - if (gpio < 16) - outl(1 << (gpio + 16), base + reg); - else - outl(1 << gpio, base + 0x80 + reg); -} -EXPORT_SYMBOL_GPL(geode_gpio_clear); - -int geode_gpio_isset(unsigned int gpio, unsigned int reg) -{ - u32 base = geode_get_dev_base(GEODE_DEV_GPIO); - - if (!base) - return 0; - - if (gpio < 16) - return (inl(base + reg) & (1 << gpio)) ? 1 : 0; - else - return (inl(base + 0x80 + reg) & (1 << (gpio - 16))) ? 1 : 0; -} -EXPORT_SYMBOL_GPL(geode_gpio_isset); - -void geode_gpio_set_irq(unsigned int group, unsigned int irq) -{ - u32 lo, hi; - - if (group > 7 || irq > 15) - return; - - rdmsr(MSR_PIC_ZSEL_HIGH, lo, hi); - - lo &= ~(0xF << (group * 4)); - lo |= (irq & 0xF) << (group * 4); - - wrmsr(MSR_PIC_ZSEL_HIGH, lo, hi); -} -EXPORT_SYMBOL_GPL(geode_gpio_set_irq); - -void geode_gpio_setup_event(unsigned int gpio, int pair, int pme) -{ - u32 base = geode_get_dev_base(GEODE_DEV_GPIO); - u32 offset, shift, val; - - if (gpio >= 24) - offset = GPIO_MAP_W; - else if (gpio >= 16) - offset = GPIO_MAP_Z; - else if (gpio >= 8) - offset = GPIO_MAP_Y; - else - offset = GPIO_MAP_X; - - shift = (gpio % 8) * 4; - - val = inl(base + offset); - - /* Clear whatever was there before */ - val &= ~(0xF << shift); - - /* And set the new value */ - - val |= ((pair & 7) << shift); - - /* Set the PME bit if this is a PME event */ - - if (pme) - val |= (1 << (shift + 3)); - - outl(val, base + offset); -} -EXPORT_SYMBOL_GPL(geode_gpio_setup_event); - -static int __init geode_southbridge_init(void) -{ - if (!is_geode()) - return -ENODEV; - - init_lbars(); - return 0; -} - -postcore_initcall(geode_southbridge_init); diff --git a/arch/i386/kernel/head_32.S b/arch/i386/kernel/head_32.S deleted file mode 100644 index 9150ca9b5f8..00000000000 --- a/arch/i386/kernel/head_32.S +++ /dev/null @@ -1,578 +0,0 @@ -/* - * linux/arch/i386/kernel/head.S -- the 32-bit startup code. - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Enhanced CPU detection and feature setting code by Mike Jagdis - * and Martin Mares, November 1997. - */ - -.text -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * References to members of the new_cpu_data structure. - */ - -#define X86 new_cpu_data+CPUINFO_x86 -#define X86_VENDOR new_cpu_data+CPUINFO_x86_vendor -#define X86_MODEL new_cpu_data+CPUINFO_x86_model -#define X86_MASK new_cpu_data+CPUINFO_x86_mask -#define X86_HARD_MATH new_cpu_data+CPUINFO_hard_math -#define X86_CPUID new_cpu_data+CPUINFO_cpuid_level -#define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability -#define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id - -/* - * This is how much memory *in addition to the memory covered up to - * and including _end* we need mapped initially. - * We need: - * - one bit for each possible page, but only in low memory, which means - * 2^32/4096/8 = 128K worst case (4G/4G split.) - * - enough space to map all low memory, which means - * (2^32/4096) / 1024 pages (worst case, non PAE) - * (2^32/4096) / 512 + 4 pages (worst case for PAE) - * - a few pages for allocator use before the kernel pagetable has - * been set up - * - * Modulo rounding, each megabyte assigned here requires a kilobyte of - * memory, which is currently unreclaimed. - * - * This should be a multiple of a page. - */ -LOW_PAGES = 1<<(32-PAGE_SHIFT_asm) - -#if PTRS_PER_PMD > 1 -PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD -#else -PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD) -#endif -BOOTBITMAP_SIZE = LOW_PAGES / 8 -ALLOCATOR_SLOP = 4 - -INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm - -/* - * 32-bit kernel entrypoint; only used by the boot CPU. On entry, - * %esi points to the real-mode code as a 32-bit pointer. - * CS and DS must be 4 GB flat segments, but we don't depend on - * any particular GDT layout, because we load our own as soon as we - * can. - */ -.section .text.head,"ax",@progbits -ENTRY(startup_32) - -/* - * Set segments to known values. - */ - cld - lgdt boot_gdt_descr - __PAGE_OFFSET - movl $(__BOOT_DS),%eax - movl %eax,%ds - movl %eax,%es - movl %eax,%fs - movl %eax,%gs - -/* - * Clear BSS first so that there are no surprises... - * No need to cld as DF is already clear from cld above... - */ - xorl %eax,%eax - movl $__bss_start - __PAGE_OFFSET,%edi - movl $__bss_stop - __PAGE_OFFSET,%ecx - subl %edi,%ecx - shrl $2,%ecx - rep ; stosl -/* - * Copy bootup parameters out of the way. - * Note: %esi still has the pointer to the real-mode data. - * With the kexec as boot loader, parameter segment might be loaded beyond - * kernel image and might not even be addressable by early boot page tables. - * (kexec on panic case). Hence copy out the parameters before initializing - * page tables. - */ - movl $(boot_params - __PAGE_OFFSET),%edi - movl $(PARAM_SIZE/4),%ecx - cld - rep - movsl - movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi - andl %esi,%esi - jnz 2f # New command line protocol - cmpw $(OLD_CL_MAGIC),OLD_CL_MAGIC_ADDR - jne 1f - movzwl OLD_CL_OFFSET,%esi - addl $(OLD_CL_BASE_ADDR),%esi -2: - movl $(boot_command_line - __PAGE_OFFSET),%edi - movl $(COMMAND_LINE_SIZE/4),%ecx - rep - movsl -1: - -/* - * Initialize page tables. This creates a PDE and a set of page - * tables, which are located immediately beyond _end. The variable - * init_pg_tables_end is set up to point to the first "safe" location. - * Mappings are created both at virtual address 0 (identity mapping) - * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END. - * - * Warning: don't use %esi or the stack in this code. However, %esp - * can be used as a GPR if you really need it... - */ -page_pde_offset = (__PAGE_OFFSET >> 20); - - movl $(pg0 - __PAGE_OFFSET), %edi - movl $(swapper_pg_dir - __PAGE_OFFSET), %edx - movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ -10: - leal 0x007(%edi),%ecx /* Create PDE entry */ - movl %ecx,(%edx) /* Store identity PDE entry */ - movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ - addl $4,%edx - movl $1024, %ecx -11: - stosl - addl $0x1000,%eax - loop 11b - /* End condition: we must map up to and including INIT_MAP_BEYOND_END */ - /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */ - leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp - cmpl %ebp,%eax - jb 10b - movl %edi,(init_pg_tables_end - __PAGE_OFFSET) - - xorl %ebx,%ebx /* This is the boot CPU (BSP) */ - jmp 3f -/* - * Non-boot CPU entry point; entered from trampoline.S - * We can't lgdt here, because lgdt itself uses a data segment, but - * we know the trampoline has already loaded the boot_gdt for us. - * - * If cpu hotplug is not supported then this code can go in init section - * which will be freed later - */ - -#ifndef CONFIG_HOTPLUG_CPU -.section .init.text,"ax",@progbits -#endif - - /* Do an early initialization of the fixmap area */ - movl $(swapper_pg_dir - __PAGE_OFFSET), %edx - movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax - addl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ - movl %eax, 4092(%edx) - -#ifdef CONFIG_SMP -ENTRY(startup_32_smp) - cld - movl $(__BOOT_DS),%eax - movl %eax,%ds - movl %eax,%es - movl %eax,%fs - movl %eax,%gs - -/* - * New page tables may be in 4Mbyte page mode and may - * be using the global pages. - * - * NOTE! If we are on a 486 we may have no cr4 at all! - * So we do not try to touch it unless we really have - * some bits in it to set. This won't work if the BSP - * implements cr4 but this AP does not -- very unlikely - * but be warned! The same applies to the pse feature - * if not equally supported. --macro - * - * NOTE! We have to correct for the fact that we're - * not yet offset PAGE_OFFSET.. - */ -#define cr4_bits mmu_cr4_features-__PAGE_OFFSET - movl cr4_bits,%edx - andl %edx,%edx - jz 6f - movl %cr4,%eax # Turn on paging options (PSE,PAE,..) - orl %edx,%eax - movl %eax,%cr4 - - btl $5, %eax # check if PAE is enabled - jnc 6f - - /* Check if extended functions are implemented */ - movl $0x80000000, %eax - cpuid - cmpl $0x80000000, %eax - jbe 6f - mov $0x80000001, %eax - cpuid - /* Execute Disable bit supported? */ - btl $20, %edx - jnc 6f - - /* Setup EFER (Extended Feature Enable Register) */ - movl $0xc0000080, %ecx - rdmsr - - btsl $11, %eax - /* Make changes effective */ - wrmsr - -6: - /* This is a secondary processor (AP) */ - xorl %ebx,%ebx - incl %ebx - -#endif /* CONFIG_SMP */ -3: - -/* - * Enable paging - */ - movl $swapper_pg_dir-__PAGE_OFFSET,%eax - movl %eax,%cr3 /* set the page table pointer.. */ - movl %cr0,%eax - orl $0x80000000,%eax - movl %eax,%cr0 /* ..and set paging (PG) bit */ - ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */ -1: - /* Set up the stack pointer */ - lss stack_start,%esp - -/* - * Initialize eflags. Some BIOS's leave bits like NT set. This would - * confuse the debugger if this code is traced. - * XXX - best to initialize before switching to protected mode. - */ - pushl $0 - popfl - -#ifdef CONFIG_SMP - andl %ebx,%ebx - jz 1f /* Initial CPU cleans BSS */ - jmp checkCPUtype -1: -#endif /* CONFIG_SMP */ - -/* - * start system 32-bit setup. We need to re-do some of the things done - * in 16-bit mode for the "real" operations. - */ - call setup_idt - -checkCPUtype: - - movl $-1,X86_CPUID # -1 for no CPUID initially - -/* check if it is 486 or 386. */ -/* - * XXX - this does a lot of unnecessary setup. Alignment checks don't - * apply at our cpl of 0 and the stack ought to be aligned already, and - * we don't need to preserve eflags. - */ - - movb $3,X86 # at least 386 - pushfl # push EFLAGS - popl %eax # get EFLAGS - movl %eax,%ecx # save original EFLAGS - xorl $0x240000,%eax # flip AC and ID bits in EFLAGS - pushl %eax # copy to EFLAGS - popfl # set EFLAGS - pushfl # get new EFLAGS - popl %eax # put it in eax - xorl %ecx,%eax # change in flags - pushl %ecx # restore original EFLAGS - popfl - testl $0x40000,%eax # check if AC bit changed - je is386 - - movb $4,X86 # at least 486 - testl $0x200000,%eax # check if ID bit changed - je is486 - - /* get vendor info */ - xorl %eax,%eax # call CPUID with 0 -> return vendor ID - cpuid - movl %eax,X86_CPUID # save CPUID level - movl %ebx,X86_VENDOR_ID # lo 4 chars - movl %edx,X86_VENDOR_ID+4 # next 4 chars - movl %ecx,X86_VENDOR_ID+8 # last 4 chars - - orl %eax,%eax # do we have processor info as well? - je is486 - - movl $1,%eax # Use the CPUID instruction to get CPU type - cpuid - movb %al,%cl # save reg for future use - andb $0x0f,%ah # mask processor family - movb %ah,X86 - andb $0xf0,%al # mask model - shrb $4,%al - movb %al,X86_MODEL - andb $0x0f,%cl # mask mask revision - movb %cl,X86_MASK - movl %edx,X86_CAPABILITY - -is486: movl $0x50022,%ecx # set AM, WP, NE and MP - jmp 2f - -is386: movl $2,%ecx # set MP -2: movl %cr0,%eax - andl $0x80000011,%eax # Save PG,PE,ET - orl %ecx,%eax - movl %eax,%cr0 - - call check_x87 - lgdt early_gdt_descr - lidt idt_descr - ljmp $(__KERNEL_CS),$1f -1: movl $(__KERNEL_DS),%eax # reload all the segment registers - movl %eax,%ss # after changing gdt. - movl %eax,%fs # gets reset once there's real percpu - - movl $(__USER_DS),%eax # DS/ES contains default USER segment - movl %eax,%ds - movl %eax,%es - - xorl %eax,%eax # Clear GS and LDT - movl %eax,%gs - lldt %ax - - cld # gcc2 wants the direction flag cleared at all times - pushl $0 # fake return address for unwinder -#ifdef CONFIG_SMP - movb ready, %cl - movb $1, ready - cmpb $0,%cl # the first CPU calls start_kernel - je 1f - movl $(__KERNEL_PERCPU), %eax - movl %eax,%fs # set this cpu's percpu - jmp initialize_secondary # all other CPUs call initialize_secondary -1: -#endif /* CONFIG_SMP */ - jmp start_kernel - -/* - * We depend on ET to be correct. This checks for 287/387. - */ -check_x87: - movb $0,X86_HARD_MATH - clts - fninit - fstsw %ax - cmpb $0,%al - je 1f - movl %cr0,%eax /* no coprocessor: have to set bits */ - xorl $4,%eax /* set EM */ - movl %eax,%cr0 - ret - ALIGN -1: movb $1,X86_HARD_MATH - .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */ - ret - -/* - * setup_idt - * - * sets up a idt with 256 entries pointing to - * ignore_int, interrupt gates. It doesn't actually load - * idt - that can be done only after paging has been enabled - * and the kernel moved to PAGE_OFFSET. Interrupts - * are enabled elsewhere, when we can be relatively - * sure everything is ok. - * - * Warning: %esi is live across this function. - */ -setup_idt: - lea ignore_int,%edx - movl $(__KERNEL_CS << 16),%eax - movw %dx,%ax /* selector = 0x0010 = cs */ - movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ - - lea idt_table,%edi - mov $256,%ecx -rp_sidt: - movl %eax,(%edi) - movl %edx,4(%edi) - addl $8,%edi - dec %ecx - jne rp_sidt - -.macro set_early_handler handler,trapno - lea \handler,%edx - movl $(__KERNEL_CS << 16),%eax - movw %dx,%ax - movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ - lea idt_table,%edi - movl %eax,8*\trapno(%edi) - movl %edx,8*\trapno+4(%edi) -.endm - - set_early_handler handler=early_divide_err,trapno=0 - set_early_handler handler=early_illegal_opcode,trapno=6 - set_early_handler handler=early_protection_fault,trapno=13 - set_early_handler handler=early_page_fault,trapno=14 - - ret - -early_divide_err: - xor %edx,%edx - pushl $0 /* fake errcode */ - jmp early_fault - -early_illegal_opcode: - movl $6,%edx - pushl $0 /* fake errcode */ - jmp early_fault - -early_protection_fault: - movl $13,%edx - jmp early_fault - -early_page_fault: - movl $14,%edx - jmp early_fault - -early_fault: - cld -#ifdef CONFIG_PRINTK - movl $(__KERNEL_DS),%eax - movl %eax,%ds - movl %eax,%es - cmpl $2,early_recursion_flag - je hlt_loop - incl early_recursion_flag - movl %cr2,%eax - pushl %eax - pushl %edx /* trapno */ - pushl $fault_msg -#ifdef CONFIG_EARLY_PRINTK - call early_printk -#else - call printk -#endif -#endif -hlt_loop: - hlt - jmp hlt_loop - -/* This is the default interrupt "handler" :-) */ - ALIGN -ignore_int: - cld -#ifdef CONFIG_PRINTK - pushl %eax - pushl %ecx - pushl %edx - pushl %es - pushl %ds - movl $(__KERNEL_DS),%eax - movl %eax,%ds - movl %eax,%es - cmpl $2,early_recursion_flag - je hlt_loop - incl early_recursion_flag - pushl 16(%esp) - pushl 24(%esp) - pushl 32(%esp) - pushl 40(%esp) - pushl $int_msg -#ifdef CONFIG_EARLY_PRINTK - call early_printk -#else - call printk -#endif - addl $(5*4),%esp - popl %ds - popl %es - popl %edx - popl %ecx - popl %eax -#endif - iret - -.section .text -/* - * Real beginning of normal "text" segment - */ -ENTRY(stext) -ENTRY(_stext) - -/* - * BSS section - */ -.section ".bss.page_aligned","wa" - .align PAGE_SIZE_asm -ENTRY(swapper_pg_dir) - .fill 1024,4,0 -ENTRY(swapper_pg_pmd) - .fill 1024,4,0 -ENTRY(empty_zero_page) - .fill 4096,1,0 - -/* - * This starts the data section. - */ -.data -ENTRY(stack_start) - .long init_thread_union+THREAD_SIZE - .long __BOOT_DS - -ready: .byte 0 - -early_recursion_flag: - .long 0 - -int_msg: - .asciz "Unknown interrupt or fault at EIP %p %p %p\n" - -fault_msg: - .ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n" - .asciz "Stack: %p %p %p %p %p %p %p %p\n" - -#include "../../x86/xen/xen-head.S" - -/* - * The IDT and GDT 'descriptors' are a strange 48-bit object - * only used by the lidt and lgdt instructions. They are not - * like usual segment descriptors - they consist of a 16-bit - * segment size, and 32-bit linear address value: - */ - -.globl boot_gdt_descr -.globl idt_descr - - ALIGN -# early boot GDT descriptor (must use 1:1 address mapping) - .word 0 # 32 bit align gdt_desc.address -boot_gdt_descr: - .word __BOOT_DS+7 - .long boot_gdt - __PAGE_OFFSET - - .word 0 # 32-bit align idt_desc.address -idt_descr: - .word IDT_ENTRIES*8-1 # idt contains 256 entries - .long idt_table - -# boot GDT descriptor (later on used by CPU#0): - .word 0 # 32 bit align gdt_desc.address -ENTRY(early_gdt_descr) - .word GDT_ENTRIES*8-1 - .long per_cpu__gdt_page /* Overwritten for secondary CPUs */ - -/* - * The boot_gdt must mirror the equivalent in setup.S and is - * used only for booting. - */ - .align L1_CACHE_BYTES -ENTRY(boot_gdt) - .fill GDT_ENTRY_BOOT_CS,8,0 - .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ diff --git a/arch/i386/kernel/hpet_32.c b/arch/i386/kernel/hpet_32.c deleted file mode 100644 index 533d4932bc7..00000000000 --- a/arch/i386/kernel/hpet_32.c +++ /dev/null @@ -1,553 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -extern struct clock_event_device *global_clock_event; - -#define HPET_MASK CLOCKSOURCE_MASK(32) -#define HPET_SHIFT 22 - -/* FSEC = 10^-15 NSEC = 10^-9 */ -#define FSEC_PER_NSEC 1000000 - -/* - * HPET address is set in acpi/boot.c, when an ACPI entry exists - */ -unsigned long hpet_address; -static void __iomem * hpet_virt_address; - -static inline unsigned long hpet_readl(unsigned long a) -{ - return readl(hpet_virt_address + a); -} - -static inline void hpet_writel(unsigned long d, unsigned long a) -{ - writel(d, hpet_virt_address + a); -} - -/* - * HPET command line enable / disable - */ -static int boot_hpet_disable; - -static int __init hpet_setup(char* str) -{ - if (str) { - if (!strncmp("disable", str, 7)) - boot_hpet_disable = 1; - } - return 1; -} -__setup("hpet=", hpet_setup); - -static inline int is_hpet_capable(void) -{ - return (!boot_hpet_disable && hpet_address); -} - -/* - * HPET timer interrupt enable / disable - */ -static int hpet_legacy_int_enabled; - -/** - * is_hpet_enabled - check whether the hpet timer interrupt is enabled - */ -int is_hpet_enabled(void) -{ - return is_hpet_capable() && hpet_legacy_int_enabled; -} - -/* - * When the hpet driver (/dev/hpet) is enabled, we need to reserve - * timer 0 and timer 1 in case of RTC emulation. - */ -#ifdef CONFIG_HPET -static void hpet_reserve_platform_timers(unsigned long id) -{ - struct hpet __iomem *hpet = hpet_virt_address; - struct hpet_timer __iomem *timer = &hpet->hpet_timers[2]; - unsigned int nrtimers, i; - struct hpet_data hd; - - nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; - - memset(&hd, 0, sizeof (hd)); - hd.hd_phys_address = hpet_address; - hd.hd_address = hpet_virt_address; - hd.hd_nirqs = nrtimers; - hd.hd_flags = HPET_DATA_PLATFORM; - hpet_reserve_timer(&hd, 0); - -#ifdef CONFIG_HPET_EMULATE_RTC - hpet_reserve_timer(&hd, 1); -#endif - - hd.hd_irq[0] = HPET_LEGACY_8254; - hd.hd_irq[1] = HPET_LEGACY_RTC; - - for (i = 2; i < nrtimers; timer++, i++) - hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >> - Tn_INT_ROUTE_CNF_SHIFT; - - hpet_alloc(&hd); - -} -#else -static void hpet_reserve_platform_timers(unsigned long id) { } -#endif - -/* - * Common hpet info - */ -static unsigned long hpet_period; - -static void hpet_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt); -static int hpet_next_event(unsigned long delta, - struct clock_event_device *evt); - -/* - * The hpet clock event device - */ -static struct clock_event_device hpet_clockevent = { - .name = "hpet", - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, - .set_mode = hpet_set_mode, - .set_next_event = hpet_next_event, - .shift = 32, - .irq = 0, -}; - -static void hpet_start_counter(void) -{ - unsigned long cfg = hpet_readl(HPET_CFG); - - cfg &= ~HPET_CFG_ENABLE; - hpet_writel(cfg, HPET_CFG); - hpet_writel(0, HPET_COUNTER); - hpet_writel(0, HPET_COUNTER + 4); - cfg |= HPET_CFG_ENABLE; - hpet_writel(cfg, HPET_CFG); -} - -static void hpet_enable_int(void) -{ - unsigned long cfg = hpet_readl(HPET_CFG); - - cfg |= HPET_CFG_LEGACY; - hpet_writel(cfg, HPET_CFG); - hpet_legacy_int_enabled = 1; -} - -static void hpet_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - unsigned long cfg, cmp, now; - uint64_t delta; - - switch(mode) { - case CLOCK_EVT_MODE_PERIODIC: - delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult; - delta >>= hpet_clockevent.shift; - now = hpet_readl(HPET_COUNTER); - cmp = now + (unsigned long) delta; - cfg = hpet_readl(HPET_T0_CFG); - cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | - HPET_TN_SETVAL | HPET_TN_32BIT; - hpet_writel(cfg, HPET_T0_CFG); - /* - * The first write after writing TN_SETVAL to the - * config register sets the counter value, the second - * write sets the period. - */ - hpet_writel(cmp, HPET_T0_CMP); - udelay(1); - hpet_writel((unsigned long) delta, HPET_T0_CMP); - break; - - case CLOCK_EVT_MODE_ONESHOT: - cfg = hpet_readl(HPET_T0_CFG); - cfg &= ~HPET_TN_PERIODIC; - cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; - hpet_writel(cfg, HPET_T0_CFG); - break; - - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - cfg = hpet_readl(HPET_T0_CFG); - cfg &= ~HPET_TN_ENABLE; - hpet_writel(cfg, HPET_T0_CFG); - break; - - case CLOCK_EVT_MODE_RESUME: - hpet_enable_int(); - break; - } -} - -static int hpet_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - unsigned long cnt; - - cnt = hpet_readl(HPET_COUNTER); - cnt += delta; - hpet_writel(cnt, HPET_T0_CMP); - - return ((long)(hpet_readl(HPET_COUNTER) - cnt ) > 0) ? -ETIME : 0; -} - -/* - * Clock source related code - */ -static cycle_t read_hpet(void) -{ - return (cycle_t)hpet_readl(HPET_COUNTER); -} - -static struct clocksource clocksource_hpet = { - .name = "hpet", - .rating = 250, - .read = read_hpet, - .mask = HPET_MASK, - .shift = HPET_SHIFT, - .flags = CLOCK_SOURCE_IS_CONTINUOUS, - .resume = hpet_start_counter, -}; - -/* - * Try to setup the HPET timer - */ -int __init hpet_enable(void) -{ - unsigned long id; - uint64_t hpet_freq; - u64 tmp, start, now; - cycle_t t1; - - if (!is_hpet_capable()) - return 0; - - hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); - - /* - * Read the period and check for a sane value: - */ - hpet_period = hpet_readl(HPET_PERIOD); - if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) - goto out_nohpet; - - /* - * The period is a femto seconds value. We need to calculate the - * scaled math multiplication factor for nanosecond to hpet tick - * conversion. - */ - hpet_freq = 1000000000000000ULL; - do_div(hpet_freq, hpet_period); - hpet_clockevent.mult = div_sc((unsigned long) hpet_freq, - NSEC_PER_SEC, 32); - /* Calculate the min / max delta */ - hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, - &hpet_clockevent); - hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30, - &hpet_clockevent); - - /* - * Read the HPET ID register to retrieve the IRQ routing - * information and the number of channels - */ - id = hpet_readl(HPET_ID); - -#ifdef CONFIG_HPET_EMULATE_RTC - /* - * The legacy routing mode needs at least two channels, tick timer - * and the rtc emulation channel. - */ - if (!(id & HPET_ID_NUMBER)) - goto out_nohpet; -#endif - - /* Start the counter */ - hpet_start_counter(); - - /* Verify whether hpet counter works */ - t1 = read_hpet(); - rdtscll(start); - - /* - * We don't know the TSC frequency yet, but waiting for - * 200000 TSC cycles is safe: - * 4 GHz == 50us - * 1 GHz == 200us - */ - do { - rep_nop(); - rdtscll(now); - } while ((now - start) < 200000UL); - - if (t1 == read_hpet()) { - printk(KERN_WARNING - "HPET counter not counting. HPET disabled\n"); - goto out_nohpet; - } - - /* Initialize and register HPET clocksource - * - * hpet period is in femto seconds per cycle - * so we need to convert this to ns/cyc units - * aproximated by mult/2^shift - * - * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift - * fsec/cyc * 1ns/1000000fsec * 2^shift = mult - * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult - * (fsec/cyc << shift)/1000000 = mult - * (hpet_period << shift)/FSEC_PER_NSEC = mult - */ - tmp = (u64)hpet_period << HPET_SHIFT; - do_div(tmp, FSEC_PER_NSEC); - clocksource_hpet.mult = (u32)tmp; - - clocksource_register(&clocksource_hpet); - - if (id & HPET_ID_LEGSUP) { - hpet_enable_int(); - hpet_reserve_platform_timers(id); - /* - * Start hpet with the boot cpu mask and make it - * global after the IO_APIC has been initialized. - */ - hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); - clockevents_register_device(&hpet_clockevent); - global_clock_event = &hpet_clockevent; - return 1; - } - return 0; - -out_nohpet: - iounmap(hpet_virt_address); - hpet_virt_address = NULL; - boot_hpet_disable = 1; - return 0; -} - - -#ifdef CONFIG_HPET_EMULATE_RTC - -/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET - * is enabled, we support RTC interrupt functionality in software. - * RTC has 3 kinds of interrupts: - * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock - * is updated - * 2) Alarm Interrupt - generate an interrupt at a specific time of day - * 3) Periodic Interrupt - generate periodic interrupt, with frequencies - * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2) - * (1) and (2) above are implemented using polling at a frequency of - * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt - * overhead. (DEFAULT_RTC_INT_FREQ) - * For (3), we use interrupts at 64Hz or user specified periodic - * frequency, whichever is higher. - */ -#include -#include - -#define DEFAULT_RTC_INT_FREQ 64 -#define DEFAULT_RTC_SHIFT 6 -#define RTC_NUM_INTS 1 - -static unsigned long hpet_rtc_flags; -static unsigned long hpet_prev_update_sec; -static struct rtc_time hpet_alarm_time; -static unsigned long hpet_pie_count; -static unsigned long hpet_t1_cmp; -static unsigned long hpet_default_delta; -static unsigned long hpet_pie_delta; -static unsigned long hpet_pie_limit; - -/* - * Timer 1 for RTC emulation. We use one shot mode, as periodic mode - * is not supported by all HPET implementations for timer 1. - * - * hpet_rtc_timer_init() is called when the rtc is initialized. - */ -int hpet_rtc_timer_init(void) -{ - unsigned long cfg, cnt, delta, flags; - - if (!is_hpet_enabled()) - return 0; - - if (!hpet_default_delta) { - uint64_t clc; - - clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; - clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT; - hpet_default_delta = (unsigned long) clc; - } - - if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) - delta = hpet_default_delta; - else - delta = hpet_pie_delta; - - local_irq_save(flags); - - cnt = delta + hpet_readl(HPET_COUNTER); - hpet_writel(cnt, HPET_T1_CMP); - hpet_t1_cmp = cnt; - - cfg = hpet_readl(HPET_T1_CFG); - cfg &= ~HPET_TN_PERIODIC; - cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; - hpet_writel(cfg, HPET_T1_CFG); - - local_irq_restore(flags); - - return 1; -} - -/* - * The functions below are called from rtc driver. - * Return 0 if HPET is not being used. - * Otherwise do the necessary changes and return 1. - */ -int hpet_mask_rtc_irq_bit(unsigned long bit_mask) -{ - if (!is_hpet_enabled()) - return 0; - - hpet_rtc_flags &= ~bit_mask; - return 1; -} - -int hpet_set_rtc_irq_bit(unsigned long bit_mask) -{ - unsigned long oldbits = hpet_rtc_flags; - - if (!is_hpet_enabled()) - return 0; - - hpet_rtc_flags |= bit_mask; - - if (!oldbits) - hpet_rtc_timer_init(); - - return 1; -} - -int hpet_set_alarm_time(unsigned char hrs, unsigned char min, - unsigned char sec) -{ - if (!is_hpet_enabled()) - return 0; - - hpet_alarm_time.tm_hour = hrs; - hpet_alarm_time.tm_min = min; - hpet_alarm_time.tm_sec = sec; - - return 1; -} - -int hpet_set_periodic_freq(unsigned long freq) -{ - uint64_t clc; - - if (!is_hpet_enabled()) - return 0; - - if (freq <= DEFAULT_RTC_INT_FREQ) - hpet_pie_limit = DEFAULT_RTC_INT_FREQ / freq; - else { - clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; - do_div(clc, freq); - clc >>= hpet_clockevent.shift; - hpet_pie_delta = (unsigned long) clc; - } - return 1; -} - -int hpet_rtc_dropped_irq(void) -{ - return is_hpet_enabled(); -} - -static void hpet_rtc_timer_reinit(void) -{ - unsigned long cfg, delta; - int lost_ints = -1; - - if (unlikely(!hpet_rtc_flags)) { - cfg = hpet_readl(HPET_T1_CFG); - cfg &= ~HPET_TN_ENABLE; - hpet_writel(cfg, HPET_T1_CFG); - return; - } - - if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) - delta = hpet_default_delta; - else - delta = hpet_pie_delta; - - /* - * Increment the comparator value until we are ahead of the - * current count. - */ - do { - hpet_t1_cmp += delta; - hpet_writel(hpet_t1_cmp, HPET_T1_CMP); - lost_ints++; - } while ((long)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); - - if (lost_ints) { - if (hpet_rtc_flags & RTC_PIE) - hpet_pie_count += lost_ints; - if (printk_ratelimit()) - printk(KERN_WARNING "rtc: lost %d interrupts\n", - lost_ints); - } -} - -irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) -{ - struct rtc_time curr_time; - unsigned long rtc_int_flag = 0; - - hpet_rtc_timer_reinit(); - - if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) - rtc_get_rtc_time(&curr_time); - - if (hpet_rtc_flags & RTC_UIE && - curr_time.tm_sec != hpet_prev_update_sec) { - rtc_int_flag = RTC_UF; - hpet_prev_update_sec = curr_time.tm_sec; - } - - if (hpet_rtc_flags & RTC_PIE && - ++hpet_pie_count >= hpet_pie_limit) { - rtc_int_flag |= RTC_PF; - hpet_pie_count = 0; - } - - if (hpet_rtc_flags & RTC_PIE && - (curr_time.tm_sec == hpet_alarm_time.tm_sec) && - (curr_time.tm_min == hpet_alarm_time.tm_min) && - (curr_time.tm_hour == hpet_alarm_time.tm_hour)) - rtc_int_flag |= RTC_AF; - - if (rtc_int_flag) { - rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8)); - rtc_interrupt(rtc_int_flag, dev_id); - } - return IRQ_HANDLED; -} -#endif diff --git a/arch/i386/kernel/i386_ksyms_32.c b/arch/i386/kernel/i386_ksyms_32.c deleted file mode 100644 index e3d4b73bfdb..00000000000 --- a/arch/i386/kernel/i386_ksyms_32.c +++ /dev/null @@ -1,30 +0,0 @@ -#include -#include -#include - -EXPORT_SYMBOL(__down_failed); -EXPORT_SYMBOL(__down_failed_interruptible); -EXPORT_SYMBOL(__down_failed_trylock); -EXPORT_SYMBOL(__up_wakeup); -/* Networking helper routines. */ -EXPORT_SYMBOL(csum_partial_copy_generic); - -EXPORT_SYMBOL(__get_user_1); -EXPORT_SYMBOL(__get_user_2); -EXPORT_SYMBOL(__get_user_4); - -EXPORT_SYMBOL(__put_user_1); -EXPORT_SYMBOL(__put_user_2); -EXPORT_SYMBOL(__put_user_4); -EXPORT_SYMBOL(__put_user_8); - -EXPORT_SYMBOL(strstr); - -#ifdef CONFIG_SMP -extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); -extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); -EXPORT_SYMBOL(__write_lock_failed); -EXPORT_SYMBOL(__read_lock_failed); -#endif - -EXPORT_SYMBOL(csum_partial); diff --git a/arch/i386/kernel/i387_32.c b/arch/i386/kernel/i387_32.c deleted file mode 100644 index 665847281ed..00000000000 --- a/arch/i386/kernel/i387_32.c +++ /dev/null @@ -1,546 +0,0 @@ -/* - * linux/arch/i386/kernel/i387.c - * - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes , May 2000 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_MATH_EMULATION -#define HAVE_HWFP (boot_cpu_data.hard_math) -#else -#define HAVE_HWFP 1 -#endif - -static unsigned long mxcsr_feature_mask __read_mostly = 0xffffffff; - -void mxcsr_feature_mask_init(void) -{ - unsigned long mask = 0; - clts(); - if (cpu_has_fxsr) { - memset(¤t->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct)); - asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave)); - mask = current->thread.i387.fxsave.mxcsr_mask; - if (mask == 0) mask = 0x0000ffbf; - } - mxcsr_feature_mask &= mask; - stts(); -} - -/* - * The _current_ task is using the FPU for the first time - * so initialize it and set the mxcsr to its default - * value at reset if we support XMM instructions and then - * remeber the current task has used the FPU. - */ -void init_fpu(struct task_struct *tsk) -{ - if (cpu_has_fxsr) { - memset(&tsk->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct)); - tsk->thread.i387.fxsave.cwd = 0x37f; - if (cpu_has_xmm) - tsk->thread.i387.fxsave.mxcsr = 0x1f80; - } else { - memset(&tsk->thread.i387.fsave, 0, sizeof(struct i387_fsave_struct)); - tsk->thread.i387.fsave.cwd = 0xffff037fu; - tsk->thread.i387.fsave.swd = 0xffff0000u; - tsk->thread.i387.fsave.twd = 0xffffffffu; - tsk->thread.i387.fsave.fos = 0xffff0000u; - } - /* only the device not available exception or ptrace can call init_fpu */ - set_stopped_child_used_math(tsk); -} - -/* - * FPU lazy state save handling. - */ - -void kernel_fpu_begin(void) -{ - struct thread_info *thread = current_thread_info(); - - preempt_disable(); - if (thread->status & TS_USEDFPU) { - __save_init_fpu(thread->task); - return; - } - clts(); -} -EXPORT_SYMBOL_GPL(kernel_fpu_begin); - -/* - * FPU tag word conversions. - */ - -static inline unsigned short twd_i387_to_fxsr( unsigned short twd ) -{ - unsigned int tmp; /* to avoid 16 bit prefixes in the code */ - - /* Transform each pair of bits into 01 (valid) or 00 (empty) */ - tmp = ~twd; - tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ - /* and move the valid bits to the lower byte. */ - tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ - tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ - tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ - return tmp; -} - -static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave ) -{ - struct _fpxreg *st = NULL; - unsigned long tos = (fxsave->swd >> 11) & 7; - unsigned long twd = (unsigned long) fxsave->twd; - unsigned long tag; - unsigned long ret = 0xffff0000u; - int i; - -#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16); - - for ( i = 0 ; i < 8 ; i++ ) { - if ( twd & 0x1 ) { - st = FPREG_ADDR( fxsave, (i - tos) & 7 ); - - switch ( st->exponent & 0x7fff ) { - case 0x7fff: - tag = 2; /* Special */ - break; - case 0x0000: - if ( !st->significand[0] && - !st->significand[1] && - !st->significand[2] && - !st->significand[3] ) { - tag = 1; /* Zero */ - } else { - tag = 2; /* Special */ - } - break; - default: - if ( st->significand[3] & 0x8000 ) { - tag = 0; /* Valid */ - } else { - tag = 2; /* Special */ - } - break; - } - } else { - tag = 3; /* Empty */ - } - ret |= (tag << (2 * i)); - twd = twd >> 1; - } - return ret; -} - -/* - * FPU state interaction. - */ - -unsigned short get_fpu_cwd( struct task_struct *tsk ) -{ - if ( cpu_has_fxsr ) { - return tsk->thread.i387.fxsave.cwd; - } else { - return (unsigned short)tsk->thread.i387.fsave.cwd; - } -} - -unsigned short get_fpu_swd( struct task_struct *tsk ) -{ - if ( cpu_has_fxsr ) { - return tsk->thread.i387.fxsave.swd; - } else { - return (unsigned short)tsk->thread.i387.fsave.swd; - } -} - -#if 0 -unsigned short get_fpu_twd( struct task_struct *tsk ) -{ - if ( cpu_has_fxsr ) { - return tsk->thread.i387.fxsave.twd; - } else { - return (unsigned short)tsk->thread.i387.fsave.twd; - } -} -#endif /* 0 */ - -unsigned short get_fpu_mxcsr( struct task_struct *tsk ) -{ - if ( cpu_has_xmm ) { - return tsk->thread.i387.fxsave.mxcsr; - } else { - return 0x1f80; - } -} - -#if 0 - -void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd ) -{ - if ( cpu_has_fxsr ) { - tsk->thread.i387.fxsave.cwd = cwd; - } else { - tsk->thread.i387.fsave.cwd = ((long)cwd | 0xffff0000u); - } -} - -void set_fpu_swd( struct task_struct *tsk, unsigned short swd ) -{ - if ( cpu_has_fxsr ) { - tsk->thread.i387.fxsave.swd = swd; - } else { - tsk->thread.i387.fsave.swd = ((long)swd | 0xffff0000u); - } -} - -void set_fpu_twd( struct task_struct *tsk, unsigned short twd ) -{ - if ( cpu_has_fxsr ) { - tsk->thread.i387.fxsave.twd = twd_i387_to_fxsr(twd); - } else { - tsk->thread.i387.fsave.twd = ((long)twd | 0xffff0000u); - } -} - -#endif /* 0 */ - -/* - * FXSR floating point environment conversions. - */ - -static int convert_fxsr_to_user( struct _fpstate __user *buf, - struct i387_fxsave_struct *fxsave ) -{ - unsigned long env[7]; - struct _fpreg __user *to; - struct _fpxreg *from; - int i; - - env[0] = (unsigned long)fxsave->cwd | 0xffff0000ul; - env[1] = (unsigned long)fxsave->swd | 0xffff0000ul; - env[2] = twd_fxsr_to_i387(fxsave); - env[3] = fxsave->fip; - env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16); - env[5] = fxsave->foo; - env[6] = fxsave->fos; - - if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) - return 1; - - to = &buf->_st[0]; - from = (struct _fpxreg *) &fxsave->st_space[0]; - for ( i = 0 ; i < 8 ; i++, to++, from++ ) { - unsigned long __user *t = (unsigned long __user *)to; - unsigned long *f = (unsigned long *)from; - - if (__put_user(*f, t) || - __put_user(*(f + 1), t + 1) || - __put_user(from->exponent, &to->exponent)) - return 1; - } - return 0; -} - -static int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave, - struct _fpstate __user *buf ) -{ - unsigned long env[7]; - struct _fpxreg *to; - struct _fpreg __user *from; - int i; - - if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) - return 1; - - fxsave->cwd = (unsigned short)(env[0] & 0xffff); - fxsave->swd = (unsigned short)(env[1] & 0xffff); - fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff)); - fxsave->fip = env[3]; - fxsave->fop = (unsigned short)((env[4] & 0xffff0000ul) >> 16); - fxsave->fcs = (env[4] & 0xffff); - fxsave->foo = env[5]; - fxsave->fos = env[6]; - - to = (struct _fpxreg *) &fxsave->st_space[0]; - from = &buf->_st[0]; - for ( i = 0 ; i < 8 ; i++, to++, from++ ) { - unsigned long *t = (unsigned long *)to; - unsigned long __user *f = (unsigned long __user *)from; - - if (__get_user(*t, f) || - __get_user(*(t + 1), f + 1) || - __get_user(to->exponent, &from->exponent)) - return 1; - } - return 0; -} - -/* - * Signal frame handlers. - */ - -static inline int save_i387_fsave( struct _fpstate __user *buf ) -{ - struct task_struct *tsk = current; - - unlazy_fpu( tsk ); - tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd; - if ( __copy_to_user( buf, &tsk->thread.i387.fsave, - sizeof(struct i387_fsave_struct) ) ) - return -1; - return 1; -} - -static int save_i387_fxsave( struct _fpstate __user *buf ) -{ - struct task_struct *tsk = current; - int err = 0; - - unlazy_fpu( tsk ); - - if ( convert_fxsr_to_user( buf, &tsk->thread.i387.fxsave ) ) - return -1; - - err |= __put_user( tsk->thread.i387.fxsave.swd, &buf->status ); - err |= __put_user( X86_FXSR_MAGIC, &buf->magic ); - if ( err ) - return -1; - - if ( __copy_to_user( &buf->_fxsr_env[0], &tsk->thread.i387.fxsave, - sizeof(struct i387_fxsave_struct) ) ) - return -1; - return 1; -} - -int save_i387( struct _fpstate __user *buf ) -{ - if ( !used_math() ) - return 0; - - /* This will cause a "finit" to be triggered by the next - * attempted FPU operation by the 'current' process. - */ - clear_used_math(); - - if ( HAVE_HWFP ) { - if ( cpu_has_fxsr ) { - return save_i387_fxsave( buf ); - } else { - return save_i387_fsave( buf ); - } - } else { - return save_i387_soft( ¤t->thread.i387.soft, buf ); - } -} - -static inline int restore_i387_fsave( struct _fpstate __user *buf ) -{ - struct task_struct *tsk = current; - clear_fpu( tsk ); - return __copy_from_user( &tsk->thread.i387.fsave, buf, - sizeof(struct i387_fsave_struct) ); -} - -static int restore_i387_fxsave( struct _fpstate __user *buf ) -{ - int err; - struct task_struct *tsk = current; - clear_fpu( tsk ); - err = __copy_from_user( &tsk->thread.i387.fxsave, &buf->_fxsr_env[0], - sizeof(struct i387_fxsave_struct) ); - /* mxcsr reserved bits must be masked to zero for security reasons */ - tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; - return err ? 1 : convert_fxsr_from_user( &tsk->thread.i387.fxsave, buf ); -} - -int restore_i387( struct _fpstate __user *buf ) -{ - int err; - - if ( HAVE_HWFP ) { - if ( cpu_has_fxsr ) { - err = restore_i387_fxsave( buf ); - } else { - err = restore_i387_fsave( buf ); - } - } else { - err = restore_i387_soft( ¤t->thread.i387.soft, buf ); - } - set_used_math(); - return err; -} - -/* - * ptrace request handlers. - */ - -static inline int get_fpregs_fsave( struct user_i387_struct __user *buf, - struct task_struct *tsk ) -{ - return __copy_to_user( buf, &tsk->thread.i387.fsave, - sizeof(struct user_i387_struct) ); -} - -static inline int get_fpregs_fxsave( struct user_i387_struct __user *buf, - struct task_struct *tsk ) -{ - return convert_fxsr_to_user( (struct _fpstate __user *)buf, - &tsk->thread.i387.fxsave ); -} - -int get_fpregs( struct user_i387_struct __user *buf, struct task_struct *tsk ) -{ - if ( HAVE_HWFP ) { - if ( cpu_has_fxsr ) { - return get_fpregs_fxsave( buf, tsk ); - } else { - return get_fpregs_fsave( buf, tsk ); - } - } else { - return save_i387_soft( &tsk->thread.i387.soft, - (struct _fpstate __user *)buf ); - } -} - -static inline int set_fpregs_fsave( struct task_struct *tsk, - struct user_i387_struct __user *buf ) -{ - return __copy_from_user( &tsk->thread.i387.fsave, buf, - sizeof(struct user_i387_struct) ); -} - -static inline int set_fpregs_fxsave( struct task_struct *tsk, - struct user_i387_struct __user *buf ) -{ - return convert_fxsr_from_user( &tsk->thread.i387.fxsave, - (struct _fpstate __user *)buf ); -} - -int set_fpregs( struct task_struct *tsk, struct user_i387_struct __user *buf ) -{ - if ( HAVE_HWFP ) { - if ( cpu_has_fxsr ) { - return set_fpregs_fxsave( tsk, buf ); - } else { - return set_fpregs_fsave( tsk, buf ); - } - } else { - return restore_i387_soft( &tsk->thread.i387.soft, - (struct _fpstate __user *)buf ); - } -} - -int get_fpxregs( struct user_fxsr_struct __user *buf, struct task_struct *tsk ) -{ - if ( cpu_has_fxsr ) { - if (__copy_to_user( buf, &tsk->thread.i387.fxsave, - sizeof(struct user_fxsr_struct) )) - return -EFAULT; - return 0; - } else { - return -EIO; - } -} - -int set_fpxregs( struct task_struct *tsk, struct user_fxsr_struct __user *buf ) -{ - int ret = 0; - - if ( cpu_has_fxsr ) { - if (__copy_from_user( &tsk->thread.i387.fxsave, buf, - sizeof(struct user_fxsr_struct) )) - ret = -EFAULT; - /* mxcsr reserved bits must be masked to zero for security reasons */ - tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; - } else { - ret = -EIO; - } - return ret; -} - -/* - * FPU state for core dumps. - */ - -static inline void copy_fpu_fsave( struct task_struct *tsk, - struct user_i387_struct *fpu ) -{ - memcpy( fpu, &tsk->thread.i387.fsave, - sizeof(struct user_i387_struct) ); -} - -static inline void copy_fpu_fxsave( struct task_struct *tsk, - struct user_i387_struct *fpu ) -{ - unsigned short *to; - unsigned short *from; - int i; - - memcpy( fpu, &tsk->thread.i387.fxsave, 7 * sizeof(long) ); - - to = (unsigned short *)&fpu->st_space[0]; - from = (unsigned short *)&tsk->thread.i387.fxsave.st_space[0]; - for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) { - memcpy( to, from, 5 * sizeof(unsigned short) ); - } -} - -int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu ) -{ - int fpvalid; - struct task_struct *tsk = current; - - fpvalid = !!used_math(); - if ( fpvalid ) { - unlazy_fpu( tsk ); - if ( cpu_has_fxsr ) { - copy_fpu_fxsave( tsk, fpu ); - } else { - copy_fpu_fsave( tsk, fpu ); - } - } - - return fpvalid; -} -EXPORT_SYMBOL(dump_fpu); - -int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu) -{ - int fpvalid = !!tsk_used_math(tsk); - - if (fpvalid) { - if (tsk == current) - unlazy_fpu(tsk); - if (cpu_has_fxsr) - copy_fpu_fxsave(tsk, fpu); - else - copy_fpu_fsave(tsk, fpu); - } - return fpvalid; -} - -int dump_task_extended_fpu(struct task_struct *tsk, struct user_fxsr_struct *fpu) -{ - int fpvalid = tsk_used_math(tsk) && cpu_has_fxsr; - - if (fpvalid) { - if (tsk == current) - unlazy_fpu(tsk); - memcpy(fpu, &tsk->thread.i387.fxsave, sizeof(*fpu)); - } - return fpvalid; -} diff --git a/arch/i386/kernel/i8237.c b/arch/i386/kernel/i8237.c deleted file mode 100644 index 6f508e8d7c5..00000000000 --- a/arch/i386/kernel/i8237.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * i8237.c: 8237A DMA controller suspend functions. - * - * Written by Pierre Ossman, 2005. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - */ - -#include -#include - -#include - -/* - * This module just handles suspend/resume issues with the - * 8237A DMA controller (used for ISA and LPC). - * Allocation is handled in kernel/dma.c and normal usage is - * in asm/dma.h. - */ - -static int i8237A_resume(struct sys_device *dev) -{ - unsigned long flags; - int i; - - flags = claim_dma_lock(); - - dma_outb(DMA1_RESET_REG, 0); - dma_outb(DMA2_RESET_REG, 0); - - for (i = 0;i < 8;i++) { - set_dma_addr(i, 0x000000); - /* DMA count is a bit weird so this is not 0 */ - set_dma_count(i, 1); - } - - /* Enable cascade DMA or channel 0-3 won't work */ - enable_dma(4); - - release_dma_lock(flags); - - return 0; -} - -static int i8237A_suspend(struct sys_device *dev, pm_message_t state) -{ - return 0; -} - -static struct sysdev_class i8237_sysdev_class = { - set_kset_name("i8237"), - .suspend = i8237A_suspend, - .resume = i8237A_resume, -}; - -static struct sys_device device_i8237A = { - .id = 0, - .cls = &i8237_sysdev_class, -}; - -static int __init i8237A_init_sysfs(void) -{ - int error = sysdev_class_register(&i8237_sysdev_class); - if (!error) - error = sysdev_register(&device_i8237A); - return error; -} - -device_initcall(i8237A_init_sysfs); diff --git a/arch/i386/kernel/i8253_32.c b/arch/i386/kernel/i8253_32.c deleted file mode 100644 index 6d839f2f1b1..00000000000 --- a/arch/i386/kernel/i8253_32.c +++ /dev/null @@ -1,206 +0,0 @@ -/* - * i8253.c 8253/PIT functions - * - */ -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -DEFINE_SPINLOCK(i8253_lock); -EXPORT_SYMBOL(i8253_lock); - -/* - * HPET replaces the PIT, when enabled. So we need to know, which of - * the two timers is used - */ -struct clock_event_device *global_clock_event; - -/* - * Initialize the PIT timer. - * - * This is also called after resume to bring the PIT into operation again. - */ -static void init_pit_timer(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); - - switch(mode) { - case CLOCK_EVT_MODE_PERIODIC: - /* binary, mode 2, LSB/MSB, ch 0 */ - outb_p(0x34, PIT_MODE); - outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ - outb(LATCH >> 8 , PIT_CH0); /* MSB */ - break; - - case CLOCK_EVT_MODE_SHUTDOWN: - case CLOCK_EVT_MODE_UNUSED: - if (evt->mode == CLOCK_EVT_MODE_PERIODIC || - evt->mode == CLOCK_EVT_MODE_ONESHOT) { - outb_p(0x30, PIT_MODE); - outb_p(0, PIT_CH0); - outb_p(0, PIT_CH0); - } - break; - - case CLOCK_EVT_MODE_ONESHOT: - /* One shot setup */ - outb_p(0x38, PIT_MODE); - break; - - case CLOCK_EVT_MODE_RESUME: - /* Nothing to do here */ - break; - } - spin_unlock_irqrestore(&i8253_lock, flags); -} - -/* - * Program the next event in oneshot mode - * - * Delta is given in PIT ticks - */ -static int pit_next_event(unsigned long delta, struct clock_event_device *evt) -{ - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); - outb_p(delta & 0xff , PIT_CH0); /* LSB */ - outb(delta >> 8 , PIT_CH0); /* MSB */ - spin_unlock_irqrestore(&i8253_lock, flags); - - return 0; -} - -/* - * On UP the PIT can serve all of the possible timer functions. On SMP systems - * it can be solely used for the global tick. - * - * The profiling and update capabilites are switched off once the local apic is - * registered. This mechanism replaces the previous #ifdef LOCAL_APIC - - * !using_apic_timer decisions in do_timer_interrupt_hook() - */ -struct clock_event_device pit_clockevent = { - .name = "pit", - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, - .set_mode = init_pit_timer, - .set_next_event = pit_next_event, - .shift = 32, - .irq = 0, -}; - -/* - * Initialize the conversion factor and the min/max deltas of the clock event - * structure and register the clock event source with the framework. - */ -void __init setup_pit_timer(void) -{ - /* - * Start pit with the boot cpu mask and make it global after the - * IO_APIC has been initialized. - */ - pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); - pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 32); - pit_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFF, &pit_clockevent); - pit_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &pit_clockevent); - clockevents_register_device(&pit_clockevent); - global_clock_event = &pit_clockevent; -} - -/* - * Since the PIT overflows every tick, its not very useful - * to just read by itself. So use jiffies to emulate a free - * running counter: - */ -static cycle_t pit_read(void) -{ - unsigned long flags; - int count; - u32 jifs; - static int old_count; - static u32 old_jifs; - - spin_lock_irqsave(&i8253_lock, flags); - /* - * Although our caller may have the read side of xtime_lock, - * this is now a seqlock, and we are cheating in this routine - * by having side effects on state that we cannot undo if - * there is a collision on the seqlock and our caller has to - * retry. (Namely, old_jifs and old_count.) So we must treat - * jiffies as volatile despite the lock. We read jiffies - * before latching the timer count to guarantee that although - * the jiffies value might be older than the count (that is, - * the counter may underflow between the last point where - * jiffies was incremented and the point where we latch the - * count), it cannot be newer. - */ - jifs = jiffies; - outb_p(0x00, PIT_MODE); /* latch the count ASAP */ - count = inb_p(PIT_CH0); /* read the latched count */ - count |= inb_p(PIT_CH0) << 8; - - /* VIA686a test code... reset the latch if count > max + 1 */ - if (count > LATCH) { - outb_p(0x34, PIT_MODE); - outb_p(LATCH & 0xff, PIT_CH0); - outb(LATCH >> 8, PIT_CH0); - count = LATCH - 1; - } - - /* - * It's possible for count to appear to go the wrong way for a - * couple of reasons: - * - * 1. The timer counter underflows, but we haven't handled the - * resulting interrupt and incremented jiffies yet. - * 2. Hardware problem with the timer, not giving us continuous time, - * the counter does small "jumps" upwards on some Pentium systems, - * (see c't 95/10 page 335 for Neptun bug.) - * - * Previous attempts to handle these cases intelligently were - * buggy, so we just do the simple thing now. - */ - if (count > old_count && jifs == old_jifs) { - count = old_count; - } - old_count = count; - old_jifs = jifs; - - spin_unlock_irqrestore(&i8253_lock, flags); - - count = (LATCH - 1) - count; - - return (cycle_t)(jifs * LATCH) + count; -} - -static struct clocksource clocksource_pit = { - .name = "pit", - .rating = 110, - .read = pit_read, - .mask = CLOCKSOURCE_MASK(32), - .mult = 0, - .shift = 20, -}; - -static int __init init_pit_clocksource(void) -{ - if (num_possible_cpus() > 1) /* PIT does not scale! */ - return 0; - - clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); - return clocksource_register(&clocksource_pit); -} -arch_initcall(init_pit_clocksource); diff --git a/arch/i386/kernel/i8259_32.c b/arch/i386/kernel/i8259_32.c deleted file mode 100644 index 0499cbe9871..00000000000 --- a/arch/i386/kernel/i8259_32.c +++ /dev/null @@ -1,420 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/* - * This is the 'legacy' 8259A Programmable Interrupt Controller, - * present in the majority of PC/AT boxes. - * plus some generic x86 specific things if generic specifics makes - * any sense at all. - * this file should become arch/i386/kernel/irq.c when the old irq.c - * moves to arch independent land - */ - -static int i8259A_auto_eoi; -DEFINE_SPINLOCK(i8259A_lock); -static void mask_and_ack_8259A(unsigned int); - -static struct irq_chip i8259A_chip = { - .name = "XT-PIC", - .mask = disable_8259A_irq, - .disable = disable_8259A_irq, - .unmask = enable_8259A_irq, - .mask_ack = mask_and_ack_8259A, -}; - -/* - * 8259A PIC functions to handle ISA devices: - */ - -/* - * This contains the irq mask for both 8259A irq controllers, - */ -unsigned int cached_irq_mask = 0xffff; - -/* - * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) - * boards the timer interrupt is not really connected to any IO-APIC pin, - * it's fed to the master 8259A's IR0 line only. - * - * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. - * this 'mixed mode' IRQ handling costs nothing because it's only used - * at IRQ setup time. - */ -unsigned long io_apic_irqs; - -void disable_8259A_irq(unsigned int irq) -{ - unsigned int mask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask |= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -void enable_8259A_irq(unsigned int irq) -{ - unsigned int mask = ~(1 << irq); - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask &= mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -int i8259A_irq_pending(unsigned int irq) -{ - unsigned int mask = 1<> 8); - spin_unlock_irqrestore(&i8259A_lock, flags); - - return ret; -} - -void make_8259A_irq(unsigned int irq) -{ - disable_irq_nosync(irq); - io_apic_irqs &= ~(1<> 8); - outb(0x0A,PIC_SLAVE_CMD); /* back to the IRR register */ - return value; -} - -/* - * Careful! The 8259A is a fragile beast, it pretty - * much _has_ to be done exactly like this (mask it - * first, _then_ send the EOI, and the order of EOI - * to the two 8259s is important! - */ -static void mask_and_ack_8259A(unsigned int irq) -{ - unsigned int irqmask = 1 << irq; - unsigned long flags; - - spin_lock_irqsave(&i8259A_lock, flags); - /* - * Lightweight spurious IRQ detection. We do not want - * to overdo spurious IRQ handling - it's usually a sign - * of hardware problems, so we only do the checks we can - * do without slowing down good hardware unnecessarily. - * - * Note that IRQ7 and IRQ15 (the two spurious IRQs - * usually resulting from the 8259A-1|2 PICs) occur - * even if the IRQ is masked in the 8259A. Thus we - * can check spurious 8259A IRQs without doing the - * quite slow i8259A_irq_real() call for every IRQ. - * This does not cover 100% of spurious interrupts, - * but should be enough to warn the user that there - * is something bad going on ... - */ - if (cached_irq_mask & irqmask) - goto spurious_8259A_irq; - cached_irq_mask |= irqmask; - -handle_real_irq: - if (irq & 8) { - inb(PIC_SLAVE_IMR); /* DUMMY - (do we need this?) */ - outb(cached_slave_mask, PIC_SLAVE_IMR); - outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */ - outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */ - } else { - inb(PIC_MASTER_IMR); /* DUMMY - (do we need this?) */ - outb(cached_master_mask, PIC_MASTER_IMR); - outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI to master */ - } - spin_unlock_irqrestore(&i8259A_lock, flags); - return; - -spurious_8259A_irq: - /* - * this is the slow path - should happen rarely. - */ - if (i8259A_irq_real(irq)) - /* - * oops, the IRQ _is_ in service according to the - * 8259A - not spurious, go handle it. - */ - goto handle_real_irq; - - { - static int spurious_irq_mask; - /* - * At this point we can be sure the IRQ is spurious, - * lets ACK and report it. [once per IRQ] - */ - if (!(spurious_irq_mask & irqmask)) { - printk(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); - spurious_irq_mask |= irqmask; - } - atomic_inc(&irq_err_count); - /* - * Theoretically we do not have to handle this IRQ, - * but in Linux this does not cause problems and is - * simpler for us. - */ - goto handle_real_irq; - } -} - -static char irq_trigger[2]; -/** - * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ - */ -static void restore_ELCR(char *trigger) -{ - outb(trigger[0], 0x4d0); - outb(trigger[1], 0x4d1); -} - -static void save_ELCR(char *trigger) -{ - /* IRQ 0,1,2,8,13 are marked as reserved */ - trigger[0] = inb(0x4d0) & 0xF8; - trigger[1] = inb(0x4d1) & 0xDE; -} - -static int i8259A_resume(struct sys_device *dev) -{ - init_8259A(i8259A_auto_eoi); - restore_ELCR(irq_trigger); - return 0; -} - -static int i8259A_suspend(struct sys_device *dev, pm_message_t state) -{ - save_ELCR(irq_trigger); - return 0; -} - -static int i8259A_shutdown(struct sys_device *dev) -{ - /* Put the i8259A into a quiescent state that - * the kernel initialization code can get it - * out of. - */ - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ - return 0; -} - -static struct sysdev_class i8259_sysdev_class = { - set_kset_name("i8259"), - .suspend = i8259A_suspend, - .resume = i8259A_resume, - .shutdown = i8259A_shutdown, -}; - -static struct sys_device device_i8259A = { - .id = 0, - .cls = &i8259_sysdev_class, -}; - -static int __init i8259A_init_sysfs(void) -{ - int error = sysdev_class_register(&i8259_sysdev_class); - if (!error) - error = sysdev_register(&device_i8259A); - return error; -} - -device_initcall(i8259A_init_sysfs); - -void init_8259A(int auto_eoi) -{ - unsigned long flags; - - i8259A_auto_eoi = auto_eoi; - - spin_lock_irqsave(&i8259A_lock, flags); - - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ - - /* - * outb_p - this has to work on a wide range of PC hardware. - */ - outb_p(0x11, PIC_MASTER_CMD); /* ICW1: select 8259A-1 init */ - outb_p(0x20 + 0, PIC_MASTER_IMR); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ - outb_p(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); /* 8259A-1 (the master) has a slave on IR2 */ - if (auto_eoi) /* master does Auto EOI */ - outb_p(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); - else /* master expects normal EOI */ - outb_p(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); - - outb_p(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ - outb_p(0x20 + 8, PIC_SLAVE_IMR); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ - outb_p(PIC_CASCADE_IR, PIC_SLAVE_IMR); /* 8259A-2 is a slave on master's IR2 */ - outb_p(SLAVE_ICW4_DEFAULT, PIC_SLAVE_IMR); /* (slave's support for AEOI in flat mode is to be investigated) */ - if (auto_eoi) - /* - * In AEOI mode we just have to mask the interrupt - * when acking. - */ - i8259A_chip.mask_ack = disable_8259A_irq; - else - i8259A_chip.mask_ack = mask_and_ack_8259A; - - udelay(100); /* wait for 8259A to initialize */ - - outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ - outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - - spin_unlock_irqrestore(&i8259A_lock, flags); -} - -/* - * Note that on a 486, we don't want to do a SIGFPE on an irq13 - * as the irq is unreliable, and exception 16 works correctly - * (ie as explained in the intel literature). On a 386, you - * can't use exception 16 due to bad IBM design, so we have to - * rely on the less exact irq13. - * - * Careful.. Not only is IRQ13 unreliable, but it is also - * leads to races. IBM designers who came up with it should - * be shot. - */ - - -static irqreturn_t math_error_irq(int cpl, void *dev_id) -{ - extern void math_error(void __user *); - outb(0,0xF0); - if (ignore_fpu_irq || !boot_cpu_data.hard_math) - return IRQ_NONE; - math_error((void __user *)get_irq_regs()->eip); - return IRQ_HANDLED; -} - -/* - * New motherboards sometimes make IRQ 13 be a PCI interrupt, - * so allow interrupt sharing. - */ -static struct irqaction fpu_irq = { math_error_irq, 0, CPU_MASK_NONE, "fpu", NULL, NULL }; - -void __init init_ISA_irqs (void) -{ - int i; - -#ifdef CONFIG_X86_LOCAL_APIC - init_bsp_APIC(); -#endif - init_8259A(0); - - for (i = 0; i < NR_IRQS; i++) { - irq_desc[i].status = IRQ_DISABLED; - irq_desc[i].action = NULL; - irq_desc[i].depth = 1; - - if (i < 16) { - /* - * 16 old-style INTA-cycle interrupts: - */ - set_irq_chip_and_handler_name(i, &i8259A_chip, - handle_level_irq, "XT"); - } else { - /* - * 'high' PCI IRQs filled in on demand - */ - irq_desc[i].chip = &no_irq_chip; - } - } -} - -/* Overridden in paravirt.c */ -void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); - -void __init native_init_IRQ(void) -{ - int i; - - /* all the set up before the call gates are initialised */ - pre_intr_init_hook(); - - /* - * Cover the whole vector space, no vector can escape - * us. (some of these will be overridden and become - * 'special' SMP interrupts) - */ - for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { - int vector = FIRST_EXTERNAL_VECTOR + i; - if (i >= NR_IRQS) - break; - if (vector != SYSCALL_VECTOR) - set_intr_gate(vector, interrupt[i]); - } - - /* setup after call gates are initialised (usually add in - * the architecture specific gates) - */ - intr_init_hook(); - - /* - * External FPU? Set up irq13 if so, for - * original braindamaged IBM FERR coupling. - */ - if (boot_cpu_data.hard_math && !cpu_has_fpu) - setup_irq(FPU_IRQ, &fpu_irq); - - irq_ctx_init(smp_processor_id()); -} diff --git a/arch/i386/kernel/init_task_32.c b/arch/i386/kernel/init_task_32.c deleted file mode 100644 index d26fc063a76..00000000000 --- a/arch/i386/kernel/init_task_32.c +++ /dev/null @@ -1,46 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -struct mm_struct init_mm = INIT_MM(init_mm); - -EXPORT_SYMBOL(init_mm); - -/* - * Initial thread structure. - * - * We need to make sure that this is THREAD_SIZE aligned due to the - * way process stacks are handled. This is done by having a special - * "init_task" linker map entry.. - */ -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; - -/* - * Initial task structure. - * - * All other task structs will be allocated on slabs in fork.c - */ -struct task_struct init_task = INIT_TASK(init_task); - -EXPORT_SYMBOL(init_task); - -/* - * per-CPU TSS segments. Threads are completely 'soft' on Linux, - * no more per-task TSS's. - */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; - diff --git a/arch/i386/kernel/io_apic_32.c b/arch/i386/kernel/io_apic_32.c deleted file mode 100644 index e2f4a1c6854..00000000000 --- a/arch/i386/kernel/io_apic_32.c +++ /dev/null @@ -1,2847 +0,0 @@ -/* - * Intel IO-APIC support for multi-Pentium hosts. - * - * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo - * - * Many thanks to Stig Venaas for trying out countless experimental - * patches and reporting/debugging problems patiently! - * - * (c) 1999, Multiple IO-APIC support, developed by - * Ken-ichi Yaku and - * Hidemi Kishimoto , - * further tested and cleaned up by Zach Brown - * and Ingo Molnar - * - * Fixes - * Maciej W. Rozycki : Bits for genuine 82489DX APICs; - * thanks to Eric Gilmore - * and Rolf G. Tews - * for testing these extensively - * Paul Diefenbaugh : Added full ACPI support - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "io_ports.h" - -int (*ioapic_renumber_irq)(int ioapic, int irq); -atomic_t irq_mis_count; - -/* Where if anywhere is the i8259 connect in external int mode */ -static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; - -static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); - -int timer_over_8254 __initdata = 1; - -/* - * Is the SiS APIC rmw bug present ? - * -1 = don't know, 0 = no, 1 = yes - */ -int sis_apic_bug = -1; - -/* - * # of IRQ routing registers - */ -int nr_ioapic_registers[MAX_IO_APICS]; - -static int disable_timer_pin_1 __initdata; - -/* - * Rough estimation of how many shared IRQs there are, can - * be changed anytime. - */ -#define MAX_PLUS_SHARED_IRQS NR_IRQS -#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) - -/* - * This is performance-critical, we want to do it O(1) - * - * the indexing order of this array favors 1:1 mappings - * between pins and IRQs. - */ - -static struct irq_pin_list { - int apic, pin, next; -} irq_2_pin[PIN_MAP_SIZE]; - -struct io_apic { - unsigned int index; - unsigned int unused[3]; - unsigned int data; -}; - -static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) -{ - return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); -} - -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - writel(reg, &io_apic->index); - return readl(&io_apic->data); -} - -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - writel(reg, &io_apic->index); - writel(value, &io_apic->data); -} - -/* - * Re-write a value: to be used for read-modify-write - * cycles where the read already set up the index register. - * - * Older SiS APIC requires we rewrite the index register - */ -static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) -{ - volatile struct io_apic __iomem *io_apic = io_apic_base(apic); - if (sis_apic_bug) - writel(reg, &io_apic->index); - writel(value, &io_apic->data); -} - -union entry_union { - struct { u32 w1, w2; }; - struct IO_APIC_route_entry entry; -}; - -static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) -{ - union entry_union eu; - unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); - eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); - eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); - return eu.entry; -} - -/* - * When we write a new IO APIC routing entry, we need to write the high - * word first! If the mask bit in the low word is clear, we will enable - * the interrupt, and we need to make sure the entry is fully populated - * before that happens. - */ -static void -__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) -{ - union entry_union eu; - eu.entry = e; - io_apic_write(apic, 0x11 + 2*pin, eu.w2); - io_apic_write(apic, 0x10 + 2*pin, eu.w1); -} - -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) -{ - unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); - __ioapic_write_entry(apic, pin, e); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -/* - * When we mask an IO APIC routing entry, we need to write the low - * word first, in order to set the mask bit before we change the - * high bits! - */ -static void ioapic_mask_entry(int apic, int pin) -{ - unsigned long flags; - union entry_union eu = { .entry.mask = 1 }; - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x10 + 2*pin, eu.w1); - io_apic_write(apic, 0x11 + 2*pin, eu.w2); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -/* - * The common case is 1:1 IRQ<->pin mappings. Sometimes there are - * shared ISA-space IRQs, so we have to support them. We are super - * fast in the common case, and fast for shared ISA-space IRQs. - */ -static void add_pin_to_irq(unsigned int irq, int apic, int pin) -{ - static int first_free_entry = NR_IRQS; - struct irq_pin_list *entry = irq_2_pin + irq; - - while (entry->next) - entry = irq_2_pin + entry->next; - - if (entry->pin != -1) { - entry->next = first_free_entry; - entry = irq_2_pin + entry->next; - if (++first_free_entry >= PIN_MAP_SIZE) - panic("io_apic.c: whoops"); - } - entry->apic = apic; - entry->pin = pin; -} - -/* - * Reroute an IRQ to a different pin. - */ -static void __init replace_pin_at_irq(unsigned int irq, - int oldapic, int oldpin, - int newapic, int newpin) -{ - struct irq_pin_list *entry = irq_2_pin + irq; - - while (1) { - if (entry->apic == oldapic && entry->pin == oldpin) { - entry->apic = newapic; - entry->pin = newpin; - } - if (!entry->next) - break; - entry = irq_2_pin + entry->next; - } -} - -static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) -{ - struct irq_pin_list *entry = irq_2_pin + irq; - unsigned int pin, reg; - - for (;;) { - pin = entry->pin; - if (pin == -1) - break; - reg = io_apic_read(entry->apic, 0x10 + pin*2); - reg &= ~disable; - reg |= enable; - io_apic_modify(entry->apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = irq_2_pin + entry->next; - } -} - -/* mask = 1 */ -static void __mask_IO_APIC_irq (unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0x00010000, 0); -} - -/* mask = 0 */ -static void __unmask_IO_APIC_irq (unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0, 0x00010000); -} - -/* mask = 1, trigger = 0 */ -static void __mask_and_edge_IO_APIC_irq (unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); -} - -/* mask = 0, trigger = 1 */ -static void __unmask_and_level_IO_APIC_irq (unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); -} - -static void mask_IO_APIC_irq (unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - __mask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void unmask_IO_APIC_irq (unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - __unmask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) -{ - struct IO_APIC_route_entry entry; - - /* Check delivery_mode to be sure we're not clearing an SMI pin */ - entry = ioapic_read_entry(apic, pin); - if (entry.delivery_mode == dest_SMI) - return; - - /* - * Disable it in the IO-APIC irq-routing table: - */ - ioapic_mask_entry(apic, pin); -} - -static void clear_IO_APIC (void) -{ - int apic, pin; - - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - clear_IO_APIC_pin(apic, pin); -} - -#ifdef CONFIG_SMP -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) -{ - unsigned long flags; - int pin; - struct irq_pin_list *entry = irq_2_pin + irq; - unsigned int apicid_value; - cpumask_t tmp; - - cpus_and(tmp, cpumask, cpu_online_map); - if (cpus_empty(tmp)) - tmp = TARGET_CPUS; - - cpus_and(cpumask, tmp, CPU_MASK_ALL); - - apicid_value = cpu_mask_to_apicid(cpumask); - /* Prepare to do the io_apic_write */ - apicid_value = apicid_value << 24; - spin_lock_irqsave(&ioapic_lock, flags); - for (;;) { - pin = entry->pin; - if (pin == -1) - break; - io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value); - if (!entry->next) - break; - entry = irq_2_pin + entry->next; - } - irq_desc[irq].affinity = cpumask; - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -#if defined(CONFIG_IRQBALANCE) -# include /* kernel_thread() */ -# include /* kstat */ -# include /* kmalloc() */ -# include /* time_after() */ - -#define IRQBALANCE_CHECK_ARCH -999 -#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) -#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) -#define BALANCED_IRQ_MORE_DELTA (HZ/10) -#define BALANCED_IRQ_LESS_DELTA (HZ) - -static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH; -static int physical_balance __read_mostly; -static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL; - -static struct irq_cpu_info { - unsigned long * last_irq; - unsigned long * irq_delta; - unsigned long irq; -} irq_cpu_data[NR_CPUS]; - -#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) -#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq]) -#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq]) - -#define IDLE_ENOUGH(cpu,now) \ - (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1)) - -#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) - -#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) - -static cpumask_t balance_irq_affinity[NR_IRQS] = { - [0 ... NR_IRQS-1] = CPU_MASK_ALL -}; - -void set_balance_irq_affinity(unsigned int irq, cpumask_t mask) -{ - balance_irq_affinity[irq] = mask; -} - -static unsigned long move(int curr_cpu, cpumask_t allowed_mask, - unsigned long now, int direction) -{ - int search_idle = 1; - int cpu = curr_cpu; - - goto inside; - - do { - if (unlikely(cpu == curr_cpu)) - search_idle = 0; -inside: - if (direction == 1) { - cpu++; - if (cpu >= NR_CPUS) - cpu = 0; - } else { - cpu--; - if (cpu == -1) - cpu = NR_CPUS-1; - } - } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) || - (search_idle && !IDLE_ENOUGH(cpu,now))); - - return cpu; -} - -static inline void balance_irq(int cpu, int irq) -{ - unsigned long now = jiffies; - cpumask_t allowed_mask; - unsigned int new_cpu; - - if (irqbalance_disabled) - return; - - cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]); - new_cpu = move(cpu, allowed_mask, now, 1); - if (cpu != new_cpu) { - set_pending_irq(irq, cpumask_of_cpu(new_cpu)); - } -} - -static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) -{ - int i, j; - - for_each_online_cpu(i) { - for (j = 0; j < NR_IRQS; j++) { - if (!irq_desc[j].action) - continue; - /* Is it a significant load ? */ - if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) < - useful_load_threshold) - continue; - balance_irq(i, j); - } - } - balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, - balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); - return; -} - -static void do_irq_balance(void) -{ - int i, j; - unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); - unsigned long move_this_load = 0; - int max_loaded = 0, min_loaded = 0; - int load; - unsigned long useful_load_threshold = balanced_irq_interval + 10; - int selected_irq; - int tmp_loaded, first_attempt = 1; - unsigned long tmp_cpu_irq; - unsigned long imbalance = 0; - cpumask_t allowed_mask, target_cpu_mask, tmp; - - for_each_possible_cpu(i) { - int package_index; - CPU_IRQ(i) = 0; - if (!cpu_online(i)) - continue; - package_index = CPU_TO_PACKAGEINDEX(i); - for (j = 0; j < NR_IRQS; j++) { - unsigned long value_now, delta; - /* Is this an active IRQ or balancing disabled ? */ - if (!irq_desc[j].action || irq_balancing_disabled(j)) - continue; - if ( package_index == i ) - IRQ_DELTA(package_index,j) = 0; - /* Determine the total count per processor per IRQ */ - value_now = (unsigned long) kstat_cpu(i).irqs[j]; - - /* Determine the activity per processor per IRQ */ - delta = value_now - LAST_CPU_IRQ(i,j); - - /* Update last_cpu_irq[][] for the next time */ - LAST_CPU_IRQ(i,j) = value_now; - - /* Ignore IRQs whose rate is less than the clock */ - if (delta < useful_load_threshold) - continue; - /* update the load for the processor or package total */ - IRQ_DELTA(package_index,j) += delta; - - /* Keep track of the higher numbered sibling as well */ - if (i != package_index) - CPU_IRQ(i) += delta; - /* - * We have sibling A and sibling B in the package - * - * cpu_irq[A] = load for cpu A + load for cpu B - * cpu_irq[B] = load for cpu B - */ - CPU_IRQ(package_index) += delta; - } - } - /* Find the least loaded processor package */ - for_each_online_cpu(i) { - if (i != CPU_TO_PACKAGEINDEX(i)) - continue; - if (min_cpu_irq > CPU_IRQ(i)) { - min_cpu_irq = CPU_IRQ(i); - min_loaded = i; - } - } - max_cpu_irq = ULONG_MAX; - -tryanothercpu: - /* Look for heaviest loaded processor. - * We may come back to get the next heaviest loaded processor. - * Skip processors with trivial loads. - */ - tmp_cpu_irq = 0; - tmp_loaded = -1; - for_each_online_cpu(i) { - if (i != CPU_TO_PACKAGEINDEX(i)) - continue; - if (max_cpu_irq <= CPU_IRQ(i)) - continue; - if (tmp_cpu_irq < CPU_IRQ(i)) { - tmp_cpu_irq = CPU_IRQ(i); - tmp_loaded = i; - } - } - - if (tmp_loaded == -1) { - /* In the case of small number of heavy interrupt sources, - * loading some of the cpus too much. We use Ingo's original - * approach to rotate them around. - */ - if (!first_attempt && imbalance >= useful_load_threshold) { - rotate_irqs_among_cpus(useful_load_threshold); - return; - } - goto not_worth_the_effort; - } - - first_attempt = 0; /* heaviest search */ - max_cpu_irq = tmp_cpu_irq; /* load */ - max_loaded = tmp_loaded; /* processor */ - imbalance = (max_cpu_irq - min_cpu_irq) / 2; - - /* if imbalance is less than approx 10% of max load, then - * observe diminishing returns action. - quit - */ - if (imbalance < (max_cpu_irq >> 3)) - goto not_worth_the_effort; - -tryanotherirq: - /* if we select an IRQ to move that can't go where we want, then - * see if there is another one to try. - */ - move_this_load = 0; - selected_irq = -1; - for (j = 0; j < NR_IRQS; j++) { - /* Is this an active IRQ? */ - if (!irq_desc[j].action) - continue; - if (imbalance <= IRQ_DELTA(max_loaded,j)) - continue; - /* Try to find the IRQ that is closest to the imbalance - * without going over. - */ - if (move_this_load < IRQ_DELTA(max_loaded,j)) { - move_this_load = IRQ_DELTA(max_loaded,j); - selected_irq = j; - } - } - if (selected_irq == -1) { - goto tryanothercpu; - } - - imbalance = move_this_load; - - /* For physical_balance case, we accumlated both load - * values in the one of the siblings cpu_irq[], - * to use the same code for physical and logical processors - * as much as possible. - * - * NOTE: the cpu_irq[] array holds the sum of the load for - * sibling A and sibling B in the slot for the lowest numbered - * sibling (A), _AND_ the load for sibling B in the slot for - * the higher numbered sibling. - * - * We seek the least loaded sibling by making the comparison - * (A+B)/2 vs B - */ - load = CPU_IRQ(min_loaded) >> 1; - for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { - if (load > CPU_IRQ(j)) { - /* This won't change cpu_sibling_map[min_loaded] */ - load = CPU_IRQ(j); - min_loaded = j; - } - } - - cpus_and(allowed_mask, - cpu_online_map, - balance_irq_affinity[selected_irq]); - target_cpu_mask = cpumask_of_cpu(min_loaded); - cpus_and(tmp, target_cpu_mask, allowed_mask); - - if (!cpus_empty(tmp)) { - /* mark for change destination */ - set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); - - /* Since we made a change, come back sooner to - * check for more variation. - */ - balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, - balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); - return; - } - goto tryanotherirq; - -not_worth_the_effort: - /* - * if we did not find an IRQ to move, then adjust the time interval - * upward - */ - balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, - balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); - return; -} - -static int balanced_irq(void *unused) -{ - int i; - unsigned long prev_balance_time = jiffies; - long time_remaining = balanced_irq_interval; - - /* push everything to CPU 0 to give us a starting point. */ - for (i = 0 ; i < NR_IRQS ; i++) { - irq_desc[i].pending_mask = cpumask_of_cpu(0); - set_pending_irq(i, cpumask_of_cpu(0)); - } - - set_freezable(); - for ( ; ; ) { - time_remaining = schedule_timeout_interruptible(time_remaining); - try_to_freeze(); - if (time_after(jiffies, - prev_balance_time+balanced_irq_interval)) { - preempt_disable(); - do_irq_balance(); - prev_balance_time = jiffies; - time_remaining = balanced_irq_interval; - preempt_enable(); - } - } - return 0; -} - -static int __init balanced_irq_init(void) -{ - int i; - struct cpuinfo_x86 *c; - cpumask_t tmp; - - cpus_shift_right(tmp, cpu_online_map, 2); - c = &boot_cpu_data; - /* When not overwritten by the command line ask subarchitecture. */ - if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) - irqbalance_disabled = NO_BALANCE_IRQ; - if (irqbalance_disabled) - return 0; - - /* disable irqbalance completely if there is only one processor online */ - if (num_online_cpus() < 2) { - irqbalance_disabled = 1; - return 0; - } - /* - * Enable physical balance only if more than 1 physical processor - * is present - */ - if (smp_num_siblings > 1 && !cpus_empty(tmp)) - physical_balance = 1; - - for_each_online_cpu(i) { - irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); - irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); - if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { - printk(KERN_ERR "balanced_irq_init: out of memory"); - goto failed; - } - memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS); - memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS); - } - - printk(KERN_INFO "Starting balanced_irq\n"); - if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) - return 0; - printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); -failed: - for_each_possible_cpu(i) { - kfree(irq_cpu_data[i].irq_delta); - irq_cpu_data[i].irq_delta = NULL; - kfree(irq_cpu_data[i].last_irq); - irq_cpu_data[i].last_irq = NULL; - } - return 0; -} - -int __devinit irqbalance_disable(char *str) -{ - irqbalance_disabled = 1; - return 1; -} - -__setup("noirqbalance", irqbalance_disable); - -late_initcall(balanced_irq_init); -#endif /* CONFIG_IRQBALANCE */ -#endif /* CONFIG_SMP */ - -#ifndef CONFIG_SMP -void fastcall send_IPI_self(int vector) -{ - unsigned int cfg; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write_around(APIC_ICR, cfg); -} -#endif /* !CONFIG_SMP */ - - -/* - * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to - * specific CPU-side IRQs. - */ - -#define MAX_PIRQS 8 -static int pirq_entries [MAX_PIRQS]; -static int pirqs_enabled; -int skip_ioapic_setup; - -static int __init ioapic_pirq_setup(char *str) -{ - int i, max; - int ints[MAX_PIRQS+1]; - - get_options(str, ARRAY_SIZE(ints), ints); - - for (i = 0; i < MAX_PIRQS; i++) - pirq_entries[i] = -1; - - pirqs_enabled = 1; - apic_printk(APIC_VERBOSE, KERN_INFO - "PIRQ redirection, working around broken MP-BIOS.\n"); - max = MAX_PIRQS; - if (ints[0] < MAX_PIRQS) - max = ints[0]; - - for (i = 0; i < max; i++) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); - /* - * PIRQs are mapped upside down, usually. - */ - pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; - } - return 1; -} - -__setup("pirq=", ioapic_pirq_setup); - -/* - * Find the IRQ entry number of a certain pin. - */ -static int find_irq_entry(int apic, int pin, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_irqtype == type && - (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) && - mp_irqs[i].mpc_dstirq == pin) - return i; - - return -1; -} - -/* - * Find the pin to which IRQ[irq] (ISA) is connected - */ -static int __init find_isa_irq_pin(int irq, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; - - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || - mp_bus_id_to_type[lbus] == MP_BUS_EISA || - mp_bus_id_to_type[lbus] == MP_BUS_MCA - ) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) - - return mp_irqs[i].mpc_dstirq; - } - return -1; -} - -static int __init find_isa_irq_apic(int irq, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; - - if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || - mp_bus_id_to_type[lbus] == MP_BUS_EISA || - mp_bus_id_to_type[lbus] == MP_BUS_MCA - ) && - (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_srcbusirq == irq)) - break; - } - if (i < mp_irq_entries) { - int apic; - for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) - return apic; - } - } - - return -1; -} - -/* - * Find a specific PCI IRQ entry. - * Not an __init, possibly needed by modules - */ -static int pin_2_irq(int idx, int apic, int pin); - -int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) -{ - int apic, i, best_guess = -1; - - apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " - "slot:%d, pin:%d.\n", bus, slot, pin); - if (mp_bus_id_to_pci_bus[bus] == -1) { - printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); - return -1; - } - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mpc_srcbus; - - for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic || - mp_irqs[i].mpc_dstapic == MP_APIC_ALL) - break; - - if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) && - !mp_irqs[i].mpc_irqtype && - (bus == lbus) && - (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); - - if (!(apic || IO_APIC_IRQ(irq))) - continue; - - if (pin == (mp_irqs[i].mpc_srcbusirq & 3)) - return irq; - /* - * Use the first all-but-pin matching entry as a - * best-guess fuzzy result for broken mptables. - */ - if (best_guess < 0) - best_guess = irq; - } - } - return best_guess; -} -EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); - -/* - * This function currently is only a helper for the i386 smp boot process where - * we need to reprogram the ioredtbls to cater for the cpus which have come online - * so mask in all cases should simply be TARGET_CPUS - */ -#ifdef CONFIG_SMP -void __init setup_ioapic_dest(void) -{ - int pin, ioapic, irq, irq_entry; - - if (skip_ioapic_setup == 1) - return; - - for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { - for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { - irq_entry = find_irq_entry(ioapic, pin, mp_INT); - if (irq_entry == -1) - continue; - irq = pin_2_irq(irq_entry, ioapic, pin); - set_ioapic_affinity_irq(irq, TARGET_CPUS); - } - - } -} -#endif - -/* - * EISA Edge/Level control register, ELCR - */ -static int EISA_ELCR(unsigned int irq) -{ - if (irq < 16) { - unsigned int port = 0x4d0 + (irq >> 3); - return (inb(port) >> (irq & 7)) & 1; - } - apic_printk(APIC_VERBOSE, KERN_INFO - "Broken MPtable reports ISA irq %d\n", irq); - return 0; -} - -/* EISA interrupts are always polarity zero and can be edge or level - * trigger depending on the ELCR value. If an interrupt is listed as - * EISA conforming in the MP table, that means its trigger type must - * be read in from the ELCR */ - -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) -#define default_EISA_polarity(idx) (0) - -/* ISA interrupts are always polarity zero edge triggered, - * when listed as conforming in the MP table. */ - -#define default_ISA_trigger(idx) (0) -#define default_ISA_polarity(idx) (0) - -/* PCI interrupts are always polarity one level triggered, - * when listed as conforming in the MP table. */ - -#define default_PCI_trigger(idx) (1) -#define default_PCI_polarity(idx) (1) - -/* MCA interrupts are always polarity zero level triggered, - * when listed as conforming in the MP table. */ - -#define default_MCA_trigger(idx) (1) -#define default_MCA_polarity(idx) (0) - -static int __init MPBIOS_polarity(int idx) -{ - int bus = mp_irqs[idx].mpc_srcbus; - int polarity; - - /* - * Determine IRQ line polarity (high active or low active): - */ - switch (mp_irqs[idx].mpc_irqflag & 3) - { - case 0: /* conforms, ie. bus-type dependent polarity */ - { - switch (mp_bus_id_to_type[bus]) - { - case MP_BUS_ISA: /* ISA pin */ - { - polarity = default_ISA_polarity(idx); - break; - } - case MP_BUS_EISA: /* EISA pin */ - { - polarity = default_EISA_polarity(idx); - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - polarity = default_PCI_polarity(idx); - break; - } - case MP_BUS_MCA: /* MCA pin */ - { - polarity = default_MCA_polarity(idx); - break; - } - default: - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - } - break; - } - case 1: /* high active */ - { - polarity = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - case 3: /* low active */ - { - polarity = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - } - return polarity; -} - -static int MPBIOS_trigger(int idx) -{ - int bus = mp_irqs[idx].mpc_srcbus; - int trigger; - - /* - * Determine IRQ trigger mode (edge or level sensitive): - */ - switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) - { - case 0: /* conforms, ie. bus-type dependent */ - { - switch (mp_bus_id_to_type[bus]) - { - case MP_BUS_ISA: /* ISA pin */ - { - trigger = default_ISA_trigger(idx); - break; - } - case MP_BUS_EISA: /* EISA pin */ - { - trigger = default_EISA_trigger(idx); - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - trigger = default_PCI_trigger(idx); - break; - } - case MP_BUS_MCA: /* MCA pin */ - { - trigger = default_MCA_trigger(idx); - break; - } - default: - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - } - break; - } - case 1: /* edge */ - { - trigger = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - case 3: /* level */ - { - trigger = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 0; - break; - } - } - return trigger; -} - -static inline int irq_polarity(int idx) -{ - return MPBIOS_polarity(idx); -} - -static inline int irq_trigger(int idx) -{ - return MPBIOS_trigger(idx); -} - -static int pin_2_irq(int idx, int apic, int pin) -{ - int irq, i; - int bus = mp_irqs[idx].mpc_srcbus; - - /* - * Debugging check, we are in big trouble if this message pops up! - */ - if (mp_irqs[idx].mpc_dstirq != pin) - printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); - - switch (mp_bus_id_to_type[bus]) - { - case MP_BUS_ISA: /* ISA pin */ - case MP_BUS_EISA: - case MP_BUS_MCA: - { - irq = mp_irqs[idx].mpc_srcbusirq; - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - /* - * PCI IRQs are mapped in order - */ - i = irq = 0; - while (i < apic) - irq += nr_ioapic_registers[i++]; - irq += pin; - - /* - * For MPS mode, so far only needed by ES7000 platform - */ - if (ioapic_renumber_irq) - irq = ioapic_renumber_irq(apic, irq); - - break; - } - default: - { - printk(KERN_ERR "unknown bus type %d.\n",bus); - irq = 0; - break; - } - } - - /* - * PCI IRQ command line redirection. Yes, limits are hardcoded. - */ - if ((pin >= 16) && (pin <= 23)) { - if (pirq_entries[pin-16] != -1) { - if (!pirq_entries[pin-16]) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - "disabling PIRQ%d\n", pin-16); - } else { - irq = pirq_entries[pin-16]; - apic_printk(APIC_VERBOSE, KERN_DEBUG - "using PIRQ%d -> IRQ %d\n", - pin-16, irq); - } - } - } - return irq; -} - -static inline int IO_APIC_irq_trigger(int irq) -{ - int apic, idx, pin; - - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - idx = find_irq_entry(apic,pin,mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) - return irq_trigger(idx); - } - } - /* - * nonexistent IRQs are edge default - */ - return 0; -} - -/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ -static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; - -static int __assign_irq_vector(int irq) -{ - static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; - int vector, offset, i; - - BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); - - if (irq_vector[irq] > 0) - return irq_vector[irq]; - - vector = current_vector; - offset = current_offset; -next: - vector += 8; - if (vector >= FIRST_SYSTEM_VECTOR) { - offset = (offset + 1) % 8; - vector = FIRST_DEVICE_VECTOR + offset; - } - if (vector == current_vector) - return -ENOSPC; - if (vector == SYSCALL_VECTOR) - goto next; - for (i = 0; i < NR_IRQ_VECTORS; i++) - if (irq_vector[i] == vector) - goto next; - - current_vector = vector; - current_offset = offset; - irq_vector[irq] = vector; - - return vector; -} - -static int assign_irq_vector(int irq) -{ - unsigned long flags; - int vector; - - spin_lock_irqsave(&vector_lock, flags); - vector = __assign_irq_vector(irq); - spin_unlock_irqrestore(&vector_lock, flags); - - return vector; -} -static struct irq_chip ioapic_chip; - -#define IOAPIC_AUTO -1 -#define IOAPIC_EDGE 0 -#define IOAPIC_LEVEL 1 - -static void ioapic_register_intr(int irq, int vector, unsigned long trigger) -{ - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) { - irq_desc[irq].status |= IRQ_LEVEL; - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_fasteoi_irq, "fasteoi"); - } else { - irq_desc[irq].status &= ~IRQ_LEVEL; - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_edge_irq, "edge"); - } - set_intr_gate(vector, interrupt[irq]); -} - -static void __init setup_IO_APIC_irqs(void) -{ - struct IO_APIC_route_entry entry; - int apic, pin, idx, irq, first_notcon = 1, vector; - unsigned long flags; - - apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - - /* - * add it to the IO-APIC irq-routing table: - */ - memset(&entry,0,sizeof(entry)); - - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.mask = 0; /* enable IRQ */ - entry.dest.logical.logical_dest = - cpu_mask_to_apicid(TARGET_CPUS); - - idx = find_irq_entry(apic,pin,mp_INT); - if (idx == -1) { - if (first_notcon) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - " IO-APIC (apicid-pin) %d-%d", - mp_ioapics[apic].mpc_apicid, - pin); - first_notcon = 0; - } else - apic_printk(APIC_VERBOSE, ", %d-%d", - mp_ioapics[apic].mpc_apicid, pin); - continue; - } - - entry.trigger = irq_trigger(idx); - entry.polarity = irq_polarity(idx); - - if (irq_trigger(idx)) { - entry.trigger = 1; - entry.mask = 1; - } - - irq = pin_2_irq(idx, apic, pin); - /* - * skip adding the timer int on secondary nodes, which causes - * a small but painful rift in the time-space continuum - */ - if (multi_timer_check(apic, irq)) - continue; - else - add_pin_to_irq(irq, apic, pin); - - if (!apic && !IO_APIC_IRQ(irq)) - continue; - - if (IO_APIC_IRQ(irq)) { - vector = assign_irq_vector(irq); - entry.vector = vector; - ioapic_register_intr(irq, vector, IOAPIC_AUTO); - - if (!apic && (irq < 16)) - disable_8259A_irq(irq); - } - spin_lock_irqsave(&ioapic_lock, flags); - __ioapic_write_entry(apic, pin, entry); - spin_unlock_irqrestore(&ioapic_lock, flags); - } - } - - if (!first_notcon) - apic_printk(APIC_VERBOSE, " not connected.\n"); -} - -/* - * Set up the 8259A-master output pin: - */ -static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) -{ - struct IO_APIC_route_entry entry; - - memset(&entry,0,sizeof(entry)); - - disable_8259A_irq(0); - - /* mask LVT0 */ - apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); - - /* - * We use logical delivery to get the timer IRQ - * to the first CPU. - */ - entry.dest_mode = INT_DEST_MODE; - entry.mask = 0; /* unmask IRQ now */ - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); - entry.delivery_mode = INT_DELIVERY_MODE; - entry.polarity = 0; - entry.trigger = 0; - entry.vector = vector; - - /* - * The timer IRQ doesn't have to know that behind the - * scene we have a 8259A-master in AEOI mode ... - */ - irq_desc[0].chip = &ioapic_chip; - set_irq_handler(0, handle_edge_irq); - - /* - * Add it to the IO-APIC irq-routing table: - */ - ioapic_write_entry(apic, pin, entry); - - enable_8259A_irq(0); -} - -void __init print_IO_APIC(void) -{ - int apic, i; - union IO_APIC_reg_00 reg_00; - union IO_APIC_reg_01 reg_01; - union IO_APIC_reg_02 reg_02; - union IO_APIC_reg_03 reg_03; - unsigned long flags; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); - for (i = 0; i < nr_ioapics; i++) - printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); - - /* - * We are a bit conservative about what we expect. We have to - * know about every hardware change ASAP. - */ - printk(KERN_INFO "testing the IO APIC.......................\n"); - - for (apic = 0; apic < nr_ioapics; apic++) { - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - reg_01.raw = io_apic_read(apic, 1); - if (reg_01.bits.version >= 0x10) - reg_02.raw = io_apic_read(apic, 2); - if (reg_01.bits.version >= 0x20) - reg_03.raw = io_apic_read(apic, 3); - spin_unlock_irqrestore(&ioapic_lock, flags); - - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); - printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); - printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); - printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); - printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); - - printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); - printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); - - printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); - printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); - - /* - * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, - * but the value of reg_02 is read as the previous read register - * value, so ignore it if reg_02 == reg_01. - */ - if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { - printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); - printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); - } - - /* - * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 - * or reg_03, but the value of reg_0[23] is read as the previous read - * register value, so ignore it if reg_03 == reg_0[12]. - */ - if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && - reg_03.raw != reg_01.raw) { - printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); - printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); - } - - printk(KERN_DEBUG ".... IRQ redirection table:\n"); - - printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" - " Stat Dest Deli Vect: \n"); - - for (i = 0; i <= reg_01.bits.entries; i++) { - struct IO_APIC_route_entry entry; - - entry = ioapic_read_entry(apic, i); - - printk(KERN_DEBUG " %02x %03X %02X ", - i, - entry.dest.logical.logical_dest, - entry.dest.physical.physical_dest - ); - - printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", - entry.mask, - entry.trigger, - entry.irr, - entry.polarity, - entry.delivery_status, - entry.dest_mode, - entry.delivery_mode, - entry.vector - ); - } - } - printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for (i = 0; i < NR_IRQS; i++) { - struct irq_pin_list *entry = irq_2_pin + i; - if (entry->pin < 0) - continue; - printk(KERN_DEBUG "IRQ%d ", i); - for (;;) { - printk("-> %d:%d", entry->apic, entry->pin); - if (!entry->next) - break; - entry = irq_2_pin + entry->next; - } - printk("\n"); - } - - printk(KERN_INFO ".................................... done.\n"); - - return; -} - -#if 0 - -static void print_APIC_bitfield (int base) -{ - unsigned int v; - int i, j; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); - for (i = 0; i < 8; i++) { - v = apic_read(base + i*0x10); - for (j = 0; j < 32; j++) { - if (v & (1< 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - v = apic_read(APIC_ESR); - printk(KERN_DEBUG "... APIC ESR: %08x\n", v); - } - - v = apic_read(APIC_ICR); - printk(KERN_DEBUG "... APIC ICR: %08x\n", v); - v = apic_read(APIC_ICR2); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", v); - - v = apic_read(APIC_LVTT); - printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); - - if (maxlvt > 3) { /* PC is LVT#4. */ - v = apic_read(APIC_LVTPC); - printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); - } - v = apic_read(APIC_LVT0); - printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); - v = apic_read(APIC_LVT1); - printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); - - if (maxlvt > 2) { /* ERR is LVT#3. */ - v = apic_read(APIC_LVTERR); - printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); - } - - v = apic_read(APIC_TMICT); - printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); - v = apic_read(APIC_TMCCT); - printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); - v = apic_read(APIC_TDCR); - printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); - printk("\n"); -} - -void print_all_local_APICs (void) -{ - on_each_cpu(print_local_APIC, NULL, 1, 1); -} - -void /*__init*/ print_PIC(void) -{ - unsigned int v; - unsigned long flags; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "\nprinting PIC contents\n"); - - spin_lock_irqsave(&i8259A_lock, flags); - - v = inb(0xa1) << 8 | inb(0x21); - printk(KERN_DEBUG "... PIC IMR: %04x\n", v); - - v = inb(0xa0) << 8 | inb(0x20); - printk(KERN_DEBUG "... PIC IRR: %04x\n", v); - - outb(0x0b,0xa0); - outb(0x0b,0x20); - v = inb(0xa0) << 8 | inb(0x20); - outb(0x0a,0xa0); - outb(0x0a,0x20); - - spin_unlock_irqrestore(&i8259A_lock, flags); - - printk(KERN_DEBUG "... PIC ISR: %04x\n", v); - - v = inb(0x4d1) << 8 | inb(0x4d0); - printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); -} - -#endif /* 0 */ - -static void __init enable_IO_APIC(void) -{ - union IO_APIC_reg_01 reg_01; - int i8259_apic, i8259_pin; - int i, apic; - unsigned long flags; - - for (i = 0; i < PIN_MAP_SIZE; i++) { - irq_2_pin[i].pin = -1; - irq_2_pin[i].next = 0; - } - if (!pirqs_enabled) - for (i = 0; i < MAX_PIRQS; i++) - pirq_entries[i] = -1; - - /* - * The number of IO-APIC IRQ registers (== #pins): - */ - for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[apic] = reg_01.bits.entries+1; - } - for(apic = 0; apic < nr_ioapics; apic++) { - int pin; - /* See if any of the pins is in ExtINT mode */ - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - struct IO_APIC_route_entry entry; - entry = ioapic_read_entry(apic, pin); - - - /* If the interrupt line is enabled and in ExtInt mode - * I have found the pin where the i8259 is connected. - */ - if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { - ioapic_i8259.apic = apic; - ioapic_i8259.pin = pin; - goto found_i8259; - } - } - } - found_i8259: - /* Look to see what if the MP table has reported the ExtINT */ - /* If we could not find the appropriate pin by looking at the ioapic - * the i8259 probably is not connected the ioapic but give the - * mptable a chance anyway. - */ - i8259_pin = find_isa_irq_pin(0, mp_ExtINT); - i8259_apic = find_isa_irq_apic(0, mp_ExtINT); - /* Trust the MP table if nothing is setup in the hardware */ - if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { - printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); - ioapic_i8259.pin = i8259_pin; - ioapic_i8259.apic = i8259_apic; - } - /* Complain if the MP table and the hardware disagree */ - if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && - (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) - { - printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); - } - - /* - * Do not trust the IO-APIC being empty at bootup - */ - clear_IO_APIC(); -} - -/* - * Not an __init, needed by the reboot code - */ -void disable_IO_APIC(void) -{ - /* - * Clear the IO-APIC before rebooting: - */ - clear_IO_APIC(); - - /* - * If the i8259 is routed through an IOAPIC - * Put that IOAPIC in virtual wire mode - * so legacy interrupts can be delivered. - */ - if (ioapic_i8259.pin != -1) { - struct IO_APIC_route_entry entry; - - memset(&entry, 0, sizeof(entry)); - entry.mask = 0; /* Enabled */ - entry.trigger = 0; /* Edge */ - entry.irr = 0; - entry.polarity = 0; /* High */ - entry.delivery_status = 0; - entry.dest_mode = 0; /* Physical */ - entry.delivery_mode = dest_ExtINT; /* ExtInt */ - entry.vector = 0; - entry.dest.physical.physical_dest = - GET_APIC_ID(apic_read(APIC_ID)); - - /* - * Add it to the IO-APIC irq-routing table: - */ - ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); - } - disconnect_bsp_APIC(ioapic_i8259.pin != -1); -} - -/* - * function to set the IO-APIC physical IDs based on the - * values stored in the MPC table. - * - * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 - */ - -#ifndef CONFIG_X86_NUMAQ -static void __init setup_ioapic_ids_from_mpc(void) -{ - union IO_APIC_reg_00 reg_00; - physid_mask_t phys_id_present_map; - int apic; - int i; - unsigned char old_id; - unsigned long flags; - - /* - * Don't check I/O APIC IDs for xAPIC systems. They have - * no meaning without the serial APIC bus. - */ - if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return; - /* - * This is broken; anything with a real cpu count has to - * circumvent this idiocy regardless. - */ - phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); - - /* - * Set the IOAPIC ID to the value stored in the MPC table. - */ - for (apic = 0; apic < nr_ioapics; apic++) { - - /* Read the register 0 value */ - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - old_id = mp_ioapics[apic].mpc_apicid; - - if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].mpc_apicid); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - reg_00.bits.ID); - mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; - } - - /* - * Sanity check, is the ID really free? Every APIC in a - * system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. - */ - if (check_apicid_used(phys_id_present_map, - mp_ioapics[apic].mpc_apicid)) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].mpc_apicid); - for (i = 0; i < get_physical_broadcast(); i++) - if (!physid_isset(i, phys_id_present_map)) - break; - if (i >= get_physical_broadcast()) - panic("Max APIC ID exceeded!\n"); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - i); - physid_set(i, phys_id_present_map); - mp_ioapics[apic].mpc_apicid = i; - } else { - physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid); - apic_printk(APIC_VERBOSE, "Setting %d in the " - "phys_id_present_map\n", - mp_ioapics[apic].mpc_apicid); - physids_or(phys_id_present_map, phys_id_present_map, tmp); - } - - - /* - * We need to adjust the IRQ routing table - * if the ID changed. - */ - if (old_id != mp_ioapics[apic].mpc_apicid) - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mpc_dstapic == old_id) - mp_irqs[i].mpc_dstapic - = mp_ioapics[apic].mpc_apicid; - - /* - * Read the right value from the MPC table and - * write it into the ID register. - */ - apic_printk(APIC_VERBOSE, KERN_INFO - "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].mpc_apicid); - - reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0, reg_00.raw); - spin_unlock_irqrestore(&ioapic_lock, flags); - - /* - * Sanity check - */ - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) - printk("could not set ID!\n"); - else - apic_printk(APIC_VERBOSE, " ok.\n"); - } -} -#else -static void __init setup_ioapic_ids_from_mpc(void) { } -#endif - -int no_timer_check __initdata; - -static int __init notimercheck(char *s) -{ - no_timer_check = 1; - return 1; -} -__setup("no_timer_check", notimercheck); - -/* - * There is a nasty bug in some older SMP boards, their mptable lies - * about the timer IRQ. We do the following to work around the situation: - * - * - timer IRQ defaults to IO-APIC IRQ - * - if this function detects that timer IRQs are defunct, then we fall - * back to ISA timer IRQs - */ -static int __init timer_irq_works(void) -{ - unsigned long t1 = jiffies; - - if (no_timer_check) - return 1; - - local_irq_enable(); - /* Let ten ticks pass... */ - mdelay((10 * 1000) / HZ); - - /* - * Expect a few ticks at least, to be sure some possible - * glue logic does not lock up after one or two first - * ticks in a non-ExtINT mode. Also the local APIC - * might have cached one ExtINT interrupt. Finally, at - * least one tick may be lost due to delays. - */ - if (jiffies - t1 > 4) - return 1; - - return 0; -} - -/* - * In the SMP+IOAPIC case it might happen that there are an unspecified - * number of pending IRQ events unhandled. These cases are very rare, - * so we 'resend' these IRQs via IPIs, to the same CPU. It's much - * better to do it this way as thus we do not have to be aware of - * 'pending' interrupts in the IRQ path, except at this point. - */ -/* - * Edge triggered needs to resend any interrupt - * that was delayed but this is now handled in the device - * independent code. - */ - -/* - * Startup quirk: - * - * Starting up a edge-triggered IO-APIC interrupt is - * nasty - we need to make sure that we get the edge. - * If it is already asserted for some reason, we need - * return 1 to indicate that is was pending. - * - * This is not complete - we should be able to fake - * an edge even if it isn't on the 8259A... - * - * (We do this for level-triggered IRQs too - it cannot hurt.) - */ -static unsigned int startup_ioapic_irq(unsigned int irq) -{ - int was_pending = 0; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - if (irq < 16) { - disable_8259A_irq(irq); - if (i8259A_irq_pending(irq)) - was_pending = 1; - } - __unmask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return was_pending; -} - -static void ack_ioapic_irq(unsigned int irq) -{ - move_native_irq(irq); - ack_APIC_irq(); -} - -static void ack_ioapic_quirk_irq(unsigned int irq) -{ - unsigned long v; - int i; - - move_native_irq(irq); -/* - * It appears there is an erratum which affects at least version 0x11 - * of I/O APIC (that's the 82093AA and cores integrated into various - * chipsets). Under certain conditions a level-triggered interrupt is - * erroneously delivered as edge-triggered one but the respective IRR - * bit gets set nevertheless. As a result the I/O unit expects an EOI - * message but it will never arrive and further interrupts are blocked - * from the source. The exact reason is so far unknown, but the - * phenomenon was observed when two consecutive interrupt requests - * from a given source get delivered to the same CPU and the source is - * temporarily disabled in between. - * - * A workaround is to simulate an EOI message manually. We achieve it - * by setting the trigger mode to edge and then to level when the edge - * trigger mode gets detected in the TMR of a local APIC for a - * level-triggered interrupt. We mask the source for the time of the - * operation to prevent an edge-triggered interrupt escaping meanwhile. - * The idea is from Manfred Spraul. --macro - */ - i = irq_vector[irq]; - - v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); - - ack_APIC_irq(); - - if (!(v & (1 << (i & 0x1f)))) { - atomic_inc(&irq_mis_count); - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); - spin_unlock(&ioapic_lock); - } -} - -static int ioapic_retrigger_irq(unsigned int irq) -{ - send_IPI_self(irq_vector[irq]); - - return 1; -} - -static struct irq_chip ioapic_chip __read_mostly = { - .name = "IO-APIC", - .startup = startup_ioapic_irq, - .mask = mask_IO_APIC_irq, - .unmask = unmask_IO_APIC_irq, - .ack = ack_ioapic_irq, - .eoi = ack_ioapic_quirk_irq, -#ifdef CONFIG_SMP - .set_affinity = set_ioapic_affinity_irq, -#endif - .retrigger = ioapic_retrigger_irq, -}; - - -static inline void init_IO_APIC_traps(void) -{ - int irq; - - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ - for (irq = 0; irq < NR_IRQS ; irq++) { - int tmp = irq; - if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) { - /* - * Hmm.. We don't have an entry for this, - * so default to an old-fashioned 8259 - * interrupt if we can.. - */ - if (irq < 16) - make_8259A_irq(irq); - else - /* Strange. Oh, well.. */ - irq_desc[irq].chip = &no_irq_chip; - } - } -} - -/* - * The local APIC irq-chip implementation: - */ - -static void ack_apic(unsigned int irq) -{ - ack_APIC_irq(); -} - -static void mask_lapic_irq (unsigned int irq) -{ - unsigned long v; - - v = apic_read(APIC_LVT0); - apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); -} - -static void unmask_lapic_irq (unsigned int irq) -{ - unsigned long v; - - v = apic_read(APIC_LVT0); - apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); -} - -static struct irq_chip lapic_chip __read_mostly = { - .name = "local-APIC-edge", - .mask = mask_lapic_irq, - .unmask = unmask_lapic_irq, - .eoi = ack_apic, -}; - -static void setup_nmi (void) -{ - /* - * Dirty trick to enable the NMI watchdog ... - * We put the 8259A master into AEOI mode and - * unmask on all local APICs LVT0 as NMI. - * - * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') - * is from Maciej W. Rozycki - so we do not have to EOI from - * the NMI handler or the timer interrupt. - */ - apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); - - on_each_cpu(enable_NMI_through_LVT0, NULL, 1, 1); - - apic_printk(APIC_VERBOSE, " done.\n"); -} - -/* - * This looks a bit hackish but it's about the only one way of sending - * a few INTA cycles to 8259As and any associated glue logic. ICR does - * not support the ExtINT mode, unfortunately. We need to send these - * cycles as some i82489DX-based boards have glue logic that keeps the - * 8259A interrupt line asserted until INTA. --macro - */ -static inline void unlock_ExtINT_logic(void) -{ - int apic, pin, i; - struct IO_APIC_route_entry entry0, entry1; - unsigned char save_control, save_freq_select; - - pin = find_isa_irq_pin(8, mp_INT); - if (pin == -1) { - WARN_ON_ONCE(1); - return; - } - apic = find_isa_irq_apic(8, mp_INT); - if (apic == -1) { - WARN_ON_ONCE(1); - return; - } - - entry0 = ioapic_read_entry(apic, pin); - clear_IO_APIC_pin(apic, pin); - - memset(&entry1, 0, sizeof(entry1)); - - entry1.dest_mode = 0; /* physical delivery */ - entry1.mask = 0; /* unmask IRQ now */ - entry1.dest.physical.physical_dest = hard_smp_processor_id(); - entry1.delivery_mode = dest_ExtINT; - entry1.polarity = entry0.polarity; - entry1.trigger = 0; - entry1.vector = 0; - - ioapic_write_entry(apic, pin, entry1); - - save_control = CMOS_READ(RTC_CONTROL); - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, - RTC_FREQ_SELECT); - CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); - - i = 100; - while (i-- > 0) { - mdelay(10); - if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) - i -= 10; - } - - CMOS_WRITE(save_control, RTC_CONTROL); - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - clear_IO_APIC_pin(apic, pin); - - ioapic_write_entry(apic, pin, entry0); -} - -int timer_uses_ioapic_pin_0; - -/* - * This code may look a bit paranoid, but it's supposed to cooperate with - * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ - * is so screwy. Thanks to Brian Perkins for testing/hacking this beast - * fanatically on his truly buggy board. - */ -static inline void __init check_timer(void) -{ - int apic1, pin1, apic2, pin2; - int vector; - - /* - * get/set the timer IRQ vector: - */ - disable_8259A_irq(0); - vector = assign_irq_vector(0); - set_intr_gate(vector, interrupt[0]); - - /* - * Subtle, code in do_timer_interrupt() expects an AEOI - * mode for the 8259A whenever interrupts are routed - * through I/O APICs. Also IRQ0 has to be enabled in - * the 8259A which implies the virtual wire has to be - * disabled in the local APIC. - */ - apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); - init_8259A(1); - timer_ack = 1; - if (timer_over_8254 > 0) - enable_8259A_irq(0); - - pin1 = find_isa_irq_pin(0, mp_INT); - apic1 = find_isa_irq_apic(0, mp_INT); - pin2 = ioapic_i8259.pin; - apic2 = ioapic_i8259.apic; - - if (pin1 == 0) - timer_uses_ioapic_pin_0 = 1; - - printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", - vector, apic1, pin1, apic2, pin2); - - if (pin1 != -1) { - /* - * Ok, does IRQ0 through the IOAPIC work? - */ - unmask_IO_APIC_irq(0); - if (timer_irq_works()) { - if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); - setup_nmi(); - enable_8259A_irq(0); - } - if (disable_timer_pin_1 > 0) - clear_IO_APIC_pin(0, pin1); - return; - } - clear_IO_APIC_pin(apic1, pin1); - printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to " - "IO-APIC\n"); - } - - printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); - if (pin2 != -1) { - printk("\n..... (found pin %d) ...", pin2); - /* - * legacy devices should be connected to IO APIC #0 - */ - setup_ExtINT_IRQ0_pin(apic2, pin2, vector); - if (timer_irq_works()) { - printk("works.\n"); - if (pin1 != -1) - replace_pin_at_irq(0, apic1, pin1, apic2, pin2); - else - add_pin_to_irq(0, apic2, pin2); - if (nmi_watchdog == NMI_IO_APIC) { - setup_nmi(); - } - return; - } - /* - * Cleanup, just in case ... - */ - clear_IO_APIC_pin(apic2, pin2); - } - printk(" failed.\n"); - - if (nmi_watchdog == NMI_IO_APIC) { - printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); - nmi_watchdog = 0; - } - - printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); - - disable_8259A_irq(0); - set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, - "fasteoi"); - apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ - enable_8259A_irq(0); - - if (timer_irq_works()) { - printk(" works.\n"); - return; - } - apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); - printk(" failed.\n"); - - printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); - - timer_ack = 0; - init_8259A(0); - make_8259A_irq(0); - apic_write_around(APIC_LVT0, APIC_DM_EXTINT); - - unlock_ExtINT_logic(); - - if (timer_irq_works()) { - printk(" works.\n"); - return; - } - printk(" failed :(.\n"); - panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " - "report. Then try booting with the 'noapic' option"); -} - -/* - * - * IRQ's that are handled by the PIC in the MPS IOAPIC case. - * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. - * Linux doesn't really care, as it's not actually used - * for any interrupt handling anyway. - */ -#define PIC_IRQS (1 << PIC_CASCADE_IR) - -void __init setup_IO_APIC(void) -{ - enable_IO_APIC(); - - if (acpi_ioapic) - io_apic_irqs = ~0; /* all IRQs go through IOAPIC */ - else - io_apic_irqs = ~PIC_IRQS; - - printk("ENABLING IO-APIC IRQs\n"); - - /* - * Set up IO-APIC IRQ routing. - */ - if (!acpi_ioapic) - setup_ioapic_ids_from_mpc(); - sync_Arb_IDs(); - setup_IO_APIC_irqs(); - init_IO_APIC_traps(); - check_timer(); - if (!acpi_ioapic) - print_IO_APIC(); -} - -static int __init setup_disable_8254_timer(char *s) -{ - timer_over_8254 = -1; - return 1; -} -static int __init setup_enable_8254_timer(char *s) -{ - timer_over_8254 = 2; - return 1; -} - -__setup("disable_8254_timer", setup_disable_8254_timer); -__setup("enable_8254_timer", setup_enable_8254_timer); - -/* - * Called after all the initialization is done. If we didnt find any - * APIC bugs then we can allow the modify fast path - */ - -static int __init io_apic_bug_finalize(void) -{ - if(sis_apic_bug == -1) - sis_apic_bug = 0; - return 0; -} - -late_initcall(io_apic_bug_finalize); - -struct sysfs_ioapic_data { - struct sys_device dev; - struct IO_APIC_route_entry entry[0]; -}; -static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; - -static int ioapic_suspend(struct sys_device *dev, pm_message_t state) -{ - struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; - int i; - - data = container_of(dev, struct sysfs_ioapic_data, dev); - entry = data->entry; - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) - entry[i] = ioapic_read_entry(dev->id, i); - - return 0; -} - -static int ioapic_resume(struct sys_device *dev) -{ - struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; - unsigned long flags; - union IO_APIC_reg_00 reg_00; - int i; - - data = container_of(dev, struct sysfs_ioapic_data, dev); - entry = data->entry; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; - io_apic_write(dev->id, 0, reg_00.raw); - } - spin_unlock_irqrestore(&ioapic_lock, flags); - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) - ioapic_write_entry(dev->id, i, entry[i]); - - return 0; -} - -static struct sysdev_class ioapic_sysdev_class = { - set_kset_name("ioapic"), - .suspend = ioapic_suspend, - .resume = ioapic_resume, -}; - -static int __init ioapic_init_sysfs(void) -{ - struct sys_device * dev; - int i, size, error = 0; - - error = sysdev_class_register(&ioapic_sysdev_class); - if (error) - return error; - - for (i = 0; i < nr_ioapics; i++ ) { - size = sizeof(struct sys_device) + nr_ioapic_registers[i] - * sizeof(struct IO_APIC_route_entry); - mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); - if (!mp_ioapic_data[i]) { - printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); - continue; - } - memset(mp_ioapic_data[i], 0, size); - dev = &mp_ioapic_data[i]->dev; - dev->id = i; - dev->cls = &ioapic_sysdev_class; - error = sysdev_register(dev); - if (error) { - kfree(mp_ioapic_data[i]); - mp_ioapic_data[i] = NULL; - printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); - continue; - } - } - - return 0; -} - -device_initcall(ioapic_init_sysfs); - -/* - * Dynamic irq allocate and deallocation - */ -int create_irq(void) -{ - /* Allocate an unused irq */ - int irq, new, vector = 0; - unsigned long flags; - - irq = -ENOSPC; - spin_lock_irqsave(&vector_lock, flags); - for (new = (NR_IRQS - 1); new >= 0; new--) { - if (platform_legacy_irq(new)) - continue; - if (irq_vector[new] != 0) - continue; - vector = __assign_irq_vector(new); - if (likely(vector > 0)) - irq = new; - break; - } - spin_unlock_irqrestore(&vector_lock, flags); - - if (irq >= 0) { - set_intr_gate(vector, interrupt[irq]); - dynamic_irq_init(irq); - } - return irq; -} - -void destroy_irq(unsigned int irq) -{ - unsigned long flags; - - dynamic_irq_cleanup(irq); - - spin_lock_irqsave(&vector_lock, flags); - irq_vector[irq] = 0; - spin_unlock_irqrestore(&vector_lock, flags); -} - -/* - * MSI mesage composition - */ -#ifdef CONFIG_PCI_MSI -static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) -{ - int vector; - unsigned dest; - - vector = assign_irq_vector(irq); - if (vector >= 0) { - dest = cpu_mask_to_apicid(TARGET_CPUS); - - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = - MSI_ADDR_BASE_LO | - ((INT_DEST_MODE == 0) ? - MSI_ADDR_DEST_MODE_PHYSICAL: - MSI_ADDR_DEST_MODE_LOGICAL) | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU: - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); - - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED: - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(vector); - } - return vector; -} - -#ifdef CONFIG_SMP -static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) -{ - struct msi_msg msg; - unsigned int dest; - cpumask_t tmp; - int vector; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - tmp = TARGET_CPUS; - - vector = assign_irq_vector(irq); - if (vector < 0) - return; - - dest = cpu_mask_to_apicid(mask); - - read_msi_msg(irq, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - - write_msi_msg(irq, &msg); - irq_desc[irq].affinity = mask; -} -#endif /* CONFIG_SMP */ - -/* - * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, - * which implement the MSI or MSI-X Capability Structure. - */ -static struct irq_chip msi_chip = { - .name = "PCI-MSI", - .unmask = unmask_msi_irq, - .mask = mask_msi_irq, - .ack = ack_ioapic_irq, -#ifdef CONFIG_SMP - .set_affinity = set_msi_irq_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) -{ - struct msi_msg msg; - int irq, ret; - irq = create_irq(); - if (irq < 0) - return irq; - - ret = msi_compose_msg(dev, irq, &msg); - if (ret < 0) { - destroy_irq(irq); - return ret; - } - - set_irq_msi(irq, desc); - write_msi_msg(irq, &msg); - - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, - "edge"); - - return 0; -} - -void arch_teardown_msi_irq(unsigned int irq) -{ - destroy_irq(irq); -} - -#endif /* CONFIG_PCI_MSI */ - -/* - * Hypertransport interrupt support - */ -#ifdef CONFIG_HT_IRQ - -#ifdef CONFIG_SMP - -static void target_ht_irq(unsigned int irq, unsigned int dest) -{ - struct ht_irq_msg msg; - fetch_ht_irq_msg(irq, &msg); - - msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK); - msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); - - msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest); - msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); - - write_ht_irq_msg(irq, &msg); -} - -static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) -{ - unsigned int dest; - cpumask_t tmp; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - tmp = TARGET_CPUS; - - cpus_and(mask, tmp, CPU_MASK_ALL); - - dest = cpu_mask_to_apicid(mask); - - target_ht_irq(irq, dest); - irq_desc[irq].affinity = mask; -} -#endif - -static struct irq_chip ht_irq_chip = { - .name = "PCI-HT", - .mask = mask_ht_irq, - .unmask = unmask_ht_irq, - .ack = ack_ioapic_irq, -#ifdef CONFIG_SMP - .set_affinity = set_ht_irq_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) -{ - int vector; - - vector = assign_irq_vector(irq); - if (vector >= 0) { - struct ht_irq_msg msg; - unsigned dest; - cpumask_t tmp; - - cpus_clear(tmp); - cpu_set(vector >> 8, tmp); - dest = cpu_mask_to_apicid(tmp); - - msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); - - msg.address_lo = - HT_IRQ_LOW_BASE | - HT_IRQ_LOW_DEST_ID(dest) | - HT_IRQ_LOW_VECTOR(vector) | - ((INT_DEST_MODE == 0) ? - HT_IRQ_LOW_DM_PHYSICAL : - HT_IRQ_LOW_DM_LOGICAL) | - HT_IRQ_LOW_RQEOI_EDGE | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - HT_IRQ_LOW_MT_FIXED : - HT_IRQ_LOW_MT_ARBITRATED) | - HT_IRQ_LOW_IRQ_MASKED; - - write_ht_irq_msg(irq, &msg); - - set_irq_chip_and_handler_name(irq, &ht_irq_chip, - handle_edge_irq, "edge"); - } - return vector; -} -#endif /* CONFIG_HT_IRQ */ - -/* -------------------------------------------------------------------------- - ACPI-based IOAPIC Configuration - -------------------------------------------------------------------------- */ - -#ifdef CONFIG_ACPI - -int __init io_apic_get_unique_id (int ioapic, int apic_id) -{ - union IO_APIC_reg_00 reg_00; - static physid_mask_t apic_id_map = PHYSID_MASK_NONE; - physid_mask_t tmp; - unsigned long flags; - int i = 0; - - /* - * The P4 platform supports up to 256 APIC IDs on two separate APIC - * buses (one for LAPICs, one for IOAPICs), where predecessors only - * supports up to 16 on one shared APIC bus. - * - * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full - * advantage of new APIC bus architecture. - */ - - if (physids_empty(apic_id_map)) - apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - if (apic_id >= get_physical_broadcast()) { - printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " - "%d\n", ioapic, apic_id, reg_00.bits.ID); - apic_id = reg_00.bits.ID; - } - - /* - * Every APIC in a system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. - */ - if (check_apicid_used(apic_id_map, apic_id)) { - - for (i = 0; i < get_physical_broadcast(); i++) { - if (!check_apicid_used(apic_id_map, i)) - break; - } - - if (i == get_physical_broadcast()) - panic("Max apic_id exceeded!\n"); - - printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " - "trying %d\n", ioapic, apic_id, i); - - apic_id = i; - } - - tmp = apicid_to_cpu_present(apic_id); - physids_or(apic_id_map, apic_id_map, tmp); - - if (reg_00.bits.ID != apic_id) { - reg_00.bits.ID = apic_id; - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(ioapic, 0, reg_00.raw); - reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - /* Sanity check */ - if (reg_00.bits.ID != apic_id) { - printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); - return -1; - } - } - - apic_printk(APIC_VERBOSE, KERN_INFO - "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); - - return apic_id; -} - - -int __init io_apic_get_version (int ioapic) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return reg_01.bits.version; -} - - -int __init io_apic_get_redir_entries (int ioapic) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return reg_01.bits.entries; -} - - -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low) -{ - struct IO_APIC_route_entry entry; - unsigned long flags; - - if (!IO_APIC_IRQ(irq)) { - printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", - ioapic); - return -EINVAL; - } - - /* - * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. - * Note that we mask (disable) IRQs now -- these get enabled when the - * corresponding device driver registers for this IRQ. - */ - - memset(&entry,0,sizeof(entry)); - - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); - entry.trigger = edge_level; - entry.polarity = active_high_low; - entry.mask = 1; - - /* - * IRQs < 16 are already in the irq_2_pin[] map - */ - if (irq >= 16) - add_pin_to_irq(irq, ioapic, pin); - - entry.vector = assign_irq_vector(irq); - - apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " - "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, - mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, - edge_level, active_high_low); - - ioapic_register_intr(irq, entry.vector, edge_level); - - if (!ioapic && (irq < 16)) - disable_8259A_irq(irq); - - spin_lock_irqsave(&ioapic_lock, flags); - __ioapic_write_entry(ioapic, pin, entry); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return 0; -} - -#endif /* CONFIG_ACPI */ - -static int __init parse_disable_timer_pin_1(char *arg) -{ - disable_timer_pin_1 = 1; - return 0; -} -early_param("disable_timer_pin_1", parse_disable_timer_pin_1); - -static int __init parse_enable_timer_pin_1(char *arg) -{ - disable_timer_pin_1 = -1; - return 0; -} -early_param("enable_timer_pin_1", parse_enable_timer_pin_1); - -static int __init parse_noapic(char *arg) -{ - /* disable IO-APIC */ - disable_ioapic_setup(); - return 0; -} -early_param("noapic", parse_noapic); diff --git a/arch/i386/kernel/ioport_32.c b/arch/i386/kernel/ioport_32.c deleted file mode 100644 index 3d310a946d7..00000000000 --- a/arch/i386/kernel/ioport_32.c +++ /dev/null @@ -1,153 +0,0 @@ -/* - * linux/arch/i386/kernel/ioport.c - * - * This contains the io-permission bitmap code - written by obz, with changes - * by Linus. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ -static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) -{ - unsigned long mask; - unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG); - unsigned int low_index = base & (BITS_PER_LONG-1); - int length = low_index + extent; - - if (low_index != 0) { - mask = (~0UL << low_index); - if (length < BITS_PER_LONG) - mask &= ~(~0UL << length); - if (new_value) - *bitmap_base++ |= mask; - else - *bitmap_base++ &= ~mask; - length -= BITS_PER_LONG; - } - - mask = (new_value ? ~0UL : 0UL); - while (length >= BITS_PER_LONG) { - *bitmap_base++ = mask; - length -= BITS_PER_LONG; - } - - if (length > 0) { - mask = ~(~0UL << length); - if (new_value) - *bitmap_base++ |= mask; - else - *bitmap_base++ &= ~mask; - } -} - - -/* - * this changes the io permissions bitmap in the current task. - */ -asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) -{ - unsigned long i, max_long, bytes, bytes_updated; - struct thread_struct * t = ¤t->thread; - struct tss_struct * tss; - unsigned long *bitmap; - - if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) - return -EINVAL; - if (turn_on && !capable(CAP_SYS_RAWIO)) - return -EPERM; - - /* - * If it's the first ioperm() call in this thread's lifetime, set the - * IO bitmap up. ioperm() is much less timing critical than clone(), - * this is why we delay this operation until now: - */ - if (!t->io_bitmap_ptr) { - bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); - if (!bitmap) - return -ENOMEM; - - memset(bitmap, 0xff, IO_BITMAP_BYTES); - t->io_bitmap_ptr = bitmap; - set_thread_flag(TIF_IO_BITMAP); - } - - /* - * do it in the per-thread copy and in the TSS ... - * - * Disable preemption via get_cpu() - we must not switch away - * because the ->io_bitmap_max value must match the bitmap - * contents: - */ - tss = &per_cpu(init_tss, get_cpu()); - - set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); - - /* - * Search for a (possibly new) maximum. This is simple and stupid, - * to keep it obviously correct: - */ - max_long = 0; - for (i = 0; i < IO_BITMAP_LONGS; i++) - if (t->io_bitmap_ptr[i] != ~0UL) - max_long = i; - - bytes = (max_long + 1) * sizeof(long); - bytes_updated = max(bytes, t->io_bitmap_max); - - t->io_bitmap_max = bytes; - - /* - * Sets the lazy trigger so that the next I/O operation will - * reload the correct bitmap. - * Reset the owner so that a process switch will not set - * tss->io_bitmap_base to IO_BITMAP_OFFSET. - */ - tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; - tss->io_bitmap_owner = NULL; - - put_cpu(); - - return 0; -} - -/* - * sys_iopl has to be used when you want to access the IO ports - * beyond the 0x3ff range: to get the full 65536 ports bitmapped - * you'd need 8kB of bitmaps/process, which is a bit excessive. - * - * Here we just change the eflags value on the stack: we allow - * only the super-user to do it. This depends on the stack-layout - * on system-call entry - see also fork() and the signal handling - * code. - */ - -asmlinkage long sys_iopl(unsigned long unused) -{ - volatile struct pt_regs * regs = (struct pt_regs *) &unused; - unsigned int level = regs->ebx; - unsigned int old = (regs->eflags >> 12) & 3; - struct thread_struct *t = ¤t->thread; - - if (level > 3) - return -EINVAL; - /* Trying to gain more privileges? */ - if (level > old) { - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - } - t->iopl = level << 12; - regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl; - set_iopl_mask(t->iopl); - return 0; -} diff --git a/arch/i386/kernel/irq_32.c b/arch/i386/kernel/irq_32.c deleted file mode 100644 index dd2b97fc00b..00000000000 --- a/arch/i386/kernel/irq_32.c +++ /dev/null @@ -1,343 +0,0 @@ -/* - * linux/arch/i386/kernel/irq.c - * - * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar - * - * This file contains the lowest level x86-specific interrupt - * entry, irq-stacks and irq statistics code. All the remaining - * irq logic is done by the generic kernel/irq/ code and - * by the x86-specific irq controller code. (e.g. i8259.c and - * io_apic.c.) - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); -EXPORT_PER_CPU_SYMBOL(irq_stat); - -DEFINE_PER_CPU(struct pt_regs *, irq_regs); -EXPORT_PER_CPU_SYMBOL(irq_regs); - -/* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves. - */ -void ack_bad_irq(unsigned int irq) -{ - printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); - -#ifdef CONFIG_X86_LOCAL_APIC - /* - * Currently unexpected vectors happen only on SMP and APIC. - * We _must_ ack these because every local APIC has only N - * irq slots per priority level, and a 'hanging, unacked' IRQ - * holds up an irq slot - in excessive cases (when multiple - * unexpected vectors occur) that might lock up the APIC - * completely. - * But only ack when the APIC is enabled -AK - */ - if (cpu_has_apic) - ack_APIC_irq(); -#endif -} - -#ifdef CONFIG_4KSTACKS -/* - * per-CPU IRQ handling contexts (thread information and stack) - */ -union irq_ctx { - struct thread_info tinfo; - u32 stack[THREAD_SIZE/sizeof(u32)]; -}; - -static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; -static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; -#endif - -/* - * do_IRQ handles all normal device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). - */ -fastcall unsigned int do_IRQ(struct pt_regs *regs) -{ - struct pt_regs *old_regs; - /* high bit used in ret_from_ code */ - int irq = ~regs->orig_eax; - struct irq_desc *desc = irq_desc + irq; -#ifdef CONFIG_4KSTACKS - union irq_ctx *curctx, *irqctx; - u32 *isp; -#endif - - if (unlikely((unsigned)irq >= NR_IRQS)) { - printk(KERN_EMERG "%s: cannot handle IRQ %d\n", - __FUNCTION__, irq); - BUG(); - } - - old_regs = set_irq_regs(regs); - irq_enter(); -#ifdef CONFIG_DEBUG_STACKOVERFLOW - /* Debugging check for stack overflow: is there less than 1KB free? */ - { - long esp; - - __asm__ __volatile__("andl %%esp,%0" : - "=r" (esp) : "0" (THREAD_SIZE - 1)); - if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { - printk("do_IRQ: stack overflow: %ld\n", - esp - sizeof(struct thread_info)); - dump_stack(); - } - } -#endif - -#ifdef CONFIG_4KSTACKS - - curctx = (union irq_ctx *) current_thread_info(); - irqctx = hardirq_ctx[smp_processor_id()]; - - /* - * this is where we switch to the IRQ stack. However, if we are - * already using the IRQ stack (because we interrupted a hardirq - * handler) we can't do that and just have to keep using the - * current stack (which is the irq stack already after all) - */ - if (curctx != irqctx) { - int arg1, arg2, ebx; - - /* build the stack frame on the IRQ stack */ - isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); - irqctx->tinfo.task = curctx->tinfo.task; - irqctx->tinfo.previous_esp = current_stack_pointer; - - /* - * Copy the softirq bits in preempt_count so that the - * softirq checks work in the hardirq context. - */ - irqctx->tinfo.preempt_count = - (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) | - (curctx->tinfo.preempt_count & SOFTIRQ_MASK); - - asm volatile( - " xchgl %%ebx,%%esp \n" - " call *%%edi \n" - " movl %%ebx,%%esp \n" - : "=a" (arg1), "=d" (arg2), "=b" (ebx) - : "0" (irq), "1" (desc), "2" (isp), - "D" (desc->handle_irq) - : "memory", "cc" - ); - } else -#endif - desc->handle_irq(irq, desc); - - irq_exit(); - set_irq_regs(old_regs); - return 1; -} - -#ifdef CONFIG_4KSTACKS - -static char softirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__section__(".bss.page_aligned"))); - -static char hardirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__section__(".bss.page_aligned"))); - -/* - * allocate per-cpu stacks for hardirq and for softirq processing - */ -void irq_ctx_init(int cpu) -{ - union irq_ctx *irqctx; - - if (hardirq_ctx[cpu]) - return; - - irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE]; - irqctx->tinfo.task = NULL; - irqctx->tinfo.exec_domain = NULL; - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); - - hardirq_ctx[cpu] = irqctx; - - irqctx = (union irq_ctx*) &softirq_stack[cpu*THREAD_SIZE]; - irqctx->tinfo.task = NULL; - irqctx->tinfo.exec_domain = NULL; - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = 0; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); - - softirq_ctx[cpu] = irqctx; - - printk("CPU %u irqstacks, hard=%p soft=%p\n", - cpu,hardirq_ctx[cpu],softirq_ctx[cpu]); -} - -void irq_ctx_exit(int cpu) -{ - hardirq_ctx[cpu] = NULL; -} - -extern asmlinkage void __do_softirq(void); - -asmlinkage void do_softirq(void) -{ - unsigned long flags; - struct thread_info *curctx; - union irq_ctx *irqctx; - u32 *isp; - - if (in_interrupt()) - return; - - local_irq_save(flags); - - if (local_softirq_pending()) { - curctx = current_thread_info(); - irqctx = softirq_ctx[smp_processor_id()]; - irqctx->tinfo.task = curctx->task; - irqctx->tinfo.previous_esp = current_stack_pointer; - - /* build the stack frame on the softirq stack */ - isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); - - asm volatile( - " xchgl %%ebx,%%esp \n" - " call __do_softirq \n" - " movl %%ebx,%%esp \n" - : "=b"(isp) - : "0"(isp) - : "memory", "cc", "edx", "ecx", "eax" - ); - /* - * Shouldnt happen, we returned above if in_interrupt(): - */ - WARN_ON_ONCE(softirq_count()); - } - - local_irq_restore(flags); -} - -EXPORT_SYMBOL(do_softirq); -#endif - -/* - * Interrupt statistics: - */ - -atomic_t irq_err_count; - -/* - * /proc/interrupts printing: - */ - -int show_interrupts(struct seq_file *p, void *v) -{ - int i = *(loff_t *) v, j; - struct irqaction * action; - unsigned long flags; - - if (i == 0) { - seq_printf(p, " "); - for_each_online_cpu(j) - seq_printf(p, "CPU%-8d",j); - seq_putc(p, '\n'); - } - - if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); - action = irq_desc[i].action; - if (!action) - goto skip; - seq_printf(p, "%3d: ",i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); -#endif - seq_printf(p, " %8s", irq_desc[i].chip->name); - seq_printf(p, "-%-8s", irq_desc[i].name); - seq_printf(p, " %s", action->name); - - for (action=action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); - - seq_putc(p, '\n'); -skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); - } else if (i == NR_IRQS) { - seq_printf(p, "NMI: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", nmi_count(j)); - seq_putc(p, '\n'); -#ifdef CONFIG_X86_LOCAL_APIC - seq_printf(p, "LOC: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).apic_timer_irqs); - seq_putc(p, '\n'); -#endif - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); -#if defined(CONFIG_X86_IO_APIC) - seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); -#endif - } - return 0; -} - -#ifdef CONFIG_HOTPLUG_CPU -#include - -void fixup_irqs(cpumask_t map) -{ - unsigned int irq; - static int warned; - - for (irq = 0; irq < NR_IRQS; irq++) { - cpumask_t mask; - if (irq == 2) - continue; - - cpus_and(mask, irq_desc[irq].affinity, map); - if (any_online_cpu(mask) == NR_CPUS) { - printk("Breaking affinity for irq %i\n", irq); - mask = map; - } - if (irq_desc[irq].chip->set_affinity) - irq_desc[irq].chip->set_affinity(irq, mask); - else if (irq_desc[irq].action && !(warned++)) - printk("Cannot set affinity for irq %i\n", irq); - } - -#if 0 - barrier(); - /* Ingo Molnar says: "after the IO-APIC masks have been redirected - [note the nop - the interrupt-enable boundary on x86 is two - instructions from sti] - to flush out pending hardirqs and - IPIs. After this point nothing is supposed to reach this CPU." */ - __asm__ __volatile__("sti; nop; cli"); - barrier(); -#else - /* That doesn't seem sufficient. Give it 1ms. */ - local_irq_enable(); - mdelay(1); - local_irq_disable(); -#endif -} -#endif - diff --git a/arch/i386/kernel/kprobes_32.c b/arch/i386/kernel/kprobes_32.c deleted file mode 100644 index 448a50b1324..00000000000 --- a/arch/i386/kernel/kprobes_32.c +++ /dev/null @@ -1,751 +0,0 @@ -/* - * Kernel Probes (KProbes) - * arch/i386/kernel/kprobes.c - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2002, 2004 - * - * 2002-Oct Created by Vamsi Krishna S Kernel - * Probes initial implementation ( includes contributions from - * Rusty Russell). - * 2004-July Suparna Bhattacharya added jumper probes - * interface to access function arguments. - * 2005-May Hien Nguyen , Jim Keniston - * and Prasanna S Panchamukhi - * added function-return probes. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -void jprobe_return_end(void); - -DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; -DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); - -/* insert a jmp code */ -static __always_inline void set_jmp_op(void *from, void *to) -{ - struct __arch_jmp_op { - char op; - long raddr; - } __attribute__((packed)) *jop; - jop = (struct __arch_jmp_op *)from; - jop->raddr = (long)(to) - ((long)(from) + 5); - jop->op = RELATIVEJUMP_INSTRUCTION; -} - -/* - * returns non-zero if opcodes can be boosted. - */ -static __always_inline int can_boost(kprobe_opcode_t *opcodes) -{ -#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \ - (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ - (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ - (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \ - (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \ - << (row % 32)) - /* - * Undefined/reserved opcodes, conditional jump, Opcode Extension - * Groups, and some special opcodes can not be boost. - */ - static const unsigned long twobyte_is_boostable[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ------------------------------- */ - W(0x00, 0,0,1,1,0,0,1,0,1,1,0,0,0,0,0,0)| /* 00 */ - W(0x10, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 10 */ - W(0x20, 1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0)| /* 20 */ - W(0x30, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 30 */ - W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)| /* 40 */ - W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 50 */ - W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1)| /* 60 */ - W(0x70, 0,0,0,0,1,1,1,1,0,0,0,0,0,0,1,1), /* 70 */ - W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)| /* 80 */ - W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1), /* 90 */ - W(0xa0, 1,1,0,1,1,1,0,0,1,1,0,1,1,1,0,1)| /* a0 */ - W(0xb0, 1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1), /* b0 */ - W(0xc0, 1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1)| /* c0 */ - W(0xd0, 0,1,1,1,0,1,0,0,1,1,0,1,1,1,0,1), /* d0 */ - W(0xe0, 0,1,1,0,0,1,0,0,1,1,0,1,1,1,0,1)| /* e0 */ - W(0xf0, 0,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0) /* f0 */ - /* ------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - }; -#undef W - kprobe_opcode_t opcode; - kprobe_opcode_t *orig_opcodes = opcodes; -retry: - if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) - return 0; - opcode = *(opcodes++); - - /* 2nd-byte opcode */ - if (opcode == 0x0f) { - if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) - return 0; - return test_bit(*opcodes, twobyte_is_boostable); - } - - switch (opcode & 0xf0) { - case 0x60: - if (0x63 < opcode && opcode < 0x67) - goto retry; /* prefixes */ - /* can't boost Address-size override and bound */ - return (opcode != 0x62 && opcode != 0x67); - case 0x70: - return 0; /* can't boost conditional jump */ - case 0xc0: - /* can't boost software-interruptions */ - return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf; - case 0xd0: - /* can boost AA* and XLAT */ - return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7); - case 0xe0: - /* can boost in/out and absolute jmps */ - return ((opcode & 0x04) || opcode == 0xea); - case 0xf0: - if ((opcode & 0x0c) == 0 && opcode != 0xf1) - goto retry; /* lock/rep(ne) prefix */ - /* clear and set flags can be boost */ - return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); - default: - if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e) - goto retry; /* prefixes */ - /* can't boost CS override and call */ - return (opcode != 0x2e && opcode != 0x9a); - } -} - -/* - * returns non-zero if opcode modifies the interrupt flag. - */ -static int __kprobes is_IF_modifier(kprobe_opcode_t opcode) -{ - switch (opcode) { - case 0xfa: /* cli */ - case 0xfb: /* sti */ - case 0xcf: /* iret/iretd */ - case 0x9d: /* popf/popfd */ - return 1; - } - return 0; -} - -int __kprobes arch_prepare_kprobe(struct kprobe *p) -{ - /* insn: must be on special executable page on i386. */ - p->ainsn.insn = get_insn_slot(); - if (!p->ainsn.insn) - return -ENOMEM; - - memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); - p->opcode = *p->addr; - if (can_boost(p->addr)) { - p->ainsn.boostable = 0; - } else { - p->ainsn.boostable = -1; - } - return 0; -} - -void __kprobes arch_arm_kprobe(struct kprobe *p) -{ - text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); -} - -void __kprobes arch_disarm_kprobe(struct kprobe *p) -{ - text_poke(p->addr, &p->opcode, 1); -} - -void __kprobes arch_remove_kprobe(struct kprobe *p) -{ - mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); - mutex_unlock(&kprobe_mutex); -} - -static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) -{ - kcb->prev_kprobe.kp = kprobe_running(); - kcb->prev_kprobe.status = kcb->kprobe_status; - kcb->prev_kprobe.old_eflags = kcb->kprobe_old_eflags; - kcb->prev_kprobe.saved_eflags = kcb->kprobe_saved_eflags; -} - -static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) -{ - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; - kcb->kprobe_status = kcb->prev_kprobe.status; - kcb->kprobe_old_eflags = kcb->prev_kprobe.old_eflags; - kcb->kprobe_saved_eflags = kcb->prev_kprobe.saved_eflags; -} - -static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) -{ - __get_cpu_var(current_kprobe) = p; - kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags - = (regs->eflags & (TF_MASK | IF_MASK)); - if (is_IF_modifier(p->opcode)) - kcb->kprobe_saved_eflags &= ~IF_MASK; -} - -static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) -{ - regs->eflags |= TF_MASK; - regs->eflags &= ~IF_MASK; - /*single step inline if the instruction is an int3*/ - if (p->opcode == BREAKPOINT_INSTRUCTION) - regs->eip = (unsigned long)p->addr; - else - regs->eip = (unsigned long)p->ainsn.insn; -} - -/* Called with kretprobe_lock held */ -void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, - struct pt_regs *regs) -{ - unsigned long *sara = (unsigned long *)®s->esp; - - ri->ret_addr = (kprobe_opcode_t *) *sara; - - /* Replace the return addr with trampoline addr */ - *sara = (unsigned long) &kretprobe_trampoline; -} - -/* - * Interrupts are disabled on entry as trap3 is an interrupt gate and they - * remain disabled thorough out this function. - */ -static int __kprobes kprobe_handler(struct pt_regs *regs) -{ - struct kprobe *p; - int ret = 0; - kprobe_opcode_t *addr; - struct kprobe_ctlblk *kcb; - - addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); - - /* - * We don't want to be preempted for the entire - * duration of kprobe processing - */ - preempt_disable(); - kcb = get_kprobe_ctlblk(); - - /* Check we're not actually recursing */ - if (kprobe_running()) { - p = get_kprobe(addr); - if (p) { - if (kcb->kprobe_status == KPROBE_HIT_SS && - *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { - regs->eflags &= ~TF_MASK; - regs->eflags |= kcb->kprobe_saved_eflags; - goto no_kprobe; - } - /* We have reentered the kprobe_handler(), since - * another probe was hit while within the handler. - * We here save the original kprobes variables and - * just single step on the instruction of the new probe - * without calling any user handlers. - */ - save_previous_kprobe(kcb); - set_current_kprobe(p, regs, kcb); - kprobes_inc_nmissed_count(p); - prepare_singlestep(p, regs); - kcb->kprobe_status = KPROBE_REENTER; - return 1; - } else { - if (*addr != BREAKPOINT_INSTRUCTION) { - /* The breakpoint instruction was removed by - * another cpu right after we hit, no further - * handling of this interrupt is appropriate - */ - regs->eip -= sizeof(kprobe_opcode_t); - ret = 1; - goto no_kprobe; - } - p = __get_cpu_var(current_kprobe); - if (p->break_handler && p->break_handler(p, regs)) { - goto ss_probe; - } - } - goto no_kprobe; - } - - p = get_kprobe(addr); - if (!p) { - if (*addr != BREAKPOINT_INSTRUCTION) { - /* - * The breakpoint instruction was removed right - * after we hit it. Another cpu has removed - * either a probepoint or a debugger breakpoint - * at this address. In either case, no further - * handling of this interrupt is appropriate. - * Back up over the (now missing) int3 and run - * the original instruction. - */ - regs->eip -= sizeof(kprobe_opcode_t); - ret = 1; - } - /* Not one of ours: let kernel handle it */ - goto no_kprobe; - } - - set_current_kprobe(p, regs, kcb); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - - if (p->pre_handler && p->pre_handler(p, regs)) - /* handler has already set things up, so skip ss setup */ - return 1; - -ss_probe: -#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM) - if (p->ainsn.boostable == 1 && !p->post_handler){ - /* Boost up -- we can execute copied instructions directly */ - reset_current_kprobe(); - regs->eip = (unsigned long)p->ainsn.insn; - preempt_enable_no_resched(); - return 1; - } -#endif - prepare_singlestep(p, regs); - kcb->kprobe_status = KPROBE_HIT_SS; - return 1; - -no_kprobe: - preempt_enable_no_resched(); - return ret; -} - -/* - * For function-return probes, init_kprobes() establishes a probepoint - * here. When a retprobed function returns, this probe is hit and - * trampoline_probe_handler() runs, calling the kretprobe's handler. - */ - void __kprobes kretprobe_trampoline_holder(void) - { - asm volatile ( ".global kretprobe_trampoline\n" - "kretprobe_trampoline: \n" - " pushf\n" - /* skip cs, eip, orig_eax */ - " subl $12, %esp\n" - " pushl %fs\n" - " pushl %ds\n" - " pushl %es\n" - " pushl %eax\n" - " pushl %ebp\n" - " pushl %edi\n" - " pushl %esi\n" - " pushl %edx\n" - " pushl %ecx\n" - " pushl %ebx\n" - " movl %esp, %eax\n" - " call trampoline_handler\n" - /* move eflags to cs */ - " movl 52(%esp), %edx\n" - " movl %edx, 48(%esp)\n" - /* save true return address on eflags */ - " movl %eax, 52(%esp)\n" - " popl %ebx\n" - " popl %ecx\n" - " popl %edx\n" - " popl %esi\n" - " popl %edi\n" - " popl %ebp\n" - " popl %eax\n" - /* skip eip, orig_eax, es, ds, fs */ - " addl $20, %esp\n" - " popf\n" - " ret\n"); -} - -/* - * Called from kretprobe_trampoline - */ -fastcall void *__kprobes trampoline_handler(struct pt_regs *regs) -{ - struct kretprobe_instance *ri = NULL; - struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; - unsigned long flags, orig_ret_address = 0; - unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; - - INIT_HLIST_HEAD(&empty_rp); - spin_lock_irqsave(&kretprobe_lock, flags); - head = kretprobe_inst_table_head(current); - /* fixup registers */ - regs->xcs = __KERNEL_CS | get_kernel_rpl(); - regs->eip = trampoline_address; - regs->orig_eax = 0xffffffff; - - /* - * It is possible to have multiple instances associated with a given - * task either because an multiple functions in the call path - * have a return probe installed on them, and/or more then one return - * return probe was registered for a target function. - * - * We can handle this because: - * - instances are always inserted at the head of the list - * - when multiple return probes are registered for the same - * function, the first instance's ret_addr will point to the - * real return address, and all the rest will point to - * kretprobe_trampoline - */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { - if (ri->task != current) - /* another task is sharing our hash bucket */ - continue; - - if (ri->rp && ri->rp->handler){ - __get_cpu_var(current_kprobe) = &ri->rp->kp; - get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; - ri->rp->handler(ri, regs); - __get_cpu_var(current_kprobe) = NULL; - } - - orig_ret_address = (unsigned long)ri->ret_addr; - recycle_rp_inst(ri, &empty_rp); - - if (orig_ret_address != trampoline_address) - /* - * This is the real return address. Any other - * instances associated with this task are for - * other calls deeper on the call stack - */ - break; - } - - kretprobe_assert(ri, orig_ret_address, trampoline_address); - spin_unlock_irqrestore(&kretprobe_lock, flags); - - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { - hlist_del(&ri->hlist); - kfree(ri); - } - return (void*)orig_ret_address; -} - -/* - * Called after single-stepping. p->addr is the address of the - * instruction whose first byte has been replaced by the "int 3" - * instruction. To avoid the SMP problems that can occur when we - * temporarily put back the original opcode to single-step, we - * single-stepped a copy of the instruction. The address of this - * copy is p->ainsn.insn. - * - * This function prepares to return from the post-single-step - * interrupt. We have to fix up the stack as follows: - * - * 0) Except in the case of absolute or indirect jump or call instructions, - * the new eip is relative to the copied instruction. We need to make - * it relative to the original instruction. - * - * 1) If the single-stepped instruction was pushfl, then the TF and IF - * flags are set in the just-pushed eflags, and may need to be cleared. - * - * 2) If the single-stepped instruction was a call, the return address - * that is atop the stack is the address following the copied instruction. - * We need to make it the address following the original instruction. - * - * This function also checks instruction size for preparing direct execution. - */ -static void __kprobes resume_execution(struct kprobe *p, - struct pt_regs *regs, struct kprobe_ctlblk *kcb) -{ - unsigned long *tos = (unsigned long *)®s->esp; - unsigned long copy_eip = (unsigned long)p->ainsn.insn; - unsigned long orig_eip = (unsigned long)p->addr; - - regs->eflags &= ~TF_MASK; - switch (p->ainsn.insn[0]) { - case 0x9c: /* pushfl */ - *tos &= ~(TF_MASK | IF_MASK); - *tos |= kcb->kprobe_old_eflags; - break; - case 0xc2: /* iret/ret/lret */ - case 0xc3: - case 0xca: - case 0xcb: - case 0xcf: - case 0xea: /* jmp absolute -- eip is correct */ - /* eip is already adjusted, no more changes required */ - p->ainsn.boostable = 1; - goto no_change; - case 0xe8: /* call relative - Fix return addr */ - *tos = orig_eip + (*tos - copy_eip); - break; - case 0x9a: /* call absolute -- same as call absolute, indirect */ - *tos = orig_eip + (*tos - copy_eip); - goto no_change; - case 0xff: - if ((p->ainsn.insn[1] & 0x30) == 0x10) { - /* - * call absolute, indirect - * Fix return addr; eip is correct. - * But this is not boostable - */ - *tos = orig_eip + (*tos - copy_eip); - goto no_change; - } else if (((p->ainsn.insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */ - ((p->ainsn.insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */ - /* eip is correct. And this is boostable */ - p->ainsn.boostable = 1; - goto no_change; - } - default: - break; - } - - if (p->ainsn.boostable == 0) { - if ((regs->eip > copy_eip) && - (regs->eip - copy_eip) + 5 < MAX_INSN_SIZE) { - /* - * These instructions can be executed directly if it - * jumps back to correct address. - */ - set_jmp_op((void *)regs->eip, - (void *)orig_eip + (regs->eip - copy_eip)); - p->ainsn.boostable = 1; - } else { - p->ainsn.boostable = -1; - } - } - - regs->eip = orig_eip + (regs->eip - copy_eip); - -no_change: - return; -} - -/* - * Interrupts are disabled on entry as trap1 is an interrupt gate and they - * remain disabled thoroughout this function. - */ -static int __kprobes post_kprobe_handler(struct pt_regs *regs) -{ - struct kprobe *cur = kprobe_running(); - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - if (!cur) - return 0; - - if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - cur->post_handler(cur, regs, 0); - } - - resume_execution(cur, regs, kcb); - regs->eflags |= kcb->kprobe_saved_eflags; - - /*Restore back the original saved kprobes variables and continue. */ - if (kcb->kprobe_status == KPROBE_REENTER) { - restore_previous_kprobe(kcb); - goto out; - } - reset_current_kprobe(); -out: - preempt_enable_no_resched(); - - /* - * if somebody else is singlestepping across a probe point, eflags - * will have TF set, in which case, continue the remaining processing - * of do_debug, as if this is not a probe hit. - */ - if (regs->eflags & TF_MASK) - return 0; - - return 1; -} - -static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) -{ - struct kprobe *cur = kprobe_running(); - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - switch(kcb->kprobe_status) { - case KPROBE_HIT_SS: - case KPROBE_REENTER: - /* - * We are here because the instruction being single - * stepped caused a page fault. We reset the current - * kprobe and the eip points back to the probe address - * and allow the page fault handler to continue as a - * normal page fault. - */ - regs->eip = (unsigned long)cur->addr; - regs->eflags |= kcb->kprobe_old_eflags; - if (kcb->kprobe_status == KPROBE_REENTER) - restore_previous_kprobe(kcb); - else - reset_current_kprobe(); - preempt_enable_no_resched(); - break; - case KPROBE_HIT_ACTIVE: - case KPROBE_HIT_SSDONE: - /* - * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accouting - * these specific fault cases. - */ - kprobes_inc_nmissed_count(cur); - - /* - * We come here because instructions in the pre/post - * handler caused the page_fault, this could happen - * if handler tries to access user space by - * copy_from_user(), get_user() etc. Let the - * user-specified handler try to fix it first. - */ - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - - /* - * In case the user-specified fault handler returned - * zero, try to fix up. - */ - if (fixup_exception(regs)) - return 1; - - /* - * fixup_exception() could not handle it, - * Let do_page_fault() fix it. - */ - break; - default: - break; - } - return 0; -} - -/* - * Wrapper routine to for handling exceptions. - */ -int __kprobes kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct die_args *args = (struct die_args *)data; - int ret = NOTIFY_DONE; - - if (args->regs && user_mode_vm(args->regs)) - return ret; - - switch (val) { - case DIE_INT3: - if (kprobe_handler(args->regs)) - ret = NOTIFY_STOP; - break; - case DIE_DEBUG: - if (post_kprobe_handler(args->regs)) - ret = NOTIFY_STOP; - break; - case DIE_GPF: - case DIE_PAGE_FAULT: - /* kprobe_running() needs smp_processor_id() */ - preempt_disable(); - if (kprobe_running() && - kprobe_fault_handler(args->regs, args->trapnr)) - ret = NOTIFY_STOP; - preempt_enable(); - break; - default: - break; - } - return ret; -} - -int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) -{ - struct jprobe *jp = container_of(p, struct jprobe, kp); - unsigned long addr; - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - kcb->jprobe_saved_regs = *regs; - kcb->jprobe_saved_esp = ®s->esp; - addr = (unsigned long)(kcb->jprobe_saved_esp); - - /* - * TBD: As Linus pointed out, gcc assumes that the callee - * owns the argument space and could overwrite it, e.g. - * tailcall optimization. So, to be absolutely safe - * we also save and restore enough stack bytes to cover - * the argument area. - */ - memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, - MIN_STACK_SIZE(addr)); - regs->eflags &= ~IF_MASK; - regs->eip = (unsigned long)(jp->entry); - return 1; -} - -void __kprobes jprobe_return(void) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - asm volatile (" xchgl %%ebx,%%esp \n" - " int3 \n" - " .globl jprobe_return_end \n" - " jprobe_return_end: \n" - " nop \n"::"b" - (kcb->jprobe_saved_esp):"memory"); -} - -int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - u8 *addr = (u8 *) (regs->eip - 1); - unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp); - struct jprobe *jp = container_of(p, struct jprobe, kp); - - if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { - if (®s->esp != kcb->jprobe_saved_esp) { - struct pt_regs *saved_regs = - container_of(kcb->jprobe_saved_esp, - struct pt_regs, esp); - printk("current esp %p does not match saved esp %p\n", - ®s->esp, kcb->jprobe_saved_esp); - printk("Saved registers for jprobe %p\n", jp); - show_registers(saved_regs); - printk("Current registers\n"); - show_registers(regs); - BUG(); - } - *regs = kcb->jprobe_saved_regs; - memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, - MIN_STACK_SIZE(stack_addr)); - preempt_enable_no_resched(); - return 1; - } - return 0; -} - -int __kprobes arch_trampoline_kprobe(struct kprobe *p) -{ - return 0; -} - -int __init arch_init_kprobes(void) -{ - return 0; -} diff --git a/arch/i386/kernel/ldt_32.c b/arch/i386/kernel/ldt_32.c deleted file mode 100644 index e0b2d17f4f1..00000000000 --- a/arch/i386/kernel/ldt_32.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * linux/arch/i386/kernel/ldt.c - * - * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds - * Copyright (C) 1999 Ingo Molnar - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ -static void flush_ldt(void *null) -{ - if (current->active_mm) - load_LDT(¤t->active_mm->context); -} -#endif - -static int alloc_ldt(mm_context_t *pc, int mincount, int reload) -{ - void *oldldt; - void *newldt; - int oldsize; - - if (mincount <= pc->size) - return 0; - oldsize = pc->size; - mincount = (mincount+511)&(~511); - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE) - newldt = vmalloc(mincount*LDT_ENTRY_SIZE); - else - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL); - - if (!newldt) - return -ENOMEM; - - if (oldsize) - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE); - oldldt = pc->ldt; - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE); - pc->ldt = newldt; - wmb(); - pc->size = mincount; - wmb(); - - if (reload) { -#ifdef CONFIG_SMP - cpumask_t mask; - preempt_disable(); - load_LDT(pc); - mask = cpumask_of_cpu(smp_processor_id()); - if (!cpus_equal(current->mm->cpu_vm_mask, mask)) - smp_call_function(flush_ldt, NULL, 1, 1); - preempt_enable(); -#else - load_LDT(pc); -#endif - } - if (oldsize) { - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(oldldt); - else - kfree(oldldt); - } - return 0; -} - -static inline int copy_ldt(mm_context_t *new, mm_context_t *old) -{ - int err = alloc_ldt(new, old->size, 0); - if (err < 0) - return err; - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE); - return 0; -} - -/* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. - */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - struct mm_struct * old_mm; - int retval = 0; - - init_MUTEX(&mm->context.sem); - mm->context.size = 0; - old_mm = current->mm; - if (old_mm && old_mm->context.size > 0) { - down(&old_mm->context.sem); - retval = copy_ldt(&mm->context, &old_mm->context); - up(&old_mm->context.sem); - } - return retval; -} - -/* - * No need to lock the MM as we are the last user - */ -void destroy_context(struct mm_struct *mm) -{ - if (mm->context.size) { - if (mm == current->active_mm) - clear_LDT(); - if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE) - vfree(mm->context.ldt); - else - kfree(mm->context.ldt); - mm->context.size = 0; - } -} - -static int read_ldt(void __user * ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - struct mm_struct * mm = current->mm; - - if (!mm->context.size) - return 0; - if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) - bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - - down(&mm->context.sem); - size = mm->context.size*LDT_ENTRY_SIZE; - if (size > bytecount) - size = bytecount; - - err = 0; - if (copy_to_user(ptr, mm->context.ldt, size)) - err = -EFAULT; - up(&mm->context.sem); - if (err < 0) - goto error_return; - if (size != bytecount) { - /* zero-fill the rest */ - if (clear_user(ptr+size, bytecount-size) != 0) { - err = -EFAULT; - goto error_return; - } - } - return bytecount; -error_return: - return err; -} - -static int read_default_ldt(void __user * ptr, unsigned long bytecount) -{ - int err; - unsigned long size; - - err = 0; - size = 5*sizeof(struct desc_struct); - if (size > bytecount) - size = bytecount; - - err = size; - if (clear_user(ptr, size)) - err = -EFAULT; - - return err; -} - -static int write_ldt(void __user * ptr, unsigned long bytecount, int oldmode) -{ - struct mm_struct * mm = current->mm; - __u32 entry_1, entry_2; - int error; - struct user_desc ldt_info; - - error = -EINVAL; - if (bytecount != sizeof(ldt_info)) - goto out; - error = -EFAULT; - if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) - goto out; - - error = -EINVAL; - if (ldt_info.entry_number >= LDT_ENTRIES) - goto out; - if (ldt_info.contents == 3) { - if (oldmode) - goto out; - if (ldt_info.seg_not_present == 0) - goto out; - } - - down(&mm->context.sem); - if (ldt_info.entry_number >= mm->context.size) { - error = alloc_ldt(¤t->mm->context, ldt_info.entry_number+1, 1); - if (error < 0) - goto out_unlock; - } - - /* Allow LDTs to be cleared by the user. */ - if (ldt_info.base_addr == 0 && ldt_info.limit == 0) { - if (oldmode || LDT_empty(&ldt_info)) { - entry_1 = 0; - entry_2 = 0; - goto install; - } - } - - entry_1 = LDT_entry_a(&ldt_info); - entry_2 = LDT_entry_b(&ldt_info); - if (oldmode) - entry_2 &= ~(1 << 20); - - /* Install the new entry ... */ -install: - write_ldt_entry(mm->context.ldt, ldt_info.entry_number, entry_1, entry_2); - error = 0; - -out_unlock: - up(&mm->context.sem); -out: - return error; -} - -asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) -{ - int ret = -ENOSYS; - - switch (func) { - case 0: - ret = read_ldt(ptr, bytecount); - break; - case 1: - ret = write_ldt(ptr, bytecount, 1); - break; - case 2: - ret = read_default_ldt(ptr, bytecount); - break; - case 0x11: - ret = write_ldt(ptr, bytecount, 0); - break; - } - return ret; -} diff --git a/arch/i386/kernel/machine_kexec_32.c b/arch/i386/kernel/machine_kexec_32.c deleted file mode 100644 index 91966bafb3d..00000000000 --- a/arch/i386/kernel/machine_kexec_32.c +++ /dev/null @@ -1,171 +0,0 @@ -/* - * machine_kexec.c - handle transition of Linux booting another kernel - * Copyright (C) 2002-2005 Eric Biederman - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) -static u32 kexec_pgd[1024] PAGE_ALIGNED; -#ifdef CONFIG_X86_PAE -static u32 kexec_pmd0[1024] PAGE_ALIGNED; -static u32 kexec_pmd1[1024] PAGE_ALIGNED; -#endif -static u32 kexec_pte0[1024] PAGE_ALIGNED; -static u32 kexec_pte1[1024] PAGE_ALIGNED; - -static void set_idt(void *newidt, __u16 limit) -{ - struct Xgt_desc_struct curidt; - - /* ia32 supports unaliged loads & stores */ - curidt.size = limit; - curidt.address = (unsigned long)newidt; - - load_idt(&curidt); -}; - - -static void set_gdt(void *newgdt, __u16 limit) -{ - struct Xgt_desc_struct curgdt; - - /* ia32 supports unaligned loads & stores */ - curgdt.size = limit; - curgdt.address = (unsigned long)newgdt; - - load_gdt(&curgdt); -}; - -static void load_segments(void) -{ -#define __STR(X) #X -#define STR(X) __STR(X) - - __asm__ __volatile__ ( - "\tljmp $"STR(__KERNEL_CS)",$1f\n" - "\t1:\n" - "\tmovl $"STR(__KERNEL_DS)",%%eax\n" - "\tmovl %%eax,%%ds\n" - "\tmovl %%eax,%%es\n" - "\tmovl %%eax,%%fs\n" - "\tmovl %%eax,%%gs\n" - "\tmovl %%eax,%%ss\n" - ::: "eax", "memory"); -#undef STR -#undef __STR -} - -/* - * A architecture hook called to validate the - * proposed image and prepare the control pages - * as needed. The pages for KEXEC_CONTROL_CODE_SIZE - * have been allocated, but the segments have yet - * been copied into the kernel. - * - * Do what every setup is needed on image and the - * reboot code buffer to allow us to avoid allocations - * later. - * - * Currently nothing. - */ -int machine_kexec_prepare(struct kimage *image) -{ - return 0; -} - -/* - * Undo anything leftover by machine_kexec_prepare - * when an image is freed. - */ -void machine_kexec_cleanup(struct kimage *image) -{ -} - -/* - * Do not allocate memory (or fail in any way) in machine_kexec(). - * We are past the point of no return, committed to rebooting now. - */ -NORET_TYPE void machine_kexec(struct kimage *image) -{ - unsigned long page_list[PAGES_NR]; - void *control_page; - - /* Interrupts aren't acceptable while we reboot */ - local_irq_disable(); - - control_page = page_address(image->control_code_page); - memcpy(control_page, relocate_kernel, PAGE_SIZE); - - page_list[PA_CONTROL_PAGE] = __pa(control_page); - page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; - page_list[PA_PGD] = __pa(kexec_pgd); - page_list[VA_PGD] = (unsigned long)kexec_pgd; -#ifdef CONFIG_X86_PAE - page_list[PA_PMD_0] = __pa(kexec_pmd0); - page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; - page_list[PA_PMD_1] = __pa(kexec_pmd1); - page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; -#endif - page_list[PA_PTE_0] = __pa(kexec_pte0); - page_list[VA_PTE_0] = (unsigned long)kexec_pte0; - page_list[PA_PTE_1] = __pa(kexec_pte1); - page_list[VA_PTE_1] = (unsigned long)kexec_pte1; - - /* The segment registers are funny things, they have both a - * visible and an invisible part. Whenever the visible part is - * set to a specific selector, the invisible part is loaded - * with from a table in memory. At no other time is the - * descriptor table in memory accessed. - * - * I take advantage of this here by force loading the - * segments, before I zap the gdt with an invalid value. - */ - load_segments(); - /* The gdt & idt are now invalid. - * If you want to load them you must set up your own idt & gdt. - */ - set_gdt(phys_to_virt(0),0); - set_idt(phys_to_virt(0),0); - - /* now call it */ - relocate_kernel((unsigned long)image->head, (unsigned long)page_list, - image->start, cpu_has_pae); -} - -/* crashkernel=size@addr specifies the location to reserve for - * a crash kernel. By reserving this memory we guarantee - * that linux never sets it up as a DMA target. - * Useful for holding code to do something appropriate - * after a kernel panic. - */ -static int __init parse_crashkernel(char *arg) -{ - unsigned long size, base; - size = memparse(arg, &arg); - if (*arg == '@') { - base = memparse(arg+1, &arg); - /* FIXME: Do I want a sanity check - * to validate the memory range? - */ - crashk_res.start = base; - crashk_res.end = base + size - 1; - } - return 0; -} -early_param("crashkernel", parse_crashkernel); diff --git a/arch/i386/kernel/mca_32.c b/arch/i386/kernel/mca_32.c deleted file mode 100644 index b83672b8952..00000000000 --- a/arch/i386/kernel/mca_32.c +++ /dev/null @@ -1,470 +0,0 @@ -/* - * linux/arch/i386/kernel/mca.c - * Written by Martin Kolinek, February 1996 - * - * Changes: - * - * Chris Beauregard July 28th, 1996 - * - Fixed up integrated SCSI detection - * - * Chris Beauregard August 3rd, 1996 - * - Made mca_info local - * - Made integrated registers accessible through standard function calls - * - Added name field - * - More sanity checking - * - * Chris Beauregard August 9th, 1996 - * - Rewrote /proc/mca - * - * Chris Beauregard January 7th, 1997 - * - Added basic NMI-processing - * - Added more information to mca_info structure - * - * David Weinehall October 12th, 1998 - * - Made a lot of cleaning up in the source - * - Added use of save_flags / restore_flags - * - Added the 'driver_loaded' flag in MCA_adapter - * - Added an alternative implemention of ZP Gu's mca_find_unused_adapter - * - * David Weinehall March 24th, 1999 - * - Fixed the output of 'Driver Installed' in /proc/mca/pos - * - Made the Integrated Video & SCSI show up even if they have id 0000 - * - * Alexander Viro November 9th, 1999 - * - Switched to regular procfs methods - * - * Alfred Arnold & David Weinehall August 23rd, 2000 - * - Added support for Planar POS-registers - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static unsigned char which_scsi = 0; - -int MCA_bus = 0; -EXPORT_SYMBOL(MCA_bus); - -/* - * Motherboard register spinlock. Untested on SMP at the moment, but - * are there any MCA SMP boxes? - * - * Yes - Alan - */ -static DEFINE_SPINLOCK(mca_lock); - -/* Build the status info for the adapter */ - -static void mca_configure_adapter_status(struct mca_device *mca_dev) { - mca_dev->status = MCA_ADAPTER_NONE; - - mca_dev->pos_id = mca_dev->pos[0] - + (mca_dev->pos[1] << 8); - - if(!mca_dev->pos_id && mca_dev->slot < MCA_MAX_SLOT_NR) { - - /* id = 0x0000 usually indicates hardware failure, - * however, ZP Gu (zpg@castle.net> reports that his 9556 - * has 0x0000 as id and everything still works. There - * also seem to be an adapter with id = 0x0000; the - * NCR Parallel Bus Memory Card. Until this is confirmed, - * however, this code will stay. - */ - - mca_dev->status = MCA_ADAPTER_ERROR; - - return; - } else if(mca_dev->pos_id != 0xffff) { - - /* 0xffff usually indicates that there's no adapter, - * however, some integrated adapters may have 0xffff as - * their id and still be valid. Examples are on-board - * VGA of the 55sx, the integrated SCSI of the 56 & 57, - * and possibly also the 95 ULTIMEDIA. - */ - - mca_dev->status = MCA_ADAPTER_NORMAL; - } - - if((mca_dev->pos_id == 0xffff || - mca_dev->pos_id == 0x0000) && mca_dev->slot >= MCA_MAX_SLOT_NR) { - int j; - - for(j = 2; j < 8; j++) { - if(mca_dev->pos[j] != 0xff) { - mca_dev->status = MCA_ADAPTER_NORMAL; - break; - } - } - } - - if(!(mca_dev->pos[2] & MCA_ENABLED)) { - - /* enabled bit is in POS 2 */ - - mca_dev->status = MCA_ADAPTER_DISABLED; - } -} /* mca_configure_adapter_status */ - -/*--------------------------------------------------------------------*/ - -static struct resource mca_standard_resources[] = { - { .start = 0x60, .end = 0x60, .name = "system control port B (MCA)" }, - { .start = 0x90, .end = 0x90, .name = "arbitration (MCA)" }, - { .start = 0x91, .end = 0x91, .name = "card Select Feedback (MCA)" }, - { .start = 0x92, .end = 0x92, .name = "system Control port A (MCA)" }, - { .start = 0x94, .end = 0x94, .name = "system board setup (MCA)" }, - { .start = 0x96, .end = 0x97, .name = "POS (MCA)" }, - { .start = 0x100, .end = 0x107, .name = "POS (MCA)" } -}; - -#define MCA_STANDARD_RESOURCES ARRAY_SIZE(mca_standard_resources) - -/** - * mca_read_and_store_pos - read the POS registers into a memory buffer - * @pos: a char pointer to 8 bytes, contains the POS register value on - * successful return - * - * Returns 1 if a card actually exists (i.e. the pos isn't - * all 0xff) or 0 otherwise - */ -static int mca_read_and_store_pos(unsigned char *pos) { - int j; - int found = 0; - - for(j=0; j<8; j++) { - if((pos[j] = inb_p(MCA_POS_REG(j))) != 0xff) { - /* 0xff all across means no device. 0x00 means - * something's broken, but a device is - * probably there. However, if you get 0x00 - * from a motherboard register it won't matter - * what we find. For the record, on the - * 57SLC, the integrated SCSI adapter has - * 0xffff for the adapter ID, but nonzero for - * other registers. */ - - found = 1; - } - } - return found; -} - -static unsigned char mca_pc_read_pos(struct mca_device *mca_dev, int reg) -{ - unsigned char byte; - unsigned long flags; - - if(reg < 0 || reg >= 8) - return 0; - - spin_lock_irqsave(&mca_lock, flags); - if(mca_dev->pos_register) { - /* Disable adapter setup, enable motherboard setup */ - - outb_p(0, MCA_ADAPTER_SETUP_REG); - outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG); - - byte = inb_p(MCA_POS_REG(reg)); - outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); - } else { - - /* Make sure motherboard setup is off */ - - outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); - - /* Read the appropriate register */ - - outb_p(0x8|(mca_dev->slot & 0xf), MCA_ADAPTER_SETUP_REG); - byte = inb_p(MCA_POS_REG(reg)); - outb_p(0, MCA_ADAPTER_SETUP_REG); - } - spin_unlock_irqrestore(&mca_lock, flags); - - mca_dev->pos[reg] = byte; - - return byte; -} - -static void mca_pc_write_pos(struct mca_device *mca_dev, int reg, - unsigned char byte) -{ - unsigned long flags; - - if(reg < 0 || reg >= 8) - return; - - spin_lock_irqsave(&mca_lock, flags); - - /* Make sure motherboard setup is off */ - - outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); - - /* Read in the appropriate register */ - - outb_p(0x8|(mca_dev->slot&0xf), MCA_ADAPTER_SETUP_REG); - outb_p(byte, MCA_POS_REG(reg)); - outb_p(0, MCA_ADAPTER_SETUP_REG); - - spin_unlock_irqrestore(&mca_lock, flags); - - /* Update the global register list, while we have the byte */ - - mca_dev->pos[reg] = byte; - -} - -/* for the primary MCA bus, we have identity transforms */ -static int mca_dummy_transform_irq(struct mca_device * mca_dev, int irq) -{ - return irq; -} - -static int mca_dummy_transform_ioport(struct mca_device * mca_dev, int port) -{ - return port; -} - -static void *mca_dummy_transform_memory(struct mca_device * mca_dev, void *mem) -{ - return mem; -} - - -static int __init mca_init(void) -{ - unsigned int i, j; - struct mca_device *mca_dev; - unsigned char pos[8]; - short mca_builtin_scsi_ports[] = {0xf7, 0xfd, 0x00}; - struct mca_bus *bus; - - /* WARNING: Be careful when making changes here. Putting an adapter - * and the motherboard simultaneously into setup mode may result in - * damage to chips (according to The Indispensible PC Hardware Book - * by Hans-Peter Messmer). Also, we disable system interrupts (so - * that we are not disturbed in the middle of this). - */ - - /* Make sure the MCA bus is present */ - - if (mca_system_init()) { - printk(KERN_ERR "MCA bus system initialisation failed\n"); - return -ENODEV; - } - - if (!MCA_bus) - return -ENODEV; - - printk(KERN_INFO "Micro Channel bus detected.\n"); - - /* All MCA systems have at least a primary bus */ - bus = mca_attach_bus(MCA_PRIMARY_BUS); - if (!bus) - goto out_nomem; - bus->default_dma_mask = 0xffffffffLL; - bus->f.mca_write_pos = mca_pc_write_pos; - bus->f.mca_read_pos = mca_pc_read_pos; - bus->f.mca_transform_irq = mca_dummy_transform_irq; - bus->f.mca_transform_ioport = mca_dummy_transform_ioport; - bus->f.mca_transform_memory = mca_dummy_transform_memory; - - /* get the motherboard device */ - mca_dev = kzalloc(sizeof(struct mca_device), GFP_KERNEL); - if(unlikely(!mca_dev)) - goto out_nomem; - - /* - * We do not expect many MCA interrupts during initialization, - * but let us be safe: - */ - spin_lock_irq(&mca_lock); - - /* Make sure adapter setup is off */ - - outb_p(0, MCA_ADAPTER_SETUP_REG); - - /* Read motherboard POS registers */ - - mca_dev->pos_register = 0x7f; - outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG); - mca_dev->name[0] = 0; - mca_read_and_store_pos(mca_dev->pos); - mca_configure_adapter_status(mca_dev); - /* fake POS and slot for a motherboard */ - mca_dev->pos_id = MCA_MOTHERBOARD_POS; - mca_dev->slot = MCA_MOTHERBOARD; - mca_register_device(MCA_PRIMARY_BUS, mca_dev); - - mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC); - if(unlikely(!mca_dev)) - goto out_unlock_nomem; - - /* Put motherboard into video setup mode, read integrated video - * POS registers, and turn motherboard setup off. - */ - - mca_dev->pos_register = 0xdf; - outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG); - mca_dev->name[0] = 0; - mca_read_and_store_pos(mca_dev->pos); - mca_configure_adapter_status(mca_dev); - /* fake POS and slot for the integrated video */ - mca_dev->pos_id = MCA_INTEGVIDEO_POS; - mca_dev->slot = MCA_INTEGVIDEO; - mca_register_device(MCA_PRIMARY_BUS, mca_dev); - - /* Put motherboard into scsi setup mode, read integrated scsi - * POS registers, and turn motherboard setup off. - * - * It seems there are two possible SCSI registers. Martin says that - * for the 56,57, 0xf7 is the one, but fails on the 76. - * Alfredo (apena@vnet.ibm.com) says - * 0xfd works on his machine. We'll try both of them. I figure it's - * a good bet that only one could be valid at a time. This could - * screw up though if one is used for something else on the other - * machine. - */ - - for(i = 0; (which_scsi = mca_builtin_scsi_ports[i]) != 0; i++) { - outb_p(which_scsi, MCA_MOTHERBOARD_SETUP_REG); - if(mca_read_and_store_pos(pos)) - break; - } - if(which_scsi) { - /* found a scsi card */ - mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC); - if(unlikely(!mca_dev)) - goto out_unlock_nomem; - - for(j = 0; j < 8; j++) - mca_dev->pos[j] = pos[j]; - - mca_configure_adapter_status(mca_dev); - /* fake POS and slot for integrated SCSI controller */ - mca_dev->pos_id = MCA_INTEGSCSI_POS; - mca_dev->slot = MCA_INTEGSCSI; - mca_dev->pos_register = which_scsi; - mca_register_device(MCA_PRIMARY_BUS, mca_dev); - } - - /* Turn off motherboard setup */ - - outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); - - /* Now loop over MCA slots: put each adapter into setup mode, and - * read its POS registers. Then put adapter setup off. - */ - - for(i=0; ipos[j]=pos[j]; - - mca_dev->driver_loaded = 0; - mca_dev->slot = i; - mca_dev->pos_register = 0; - mca_configure_adapter_status(mca_dev); - mca_register_device(MCA_PRIMARY_BUS, mca_dev); - } - outb_p(0, MCA_ADAPTER_SETUP_REG); - - /* Enable interrupts and return memory start */ - spin_unlock_irq(&mca_lock); - - for (i = 0; i < MCA_STANDARD_RESOURCES; i++) - request_resource(&ioport_resource, mca_standard_resources + i); - - mca_do_proc_init(); - - return 0; - - out_unlock_nomem: - spin_unlock_irq(&mca_lock); - out_nomem: - printk(KERN_EMERG "Failed memory allocation in MCA setup!\n"); - return -ENOMEM; -} - -subsys_initcall(mca_init); - -/*--------------------------------------------------------------------*/ - -static __kprobes void -mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag) -{ - int slot = mca_dev->slot; - - if(slot == MCA_INTEGSCSI) { - printk(KERN_CRIT "NMI: caused by MCA integrated SCSI adapter (%s)\n", - mca_dev->name); - } else if(slot == MCA_INTEGVIDEO) { - printk(KERN_CRIT "NMI: caused by MCA integrated video adapter (%s)\n", - mca_dev->name); - } else if(slot == MCA_MOTHERBOARD) { - printk(KERN_CRIT "NMI: caused by motherboard (%s)\n", - mca_dev->name); - } - - /* More info available in POS 6 and 7? */ - - if(check_flag) { - unsigned char pos6, pos7; - - pos6 = mca_device_read_pos(mca_dev, 6); - pos7 = mca_device_read_pos(mca_dev, 7); - - printk(KERN_CRIT "NMI: POS 6 = 0x%x, POS 7 = 0x%x\n", pos6, pos7); - } - -} /* mca_handle_nmi_slot */ - -/*--------------------------------------------------------------------*/ - -static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data) -{ - struct mca_device *mca_dev = to_mca_device(dev); - unsigned char pos5; - - pos5 = mca_device_read_pos(mca_dev, 5); - - if(!(pos5 & 0x80)) { - /* Bit 7 of POS 5 is reset when this adapter has a hardware - * error. Bit 7 it reset if there's error information - * available in POS 6 and 7. - */ - mca_handle_nmi_device(mca_dev, !(pos5 & 0x40)); - return 1; - } - return 0; -} - -void __kprobes mca_handle_nmi(void) -{ - /* First try - scan the various adapters and see if a specific - * adapter was responsible for the error. - */ - bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback); - - mca_nmi_hook(); -} /* mca_handle_nmi */ diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c deleted file mode 100644 index 09cf7811035..00000000000 --- a/arch/i386/kernel/microcode.c +++ /dev/null @@ -1,850 +0,0 @@ -/* - * Intel CPU Microcode Update Driver for Linux - * - * Copyright (C) 2000-2006 Tigran Aivazian - * 2006 Shaohua Li - * - * This driver allows to upgrade microcode on Intel processors - * belonging to IA-32 family - PentiumPro, Pentium II, - * Pentium III, Xeon, Pentium 4, etc. - * - * Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual, - * Order Number 245472 or free download from: - * - * http://developer.intel.com/design/pentium4/manuals/245472.htm - * - * For more information, go to http://www.urbanmyth.org/microcode - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * 1.0 16 Feb 2000, Tigran Aivazian - * Initial release. - * 1.01 18 Feb 2000, Tigran Aivazian - * Added read() support + cleanups. - * 1.02 21 Feb 2000, Tigran Aivazian - * Added 'device trimming' support. open(O_WRONLY) zeroes - * and frees the saved copy of applied microcode. - * 1.03 29 Feb 2000, Tigran Aivazian - * Made to use devfs (/dev/cpu/microcode) + cleanups. - * 1.04 06 Jun 2000, Simon Trimmer - * Added misc device support (now uses both devfs and misc). - * Added MICROCODE_IOCFREE ioctl to clear memory. - * 1.05 09 Jun 2000, Simon Trimmer - * Messages for error cases (non Intel & no suitable microcode). - * 1.06 03 Aug 2000, Tigran Aivazian - * Removed ->release(). Removed exclusive open and status bitmap. - * Added microcode_rwsem to serialize read()/write()/ioctl(). - * Removed global kernel lock usage. - * 1.07 07 Sep 2000, Tigran Aivazian - * Write 0 to 0x8B msr and then cpuid before reading revision, - * so that it works even if there were no update done by the - * BIOS. Otherwise, reading from 0x8B gives junk (which happened - * to be 0 on my machine which is why it worked even when I - * disabled update by the BIOS) - * Thanks to Eric W. Biederman for the fix. - * 1.08 11 Dec 2000, Richard Schaal and - * Tigran Aivazian - * Intel Pentium 4 processor support and bugfixes. - * 1.09 30 Oct 2001, Tigran Aivazian - * Bugfix for HT (Hyper-Threading) enabled processors - * whereby processor resources are shared by all logical processors - * in a single CPU package. - * 1.10 28 Feb 2002 Asit K Mallick and - * Tigran Aivazian , - * Serialize updates as required on HT processors due to speculative - * nature of implementation. - * 1.11 22 Mar 2002 Tigran Aivazian - * Fix the panic when writing zero-length microcode chunk. - * 1.12 29 Sep 2003 Nitin Kamble , - * Jun Nakajima - * Support for the microcode updates in the new format. - * 1.13 10 Oct 2003 Tigran Aivazian - * Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl - * because we no longer hold a copy of applied microcode - * in kernel memory. - * 1.14 25 Jun 2004 Tigran Aivazian - * Fix sigmatch() macro to handle old CPUs with pf == 0. - * Thanks to Stuart Swales for pointing out this bug. - */ - -//#define DEBUG /* pr_debug */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver"); -MODULE_AUTHOR("Tigran Aivazian "); -MODULE_LICENSE("GPL"); - -#define MICROCODE_VERSION "1.14a" - -#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ -#define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */ -#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */ -#define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */ -#define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */ -#define DWSIZE (sizeof (u32)) -#define get_totalsize(mc) \ - (((microcode_t *)mc)->hdr.totalsize ? \ - ((microcode_t *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE) -#define get_datasize(mc) \ - (((microcode_t *)mc)->hdr.datasize ? \ - ((microcode_t *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) - -#define sigmatch(s1, s2, p1, p2) \ - (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) - -#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) - -/* serialize access to the physical write to MSR 0x79 */ -static DEFINE_SPINLOCK(microcode_update_lock); - -/* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -static DEFINE_MUTEX(microcode_mutex); - -static struct ucode_cpu_info { - int valid; - unsigned int sig; - unsigned int pf; - unsigned int rev; - microcode_t *mc; -} ucode_cpu_info[NR_CPUS]; - -static void collect_cpu_info(int cpu_num) -{ - struct cpuinfo_x86 *c = cpu_data + cpu_num; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - unsigned int val[2]; - - /* We should bind the task to the CPU */ - BUG_ON(raw_smp_processor_id() != cpu_num); - uci->pf = uci->rev = 0; - uci->mc = NULL; - uci->valid = 1; - - if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64)) { - printk(KERN_ERR "microcode: CPU%d not a capable Intel " - "processor\n", cpu_num); - uci->valid = 0; - return; - } - - uci->sig = cpuid_eax(0x00000001); - - if ((c->x86_model >= 5) || (c->x86 > 6)) { - /* get processor flags from MSR 0x17 */ - rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - uci->pf = 1 << ((val[1] >> 18) & 7); - } - - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - /* see notes above for revision 1.07. Apparent chip bug */ - sync_core(); - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev); - pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", - uci->sig, uci->pf, uci->rev); -} - -static inline int microcode_update_match(int cpu_num, - microcode_header_t *mc_header, int sig, int pf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - - if (!sigmatch(sig, uci->sig, pf, uci->pf) - || mc_header->rev <= uci->rev) - return 0; - return 1; -} - -static int microcode_sanity_check(void *mc) -{ - microcode_header_t *mc_header = mc; - struct extended_sigtable *ext_header = NULL; - struct extended_signature *ext_sig; - unsigned long total_size, data_size, ext_table_size; - int sum, orig_sum, ext_sigcount = 0, i; - - total_size = get_totalsize(mc_header); - data_size = get_datasize(mc_header); - if (data_size + MC_HEADER_SIZE > total_size) { - printk(KERN_ERR "microcode: error! " - "Bad data size in microcode data file\n"); - return -EINVAL; - } - - if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { - printk(KERN_ERR "microcode: error! " - "Unknown microcode update format\n"); - return -EINVAL; - } - ext_table_size = total_size - (MC_HEADER_SIZE + data_size); - if (ext_table_size) { - if ((ext_table_size < EXT_HEADER_SIZE) - || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { - printk(KERN_ERR "microcode: error! " - "Small exttable size in microcode data file\n"); - return -EINVAL; - } - ext_header = mc + MC_HEADER_SIZE + data_size; - if (ext_table_size != exttable_size(ext_header)) { - printk(KERN_ERR "microcode: error! " - "Bad exttable size in microcode data file\n"); - return -EFAULT; - } - ext_sigcount = ext_header->count; - } - - /* check extended table checksum */ - if (ext_table_size) { - int ext_table_sum = 0; - int *ext_tablep = (int *)ext_header; - - i = ext_table_size / DWSIZE; - while (i--) - ext_table_sum += ext_tablep[i]; - if (ext_table_sum) { - printk(KERN_WARNING "microcode: aborting, " - "bad extended signature table checksum\n"); - return -EINVAL; - } - } - - /* calculate the checksum */ - orig_sum = 0; - i = (MC_HEADER_SIZE + data_size) / DWSIZE; - while (i--) - orig_sum += ((int *)mc)[i]; - if (orig_sum) { - printk(KERN_ERR "microcode: aborting, bad checksum\n"); - return -EINVAL; - } - if (!ext_table_size) - return 0; - /* check extended signature checksum */ - for (i = 0; i < ext_sigcount; i++) { - ext_sig = (struct extended_signature *)((void *)ext_header - + EXT_HEADER_SIZE + EXT_SIGNATURE_SIZE * i); - sum = orig_sum - - (mc_header->sig + mc_header->pf + mc_header->cksum) - + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); - if (sum) { - printk(KERN_ERR "microcode: aborting, bad checksum\n"); - return -EINVAL; - } - } - return 0; -} - -/* - * return 0 - no update found - * return 1 - found update - * return < 0 - error - */ -static int get_maching_microcode(void *mc, int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - microcode_header_t *mc_header = mc; - struct extended_sigtable *ext_header; - unsigned long total_size = get_totalsize(mc_header); - int ext_sigcount, i; - struct extended_signature *ext_sig; - void *new_mc; - - if (microcode_update_match(cpu, mc_header, - mc_header->sig, mc_header->pf)) - goto find; - - if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) - return 0; - - ext_header = (struct extended_sigtable *)(mc + - get_datasize(mc_header) + MC_HEADER_SIZE); - ext_sigcount = ext_header->count; - ext_sig = (struct extended_signature *)((void *)ext_header - + EXT_HEADER_SIZE); - for (i = 0; i < ext_sigcount; i++) { - if (microcode_update_match(cpu, mc_header, - ext_sig->sig, ext_sig->pf)) - goto find; - ext_sig++; - } - return 0; -find: - pr_debug("microcode: CPU %d found a matching microcode update with" - " version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev); - new_mc = vmalloc(total_size); - if (!new_mc) { - printk(KERN_ERR "microcode: error! Can not allocate memory\n"); - return -ENOMEM; - } - - /* free previous update file */ - vfree(uci->mc); - - memcpy(new_mc, mc, total_size); - uci->mc = new_mc; - return 1; -} - -static void apply_microcode(int cpu) -{ - unsigned long flags; - unsigned int val[2]; - int cpu_num = raw_smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - - /* We should bind the task to the CPU */ - BUG_ON(cpu_num != cpu); - - if (uci->mc == NULL) - return; - - /* serialize access to the physical write to MSR 0x79 */ - spin_lock_irqsave(µcode_update_lock, flags); - - /* write microcode via MSR 0x79 */ - wrmsr(MSR_IA32_UCODE_WRITE, - (unsigned long) uci->mc->bits, - (unsigned long) uci->mc->bits >> 16 >> 16); - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - - /* see notes above for revision 1.07. Apparent chip bug */ - sync_core(); - - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - - spin_unlock_irqrestore(µcode_update_lock, flags); - if (val[1] != uci->mc->hdr.rev) { - printk(KERN_ERR "microcode: CPU%d updated from revision " - "0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]); - return; - } - pr_debug("microcode: CPU%d updated from revision " - "0x%x to 0x%x, date = %08x \n", - cpu_num, uci->rev, val[1], uci->mc->hdr.date); - uci->rev = val[1]; -} - -#ifdef CONFIG_MICROCODE_OLD_INTERFACE -static void __user *user_buffer; /* user area microcode data buffer */ -static unsigned int user_buffer_size; /* it's size */ - -static long get_next_ucode(void **mc, long offset) -{ - microcode_header_t mc_header; - unsigned long total_size; - - /* No more data */ - if (offset >= user_buffer_size) - return 0; - if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - return -EFAULT; - } - total_size = get_totalsize(&mc_header); - if (offset + total_size > user_buffer_size) { - printk(KERN_ERR "microcode: error! Bad total size in microcode " - "data file\n"); - return -EINVAL; - } - *mc = vmalloc(total_size); - if (!*mc) - return -ENOMEM; - if (copy_from_user(*mc, user_buffer + offset, total_size)) { - printk(KERN_ERR "microcode: error! Can not read user data\n"); - vfree(*mc); - return -EFAULT; - } - return offset + total_size; -} - -static int do_microcode_update (void) -{ - long cursor = 0; - int error = 0; - void *new_mc = NULL; - int cpu; - cpumask_t old; - - old = current->cpus_allowed; - - while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) { - error = microcode_sanity_check(new_mc); - if (error) - goto out; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - for_each_online_cpu(cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!uci->valid) - continue; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - error = get_maching_microcode(new_mc, cpu); - if (error < 0) - goto out; - if (error == 1) - apply_microcode(cpu); - } - vfree(new_mc); - } -out: - if (cursor > 0) - vfree(new_mc); - if (cursor < 0) - error = cursor; - set_cpus_allowed(current, old); - return error; -} - -static int microcode_open (struct inode *unused1, struct file *unused2) -{ - return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; -} - -static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) -{ - ssize_t ret; - - if ((len >> PAGE_SHIFT) > num_physpages) { - printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); - return -EINVAL; - } - - lock_cpu_hotplug(); - mutex_lock(µcode_mutex); - - user_buffer = (void __user *) buf; - user_buffer_size = (int) len; - - ret = do_microcode_update(); - if (!ret) - ret = (ssize_t)len; - - mutex_unlock(µcode_mutex); - unlock_cpu_hotplug(); - - return ret; -} - -static const struct file_operations microcode_fops = { - .owner = THIS_MODULE, - .write = microcode_write, - .open = microcode_open, -}; - -static struct miscdevice microcode_dev = { - .minor = MICROCODE_MINOR, - .name = "microcode", - .fops = µcode_fops, -}; - -static int __init microcode_dev_init (void) -{ - int error; - - error = misc_register(µcode_dev); - if (error) { - printk(KERN_ERR - "microcode: can't misc_register on minor=%d\n", - MICROCODE_MINOR); - return error; - } - - return 0; -} - -static void microcode_dev_exit (void) -{ - misc_deregister(µcode_dev); -} - -MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); -#else -#define microcode_dev_init() 0 -#define microcode_dev_exit() do { } while(0) -#endif - -static long get_next_ucode_from_buffer(void **mc, void *buf, - unsigned long size, long offset) -{ - microcode_header_t *mc_header; - unsigned long total_size; - - /* No more data */ - if (offset >= size) - return 0; - mc_header = (microcode_header_t *)(buf + offset); - total_size = get_totalsize(mc_header); - - if (offset + total_size > size) { - printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); - return -EINVAL; - } - - *mc = vmalloc(total_size); - if (!*mc) { - printk(KERN_ERR "microcode: error! Can not allocate memory\n"); - return -ENOMEM; - } - memcpy(*mc, buf + offset, total_size); - return offset + total_size; -} - -/* fake device for request_firmware */ -static struct platform_device *microcode_pdev; - -static int cpu_request_microcode(int cpu) -{ - char name[30]; - struct cpuinfo_x86 *c = cpu_data + cpu; - const struct firmware *firmware; - void *buf; - unsigned long size; - long offset = 0; - int error; - void *mc; - - /* We should bind the task to the CPU */ - BUG_ON(cpu != raw_smp_processor_id()); - sprintf(name,"intel-ucode/%02x-%02x-%02x", - c->x86, c->x86_model, c->x86_mask); - error = request_firmware(&firmware, name, µcode_pdev->dev); - if (error) { - pr_debug("ucode data file %s load failed\n", name); - return error; - } - buf = (void *)firmware->data; - size = firmware->size; - while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset)) - > 0) { - error = microcode_sanity_check(mc); - if (error) - break; - error = get_maching_microcode(mc, cpu); - if (error < 0) - break; - /* - * It's possible the data file has multiple matching ucode, - * lets keep searching till the latest version - */ - if (error == 1) { - apply_microcode(cpu); - error = 0; - } - vfree(mc); - } - if (offset > 0) - vfree(mc); - if (offset < 0) - error = offset; - release_firmware(firmware); - - return error; -} - -static int apply_microcode_check_cpu(int cpu) -{ - struct cpuinfo_x86 *c = cpu_data + cpu; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - cpumask_t old; - unsigned int val[2]; - int err = 0; - - /* Check if the microcode is available */ - if (!uci->mc) - return 0; - - old = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - - /* Check if the microcode we have in memory matches the CPU */ - if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || - cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001)) - err = -EINVAL; - - if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) { - /* get processor flags from MSR 0x17 */ - rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - if (uci->pf != (1 << ((val[1] >> 18) & 7))) - err = -EINVAL; - } - - if (!err) { - wrmsr(MSR_IA32_UCODE_REV, 0, 0); - /* see notes above for revision 1.07. Apparent chip bug */ - sync_core(); - /* get the current revision from MSR 0x8B */ - rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); - if (uci->rev != val[1]) - err = -EINVAL; - } - - if (!err) - apply_microcode(cpu); - else - printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:" - " sig=0x%x, pf=0x%x, rev=0x%x\n", - cpu, uci->sig, uci->pf, uci->rev); - - set_cpus_allowed(current, old); - return err; -} - -static void microcode_init_cpu(int cpu, int resume) -{ - cpumask_t old; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - old = current->cpus_allowed; - - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - mutex_lock(µcode_mutex); - collect_cpu_info(cpu); - if (uci->valid && system_state == SYSTEM_RUNNING && !resume) - cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); - set_cpus_allowed(current, old); -} - -static void microcode_fini_cpu(int cpu) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - mutex_lock(µcode_mutex); - uci->valid = 0; - vfree(uci->mc); - uci->mc = NULL; - mutex_unlock(µcode_mutex); -} - -static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - char *end; - unsigned long val = simple_strtoul(buf, &end, 0); - int err = 0; - int cpu = dev->id; - - if (end == buf) - return -EINVAL; - if (val == 1) { - cpumask_t old; - - old = current->cpus_allowed; - - lock_cpu_hotplug(); - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - - mutex_lock(µcode_mutex); - if (uci->valid) - err = cpu_request_microcode(cpu); - mutex_unlock(µcode_mutex); - unlock_cpu_hotplug(); - set_cpus_allowed(current, old); - } - if (err) - return err; - return sz; -} - -static ssize_t version_show(struct sys_device *dev, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->rev); -} - -static ssize_t pf_show(struct sys_device *dev, char *buf) -{ - struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; - - return sprintf(buf, "0x%x\n", uci->pf); -} - -static SYSDEV_ATTR(reload, 0200, NULL, reload_store); -static SYSDEV_ATTR(version, 0400, version_show, NULL); -static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL); - -static struct attribute *mc_default_attrs[] = { - &attr_reload.attr, - &attr_version.attr, - &attr_processor_flags.attr, - NULL -}; - -static struct attribute_group mc_attr_group = { - .attrs = mc_default_attrs, - .name = "microcode", -}; - -static int __mc_sysdev_add(struct sys_device *sys_dev, int resume) -{ - int err, cpu = sys_dev->id; - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("Microcode:CPU %d added\n", cpu); - memset(uci, 0, sizeof(*uci)); - - err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); - if (err) - return err; - - microcode_init_cpu(cpu, resume); - - return 0; -} - -static int mc_sysdev_add(struct sys_device *sys_dev) -{ - return __mc_sysdev_add(sys_dev, 0); -} - -static int mc_sysdev_remove(struct sys_device *sys_dev) -{ - int cpu = sys_dev->id; - - if (!cpu_online(cpu)) - return 0; - - pr_debug("Microcode:CPU %d removed\n", cpu); - microcode_fini_cpu(cpu); - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - return 0; -} - -static int mc_sysdev_resume(struct sys_device *dev) -{ - int cpu = dev->id; - - if (!cpu_online(cpu)) - return 0; - pr_debug("Microcode:CPU %d resumed\n", cpu); - /* only CPU 0 will apply ucode here */ - apply_microcode(0); - return 0; -} - -static struct sysdev_driver mc_sysdev_driver = { - .add = mc_sysdev_add, - .remove = mc_sysdev_remove, - .resume = mc_sysdev_resume, -}; - -static __cpuinit int -mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; - - sys_dev = get_cpu_sysdev(cpu); - switch (action) { - case CPU_UP_CANCELED_FROZEN: - /* The CPU refused to come up during a system resume */ - microcode_fini_cpu(cpu); - break; - case CPU_ONLINE: - case CPU_DOWN_FAILED: - mc_sysdev_add(sys_dev); - break; - case CPU_ONLINE_FROZEN: - /* System-wide resume is in progress, try to apply microcode */ - if (apply_microcode_check_cpu(cpu)) { - /* The application of microcode failed */ - microcode_fini_cpu(cpu); - __mc_sysdev_add(sys_dev, 1); - break; - } - case CPU_DOWN_FAILED_FROZEN: - if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) - printk(KERN_ERR "Microcode: Failed to create the sysfs " - "group for CPU%d\n", cpu); - break; - case CPU_DOWN_PREPARE: - mc_sysdev_remove(sys_dev); - break; - case CPU_DOWN_PREPARE_FROZEN: - /* Suspend is in progress, only remove the interface */ - sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __cpuinitdata mc_cpu_notifier = { - .notifier_call = mc_cpu_callback, -}; - -static int __init microcode_init (void) -{ - int error; - - error = microcode_dev_init(); - if (error) - return error; - microcode_pdev = platform_device_register_simple("microcode", -1, - NULL, 0); - if (IS_ERR(microcode_pdev)) { - microcode_dev_exit(); - return PTR_ERR(microcode_pdev); - } - - lock_cpu_hotplug(); - error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); - unlock_cpu_hotplug(); - if (error) { - microcode_dev_exit(); - platform_device_unregister(microcode_pdev); - return error; - } - - register_hotcpu_notifier(&mc_cpu_notifier); - - printk(KERN_INFO - "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " \n"); - return 0; -} - -static void __exit microcode_exit (void) -{ - microcode_dev_exit(); - - unregister_hotcpu_notifier(&mc_cpu_notifier); - - lock_cpu_hotplug(); - sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); - unlock_cpu_hotplug(); - - platform_device_unregister(microcode_pdev); -} - -module_init(microcode_init) -module_exit(microcode_exit) diff --git a/arch/i386/kernel/module_32.c b/arch/i386/kernel/module_32.c deleted file mode 100644 index 3db0a5442eb..00000000000 --- a/arch/i386/kernel/module_32.c +++ /dev/null @@ -1,152 +0,0 @@ -/* Kernel module help for i386. - Copyright (C) 2001 Rusty Russell. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -#include -#include -#include -#include -#include -#include -#include - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(fmt...) -#endif - -void *module_alloc(unsigned long size) -{ - if (size == 0) - return NULL; - return vmalloc_exec(size); -} - - -/* Free memory returned from module_alloc */ -void module_free(struct module *mod, void *module_region) -{ - vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ -} - -/* We don't need anything special. */ -int module_frob_arch_sections(Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - char *secstrings, - struct module *mod) -{ - return 0; -} - -int apply_relocate(Elf32_Shdr *sechdrs, - const char *strtab, - unsigned int symindex, - unsigned int relsec, - struct module *me) -{ - unsigned int i; - Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; - Elf32_Sym *sym; - uint32_t *location; - - DEBUGP("Applying relocate section %u to %u\n", relsec, - sechdrs[relsec].sh_info); - for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { - /* This is where to make the change */ - location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr - + rel[i].r_offset; - /* This is the symbol it is referring to. Note that all - undefined symbols have been resolved. */ - sym = (Elf32_Sym *)sechdrs[symindex].sh_addr - + ELF32_R_SYM(rel[i].r_info); - - switch (ELF32_R_TYPE(rel[i].r_info)) { - case R_386_32: - /* We add the value into the location given */ - *location += sym->st_value; - break; - case R_386_PC32: - /* Add the value, subtract its postition */ - *location += sym->st_value - (uint32_t)location; - break; - default: - printk(KERN_ERR "module %s: Unknown relocation: %u\n", - me->name, ELF32_R_TYPE(rel[i].r_info)); - return -ENOEXEC; - } - } - return 0; -} - -int apply_relocate_add(Elf32_Shdr *sechdrs, - const char *strtab, - unsigned int symindex, - unsigned int relsec, - struct module *me) -{ - printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n", - me->name); - return -ENOEXEC; -} - -int module_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) -{ - const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, - *para = NULL; - char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - - for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { - if (!strcmp(".text", secstrings + s->sh_name)) - text = s; - if (!strcmp(".altinstructions", secstrings + s->sh_name)) - alt = s; - if (!strcmp(".smp_locks", secstrings + s->sh_name)) - locks= s; - if (!strcmp(".parainstructions", secstrings + s->sh_name)) - para = s; - } - - if (alt) { - /* patch .altinstructions */ - void *aseg = (void *)alt->sh_addr; - apply_alternatives(aseg, aseg + alt->sh_size); - } - if (locks && text) { - void *lseg = (void *)locks->sh_addr; - void *tseg = (void *)text->sh_addr; - alternatives_smp_module_add(me, me->name, - lseg, lseg + locks->sh_size, - tseg, tseg + text->sh_size); - } - - if (para) { - void *pseg = (void *)para->sh_addr; - apply_paravirt(pseg, pseg + para->sh_size); - } - - return module_bug_finalize(hdr, sechdrs, me); -} - -void module_arch_cleanup(struct module *mod) -{ - alternatives_smp_module_del(mod); - module_bug_cleanup(mod); -} diff --git a/arch/i386/kernel/mpparse_32.c b/arch/i386/kernel/mpparse_32.c deleted file mode 100644 index 13abb4ebfb7..00000000000 --- a/arch/i386/kernel/mpparse_32.c +++ /dev/null @@ -1,1132 +0,0 @@ -/* - * Intel Multiprocessor Specification 1.1 and 1.4 - * compliant MP-table parsing routines. - * - * (c) 1995 Alan Cox, Building #3 - * (c) 1998, 1999, 2000 Ingo Molnar - * - * Fixes - * Erich Boleyn : MP v1.4 and additional changes. - * Alan Cox : Added EBDA scanning - * Ingo Molnar : various cleanups and rewrites - * Maciej W. Rozycki: Bits for default MP configurations - * Paul Diefenbaugh: Added full ACPI support - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* Have we found an MP table */ -int smp_found_config; -unsigned int __cpuinitdata maxcpus = NR_CPUS; - -/* - * Various Linux-internal data structures created from the - * MP-table. - */ -int apic_version [MAX_APICS]; -int mp_bus_id_to_type [MAX_MP_BUSSES]; -int mp_bus_id_to_node [MAX_MP_BUSSES]; -int mp_bus_id_to_local [MAX_MP_BUSSES]; -int quad_local_to_mp_bus_id [NR_CPUS/4][4]; -int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; -static int mp_current_pci_id; - -/* I/O APIC entries */ -struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS]; - -/* # of MP IRQ source entries */ -struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; - -/* MP IRQ source entries */ -int mp_irq_entries; - -int nr_ioapics; - -int pic_mode; -unsigned long mp_lapic_addr; - -unsigned int def_to_bigsmp = 0; - -/* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid = -1U; -/* Internal processor count */ -unsigned int __cpuinitdata num_processors; - -/* Bitmask of physically existing CPUs */ -physid_mask_t phys_cpu_present_map; - -u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; - -/* - * Intel MP BIOS table parsing routines: - */ - - -/* - * Checksum an MP configuration block. - */ - -static int __init mpf_checksum(unsigned char *mp, int len) -{ - int sum = 0; - - while (len--) - sum += *mp++; - - return sum & 0xFF; -} - -/* - * Have to match translation table entries to main table entries by counter - * hence the mpc_record variable .... can't see a less disgusting way of - * doing this .... - */ - -static int mpc_record; -static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata; - -static void __cpuinit MP_processor_info (struct mpc_config_processor *m) -{ - int ver, apicid; - physid_mask_t phys_cpu; - - if (!(m->mpc_cpuflag & CPU_ENABLED)) - return; - - apicid = mpc_apic_id(m, translation_table[mpc_record]); - - if (m->mpc_featureflag&(1<<0)) - Dprintk(" Floating point unit present.\n"); - if (m->mpc_featureflag&(1<<7)) - Dprintk(" Machine Exception supported.\n"); - if (m->mpc_featureflag&(1<<8)) - Dprintk(" 64 bit compare & exchange supported.\n"); - if (m->mpc_featureflag&(1<<9)) - Dprintk(" Internal APIC present.\n"); - if (m->mpc_featureflag&(1<<11)) - Dprintk(" SEP present.\n"); - if (m->mpc_featureflag&(1<<12)) - Dprintk(" MTRR present.\n"); - if (m->mpc_featureflag&(1<<13)) - Dprintk(" PGE present.\n"); - if (m->mpc_featureflag&(1<<14)) - Dprintk(" MCA present.\n"); - if (m->mpc_featureflag&(1<<15)) - Dprintk(" CMOV present.\n"); - if (m->mpc_featureflag&(1<<16)) - Dprintk(" PAT present.\n"); - if (m->mpc_featureflag&(1<<17)) - Dprintk(" PSE present.\n"); - if (m->mpc_featureflag&(1<<18)) - Dprintk(" PSN present.\n"); - if (m->mpc_featureflag&(1<<19)) - Dprintk(" Cache Line Flush Instruction present.\n"); - /* 20 Reserved */ - if (m->mpc_featureflag&(1<<21)) - Dprintk(" Debug Trace and EMON Store present.\n"); - if (m->mpc_featureflag&(1<<22)) - Dprintk(" ACPI Thermal Throttle Registers present.\n"); - if (m->mpc_featureflag&(1<<23)) - Dprintk(" MMX present.\n"); - if (m->mpc_featureflag&(1<<24)) - Dprintk(" FXSR present.\n"); - if (m->mpc_featureflag&(1<<25)) - Dprintk(" XMM present.\n"); - if (m->mpc_featureflag&(1<<26)) - Dprintk(" Willamette New Instructions present.\n"); - if (m->mpc_featureflag&(1<<27)) - Dprintk(" Self Snoop present.\n"); - if (m->mpc_featureflag&(1<<28)) - Dprintk(" HT present.\n"); - if (m->mpc_featureflag&(1<<29)) - Dprintk(" Thermal Monitor present.\n"); - /* 30, 31 Reserved */ - - - if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { - Dprintk(" Bootup CPU\n"); - boot_cpu_physical_apicid = m->mpc_apicid; - } - - ver = m->mpc_apicver; - - /* - * Validate version - */ - if (ver == 0x0) { - printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - m->mpc_apicid); - ver = 0x10; - } - apic_version[m->mpc_apicid] = ver; - - phys_cpu = apicid_to_cpu_present(apicid); - physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); - - if (num_processors >= NR_CPUS) { - printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); - return; - } - - if (num_processors >= maxcpus) { - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); - return; - } - - cpu_set(num_processors, cpu_possible_map); - num_processors++; - - /* - * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y - * but we need to work other dependencies like SMP_SUSPEND etc - * before this can be done without some confusion. - * if (CPU_HOTPLUG_ENABLED || num_processors > 8) - * - Ashok Raj - */ - if (num_processors > 8) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(ver)) { - def_to_bigsmp = 0; - break; - } - /* If P4 and above fall through */ - case X86_VENDOR_AMD: - def_to_bigsmp = 1; - } - } - bios_cpu_apicid[num_processors - 1] = m->mpc_apicid; -} - -static void __init MP_bus_info (struct mpc_config_bus *m) -{ - char str[7]; - - memcpy(str, m->mpc_bustype, 6); - str[6] = 0; - - mpc_oem_bus_info(m, str, translation_table[mpc_record]); - -#if MAX_MP_BUSSES < 256 - if (m->mpc_busid >= MAX_MP_BUSSES) { - printk(KERN_WARNING "MP table busid value (%d) for bustype %s " - " is too large, max. supported is %d\n", - m->mpc_busid, str, MAX_MP_BUSSES - 1); - return; - } -#endif - - if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; - } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; - } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) { - mpc_oem_pci_bus(m, translation_table[mpc_record]); - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; - mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; - mp_current_pci_id++; - } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; - } else { - printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); - } -} - -static void __init MP_ioapic_info (struct mpc_config_ioapic *m) -{ - if (!(m->mpc_flags & MPC_APIC_USABLE)) - return; - - printk(KERN_INFO "I/O APIC #%d Version %d at 0x%lX.\n", - m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); - if (nr_ioapics >= MAX_IO_APICS) { - printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n", - MAX_IO_APICS, nr_ioapics); - panic("Recompile kernel with bigger MAX_IO_APICS!.\n"); - } - if (!m->mpc_apicaddr) { - printk(KERN_ERR "WARNING: bogus zero I/O APIC address" - " found in MP table, skipping!\n"); - return; - } - mp_ioapics[nr_ioapics] = *m; - nr_ioapics++; -} - -static void __init MP_intsrc_info (struct mpc_config_intsrc *m) -{ - mp_irqs [mp_irq_entries] = *m; - Dprintk("Int: type %d, pol %d, trig %d, bus %d," - " IRQ %02x, APIC ID %x, APIC INT %02x\n", - m->mpc_irqtype, m->mpc_irqflag & 3, - (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, - m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!!\n"); -} - -static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) -{ - Dprintk("Lint: type %d, pol %d, trig %d, bus %d," - " IRQ %02x, APIC ID %x, APIC LINT %02x\n", - m->mpc_irqtype, m->mpc_irqflag & 3, - (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, - m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); -} - -#ifdef CONFIG_X86_NUMAQ -static void __init MP_translation_info (struct mpc_config_translation *m) -{ - printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local); - - if (mpc_record >= MAX_MPC_ENTRY) - printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); - else - translation_table[mpc_record] = m; /* stash this for later */ - if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) - node_set_online(m->trans_quad); -} - -/* - * Read/parse the MPC oem tables - */ - -static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \ - unsigned short oemsize) -{ - int count = sizeof (*oemtable); /* the header size */ - unsigned char *oemptr = ((unsigned char *)oemtable)+count; - - mpc_record = 0; - printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable); - if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4)) - { - printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", - oemtable->oem_signature[0], - oemtable->oem_signature[1], - oemtable->oem_signature[2], - oemtable->oem_signature[3]); - return; - } - if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length)) - { - printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); - return; - } - while (count < oemtable->oem_length) { - switch (*oemptr) { - case MP_TRANSLATION: - { - struct mpc_config_translation *m= - (struct mpc_config_translation *)oemptr; - MP_translation_info(m); - oemptr += sizeof(*m); - count += sizeof(*m); - ++mpc_record; - break; - } - default: - { - printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr); - return; - } - } - } -} - -static inline void mps_oem_check(struct mp_config_table *mpc, char *oem, - char *productid) -{ - if (strncmp(oem, "IBM NUMA", 8)) - printk("Warning! May not be a NUMA-Q system!\n"); - if (mpc->mpc_oemptr) - smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr, - mpc->mpc_oemsize); -} -#endif /* CONFIG_X86_NUMAQ */ - -/* - * Read/parse the MPC - */ - -static int __init smp_read_mpc(struct mp_config_table *mpc) -{ - char str[16]; - char oem[10]; - int count=sizeof(*mpc); - unsigned char *mpt=((unsigned char *)mpc)+count; - - if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) { - printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n", - *(u32 *)mpc->mpc_signature); - return 0; - } - if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) { - printk(KERN_ERR "SMP mptable: checksum error!\n"); - return 0; - } - if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) { - printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n", - mpc->mpc_spec); - return 0; - } - if (!mpc->mpc_lapic) { - printk(KERN_ERR "SMP mptable: null local APIC address!\n"); - return 0; - } - memcpy(oem,mpc->mpc_oem,8); - oem[8]=0; - printk(KERN_INFO "OEM ID: %s ",oem); - - memcpy(str,mpc->mpc_productid,12); - str[12]=0; - printk("Product ID: %s ",str); - - mps_oem_check(mpc, oem, str); - - printk("APIC at: 0x%lX\n",mpc->mpc_lapic); - - /* - * Save the local APIC address (it might be non-default) -- but only - * if we're not using ACPI. - */ - if (!acpi_lapic) - mp_lapic_addr = mpc->mpc_lapic; - - /* - * Now process the configuration blocks. - */ - mpc_record = 0; - while (count < mpc->mpc_length) { - switch(*mpt) { - case MP_PROCESSOR: - { - struct mpc_config_processor *m= - (struct mpc_config_processor *)mpt; - /* ACPI may have already provided this data */ - if (!acpi_lapic) - MP_processor_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } - case MP_BUS: - { - struct mpc_config_bus *m= - (struct mpc_config_bus *)mpt; - MP_bus_info(m); - mpt += sizeof(*m); - count += sizeof(*m); - break; - } - case MP_IOAPIC: - { - struct mpc_config_ioapic *m= - (struct mpc_config_ioapic *)mpt; - MP_ioapic_info(m); - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - case MP_INTSRC: - { - struct mpc_config_intsrc *m= - (struct mpc_config_intsrc *)mpt; - - MP_intsrc_info(m); - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - case MP_LINTSRC: - { - struct mpc_config_lintsrc *m= - (struct mpc_config_lintsrc *)mpt; - MP_lintsrc_info(m); - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - default: - { - count = mpc->mpc_length; - break; - } - } - ++mpc_record; - } - setup_apic_routing(); - if (!num_processors) - printk(KERN_ERR "SMP mptable: no processors registered!\n"); - return num_processors; -} - -static int __init ELCR_trigger(unsigned int irq) -{ - unsigned int port; - - port = 0x4d0 + (irq >> 3); - return (inb(port) >> (irq & 7)) & 1; -} - -static void __init construct_default_ioirq_mptable(int mpc_default_type) -{ - struct mpc_config_intsrc intsrc; - int i; - int ELCR_fallback = 0; - - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqflag = 0; /* conforming */ - intsrc.mpc_srcbus = 0; - intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid; - - intsrc.mpc_irqtype = mp_INT; - - /* - * If true, we have an ISA/PCI system with no IRQ entries - * in the MP table. To prevent the PCI interrupts from being set up - * incorrectly, we try to use the ELCR. The sanity check to see if - * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can - * never be level sensitive, so we simply see if the ELCR agrees. - * If it does, we assume it's valid. - */ - if (mpc_default_type == 5) { - printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n"); - - if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13)) - printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n"); - else { - printk(KERN_INFO "Using ELCR to identify PCI interrupts\n"); - ELCR_fallback = 1; - } - } - - for (i = 0; i < 16; i++) { - switch (mpc_default_type) { - case 2: - if (i == 0 || i == 13) - continue; /* IRQ0 & IRQ13 not connected */ - /* fall through */ - default: - if (i == 2) - continue; /* IRQ2 is never connected */ - } - - if (ELCR_fallback) { - /* - * If the ELCR indicates a level-sensitive interrupt, we - * copy that information over to the MP table in the - * irqflag field (level sensitive, active high polarity). - */ - if (ELCR_trigger(i)) - intsrc.mpc_irqflag = 13; - else - intsrc.mpc_irqflag = 0; - } - - intsrc.mpc_srcbusirq = i; - intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ - MP_intsrc_info(&intsrc); - } - - intsrc.mpc_irqtype = mp_ExtINT; - intsrc.mpc_srcbusirq = 0; - intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */ - MP_intsrc_info(&intsrc); -} - -static inline void __init construct_default_ISA_mptable(int mpc_default_type) -{ - struct mpc_config_processor processor; - struct mpc_config_bus bus; - struct mpc_config_ioapic ioapic; - struct mpc_config_lintsrc lintsrc; - int linttypes[2] = { mp_ExtINT, mp_NMI }; - int i; - - /* - * local APIC has default address - */ - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* - * 2 CPUs, numbered 0 & 1. - */ - processor.mpc_type = MP_PROCESSOR; - /* Either an integrated APIC or a discrete 82489DX. */ - processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; - processor.mpc_cpuflag = CPU_ENABLED; - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | - boot_cpu_data.x86_mask; - processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; - processor.mpc_reserved[0] = 0; - processor.mpc_reserved[1] = 0; - for (i = 0; i < 2; i++) { - processor.mpc_apicid = i; - MP_processor_info(&processor); - } - - bus.mpc_type = MP_BUS; - bus.mpc_busid = 0; - switch (mpc_default_type) { - default: - printk("???\n"); - printk(KERN_ERR "Unknown standard configuration %d\n", - mpc_default_type); - /* fall through */ - case 1: - case 5: - memcpy(bus.mpc_bustype, "ISA ", 6); - break; - case 2: - case 6: - case 3: - memcpy(bus.mpc_bustype, "EISA ", 6); - break; - case 4: - case 7: - memcpy(bus.mpc_bustype, "MCA ", 6); - } - MP_bus_info(&bus); - if (mpc_default_type > 4) { - bus.mpc_busid = 1; - memcpy(bus.mpc_bustype, "PCI ", 6); - MP_bus_info(&bus); - } - - ioapic.mpc_type = MP_IOAPIC; - ioapic.mpc_apicid = 2; - ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; - ioapic.mpc_flags = MPC_APIC_USABLE; - ioapic.mpc_apicaddr = 0xFEC00000; - MP_ioapic_info(&ioapic); - - /* - * We set up most of the low 16 IO-APIC pins according to MPS rules. - */ - construct_default_ioirq_mptable(mpc_default_type); - - lintsrc.mpc_type = MP_LINTSRC; - lintsrc.mpc_irqflag = 0; /* conforming */ - lintsrc.mpc_srcbusid = 0; - lintsrc.mpc_srcbusirq = 0; - lintsrc.mpc_destapic = MP_APIC_ALL; - for (i = 0; i < 2; i++) { - lintsrc.mpc_irqtype = linttypes[i]; - lintsrc.mpc_destapiclint = i; - MP_lintsrc_info(&lintsrc); - } -} - -static struct intel_mp_floating *mpf_found; - -/* - * Scan the memory blocks for an SMP configuration block. - */ -void __init get_smp_config (void) -{ - struct intel_mp_floating *mpf = mpf_found; - - /* - * ACPI supports both logical (e.g. Hyper-Threading) and physical - * processors, where MPS only supports physical. - */ - if (acpi_lapic && acpi_ioapic) { - printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n"); - return; - } - else if (acpi_lapic) - printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n"); - - printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); - if (mpf->mpf_feature2 & (1<<7)) { - printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); - pic_mode = 1; - } else { - printk(KERN_INFO " Virtual Wire compatibility mode.\n"); - pic_mode = 0; - } - - /* - * Now see if we need to read further. - */ - if (mpf->mpf_feature1 != 0) { - - printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1); - construct_default_ISA_mptable(mpf->mpf_feature1); - - } else if (mpf->mpf_physptr) { - - /* - * Read the physical hardware table. Anything here will - * override the defaults. - */ - if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) { - smp_found_config = 0; - printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); - printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); - return; - } - /* - * If there are no explicit MP IRQ entries, then we are - * broken. We set up most of the low 16 IO-APIC pins to - * ISA defaults and hope it will work. - */ - if (!mp_irq_entries) { - struct mpc_config_bus bus; - - printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n"); - - bus.mpc_type = MP_BUS; - bus.mpc_busid = 0; - memcpy(bus.mpc_bustype, "ISA ", 6); - MP_bus_info(&bus); - - construct_default_ioirq_mptable(0); - } - - } else - BUG(); - - printk(KERN_INFO "Processors: %d\n", num_processors); - /* - * Only use the first configuration found. - */ -} - -static int __init smp_scan_config (unsigned long base, unsigned long length) -{ - unsigned long *bp = phys_to_virt(base); - struct intel_mp_floating *mpf; - - Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length); - if (sizeof(*mpf) != 16) - printk("Error: MPF size\n"); - - while (length > 0) { - mpf = (struct intel_mp_floating *)bp; - if ((*bp == SMP_MAGIC_IDENT) && - (mpf->mpf_length == 1) && - !mpf_checksum((unsigned char *)bp, 16) && - ((mpf->mpf_specification == 1) - || (mpf->mpf_specification == 4)) ) { - - smp_found_config = 1; - printk(KERN_INFO "found SMP MP-table at %08lx\n", - virt_to_phys(mpf)); - reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE); - if (mpf->mpf_physptr) { - /* - * We cannot access to MPC table to compute - * table size yet, as only few megabytes from - * the bottom is mapped now. - * PC-9800's MPC table places on the very last - * of physical memory; so that simply reserving - * PAGE_SIZE from mpg->mpf_physptr yields BUG() - * in reserve_bootmem. - */ - unsigned long size = PAGE_SIZE; - unsigned long end = max_low_pfn * PAGE_SIZE; - if (mpf->mpf_physptr + size > end) - size = end - mpf->mpf_physptr; - reserve_bootmem(mpf->mpf_physptr, size); - } - - mpf_found = mpf; - return 1; - } - bp += 4; - length -= 16; - } - return 0; -} - -void __init find_smp_config (void) -{ - unsigned int address; - - /* - * FIXME: Linux assumes you have 640K of base ram.. - * this continues the error... - * - * 1) Scan the bottom 1K for a signature - * 2) Scan the top 1K of base RAM - * 3) Scan the 64K of bios - */ - if (smp_scan_config(0x0,0x400) || - smp_scan_config(639*0x400,0x400) || - smp_scan_config(0xF0000,0x10000)) - return; - /* - * If it is an SMP machine we should know now, unless the - * configuration is in an EISA/MCA bus machine with an - * extended bios data area. - * - * there is a real-mode segmented pointer pointing to the - * 4K EBDA area at 0x40E, calculate and scan it here. - * - * NOTE! There are Linux loaders that will corrupt the EBDA - * area, and as such this kind of SMP config may be less - * trustworthy, simply because the SMP table may have been - * stomped on during early boot. These loaders are buggy and - * should be fixed. - * - * MP1.4 SPEC states to only scan first 1K of 4K EBDA. - */ - - address = get_bios_ebda(); - if (address) - smp_scan_config(address, 0x400); -} - -int es7000_plat; - -/* -------------------------------------------------------------------------- - ACPI-based MP Configuration - -------------------------------------------------------------------------- */ - -#ifdef CONFIG_ACPI - -void __init mp_register_lapic_address(u64 address) -{ - mp_lapic_addr = (unsigned long) address; - - set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); - - if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); - - Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); -} - -void __cpuinit mp_register_lapic (u8 id, u8 enabled) -{ - struct mpc_config_processor processor; - int boot_cpu = 0; - - if (MAX_APICS - id <= 0) { - printk(KERN_WARNING "Processor #%d invalid (max %d)\n", - id, MAX_APICS); - return; - } - - if (id == boot_cpu_physical_apicid) - boot_cpu = 1; - - processor.mpc_type = MP_PROCESSOR; - processor.mpc_apicid = id; - processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR)); - processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0); - processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0); - processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | - (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; - processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; - processor.mpc_reserved[0] = 0; - processor.mpc_reserved[1] = 0; - - MP_processor_info(&processor); -} - -#ifdef CONFIG_X86_IO_APIC - -#define MP_ISA_BUS 0 -#define MP_MAX_IOAPIC_PIN 127 - -static struct mp_ioapic_routing { - int apic_id; - int gsi_base; - int gsi_end; - u32 pin_programmed[4]; -} mp_ioapic_routing[MAX_IO_APICS]; - -static int mp_find_ioapic (int gsi) -{ - int i = 0; - - /* Find the IOAPIC that manages this GSI. */ - for (i = 0; i < nr_ioapics; i++) { - if ((gsi >= mp_ioapic_routing[i].gsi_base) - && (gsi <= mp_ioapic_routing[i].gsi_end)) - return i; - } - - printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); - - return -1; -} - -void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) -{ - int idx = 0; - int tmpid; - - if (nr_ioapics >= MAX_IO_APICS) { - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " - "(found %d)\n", MAX_IO_APICS, nr_ioapics); - panic("Recompile kernel with bigger MAX_IO_APICS!\n"); - } - if (!address) { - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" - " found in MADT table, skipping!\n"); - return; - } - - idx = nr_ioapics++; - - mp_ioapics[idx].mpc_type = MP_IOAPIC; - mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE; - mp_ioapics[idx].mpc_apicaddr = address; - - set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - tmpid = io_apic_get_unique_id(idx, id); - else - tmpid = id; - if (tmpid == -1) { - nr_ioapics--; - return; - } - mp_ioapics[idx].mpc_apicid = tmpid; - mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); - - /* - * Build basic GSI lookup table to facilitate gsi->io_apic lookups - * and to prevent reprogramming of IOAPIC pins (PCI GSIs). - */ - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid; - mp_ioapic_routing[idx].gsi_base = gsi_base; - mp_ioapic_routing[idx].gsi_end = gsi_base + - io_apic_get_redir_entries(idx); - - printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " - "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, - mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, - mp_ioapic_routing[idx].gsi_base, - mp_ioapic_routing[idx].gsi_end); -} - -void __init -mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) -{ - struct mpc_config_intsrc intsrc; - int ioapic = -1; - int pin = -1; - - /* - * Convert 'gsi' to 'ioapic.pin'. - */ - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return; - pin = gsi - mp_ioapic_routing[ioapic].gsi_base; - - /* - * TBD: This check is for faulty timer entries, where the override - * erroneously sets the trigger to level, resulting in a HUGE - * increase of timer interrupts! - */ - if ((bus_irq == 0) && (trigger == 3)) - trigger = 1; - - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqtype = mp_INT; - intsrc.mpc_irqflag = (trigger << 2) | polarity; - intsrc.mpc_srcbus = MP_ISA_BUS; - intsrc.mpc_srcbusirq = bus_irq; /* IRQ */ - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ - intsrc.mpc_dstirq = pin; /* INTIN# */ - - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", - intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq); - - mp_irqs[mp_irq_entries] = intsrc; - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!\n"); -} - -void __init mp_config_acpi_legacy_irqs (void) -{ - struct mpc_config_intsrc intsrc; - int i = 0; - int ioapic = -1; - - /* - * Fabricate the legacy ISA bus (bus #31). - */ - mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; - Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); - - /* - * Older generations of ES7000 have no legacy identity mappings - */ - if (es7000_plat == 1) - return; - - /* - * Locate the IOAPIC that manages the ISA IRQs (0-15). - */ - ioapic = mp_find_ioapic(0); - if (ioapic < 0) - return; - - intsrc.mpc_type = MP_INTSRC; - intsrc.mpc_irqflag = 0; /* Conforming */ - intsrc.mpc_srcbus = MP_ISA_BUS; - intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; - - /* - * Use the default configuration for the IRQs 0-15. Unless - * overriden by (MADT) interrupt source override entries. - */ - for (i = 0; i < 16; i++) { - int idx; - - for (idx = 0; idx < mp_irq_entries; idx++) { - struct mpc_config_intsrc *irq = mp_irqs + idx; - - /* Do we already have a mapping for this ISA IRQ? */ - if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i) - break; - - /* Do we already have a mapping for this IOAPIC pin */ - if ((irq->mpc_dstapic == intsrc.mpc_dstapic) && - (irq->mpc_dstirq == i)) - break; - } - - if (idx != mp_irq_entries) { - printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i); - continue; /* IRQ already used */ - } - - intsrc.mpc_irqtype = mp_INT; - intsrc.mpc_srcbusirq = i; /* Identity mapped */ - intsrc.mpc_dstirq = i; - - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, " - "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, - intsrc.mpc_dstirq); - - mp_irqs[mp_irq_entries] = intsrc; - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!\n"); - } -} - -#define MAX_GSI_NUM 4096 - -int mp_register_gsi(u32 gsi, int triggering, int polarity) -{ - int ioapic = -1; - int ioapic_pin = 0; - int idx, bit = 0; - static int pci_irq = 16; - /* - * Mapping between Global System Interrups, which - * represent all possible interrupts, and IRQs - * assigned to actual devices. - */ - static int gsi_to_irq[MAX_GSI_NUM]; - - /* Don't set up the ACPI SCI because it's already set up */ - if (acpi_gbl_FADT.sci_interrupt == gsi) - return gsi; - - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) { - printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi); - return gsi; - } - - ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; - - if (ioapic_renumber_irq) - gsi = ioapic_renumber_irq(ioapic, gsi); - - /* - * Avoid pin reprogramming. PRTs typically include entries - * with redundant pin->gsi mappings (but unique PCI devices); - * we only program the IOAPIC on the first. - */ - bit = ioapic_pin % 32; - idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32); - if (idx > 3) { - printk(KERN_ERR "Invalid reference to IOAPIC pin " - "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, - ioapic_pin); - return gsi; - } - if ((1< 15), but - * avoid a problem where the 8254 timer (IRQ0) is setup - * via an override (so it's not on pin 0 of the ioapic), - * and at the same time, the pin 0 interrupt is a PCI - * type. The gsi > 15 test could cause these two pins - * to be shared as IRQ0, and they are not shareable. - * So test for this condition, and if necessary, avoid - * the pin collision. - */ - if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0)) - gsi = pci_irq++; - /* - * Don't assign IRQ used by ACPI SCI - */ - if (gsi == acpi_gbl_FADT.sci_interrupt) - gsi = pci_irq++; - gsi_to_irq[irq] = gsi; - } else { - printk(KERN_ERR "GSI %u is too high\n", gsi); - return gsi; - } - } - - io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, - triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, - polarity == ACPI_ACTIVE_HIGH ? 0 : 1); - return gsi; -} - -#endif /* CONFIG_X86_IO_APIC */ -#endif /* CONFIG_ACPI */ diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c deleted file mode 100644 index 0c1069b8d63..00000000000 --- a/arch/i386/kernel/msr.c +++ /dev/null @@ -1,224 +0,0 @@ -/* ----------------------------------------------------------------------- * - * - * Copyright 2000 H. Peter Anvin - All Rights Reserved - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, - * USA; either version 2 of the License, or (at your option) any later - * version; incorporated herein by reference. - * - * ----------------------------------------------------------------------- */ - -/* - * msr.c - * - * x86 MSR access device - * - * This device is accessed by lseek() to the appropriate register number - * and then read/write in chunks of 8 bytes. A larger size means multiple - * reads or writes of the same register. - * - * This driver uses /dev/cpu/%d/msr where %d is the minor number, and on - * an SMP box will direct the access to CPU %d. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -static struct class *msr_class; - -static loff_t msr_seek(struct file *file, loff_t offset, int orig) -{ - loff_t ret = -EINVAL; - - lock_kernel(); - switch (orig) { - case 0: - file->f_pos = offset; - ret = file->f_pos; - break; - case 1: - file->f_pos += offset; - ret = file->f_pos; - } - unlock_kernel(); - return ret; -} - -static ssize_t msr_read(struct file *file, char __user * buf, - size_t count, loff_t * ppos) -{ - u32 __user *tmp = (u32 __user *) buf; - u32 data[2]; - u32 reg = *ppos; - int cpu = iminor(file->f_path.dentry->d_inode); - int err; - - if (count % 8) - return -EINVAL; /* Invalid chunk size */ - - for (; count; count -= 8) { - err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); - if (err) - return -EIO; - if (copy_to_user(tmp, &data, 8)) - return -EFAULT; - tmp += 2; - } - - return ((char __user *)tmp) - buf; -} - -static ssize_t msr_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - const u32 __user *tmp = (const u32 __user *)buf; - u32 data[2]; - u32 reg = *ppos; - int cpu = iminor(file->f_path.dentry->d_inode); - int err; - - if (count % 8) - return -EINVAL; /* Invalid chunk size */ - - for (; count; count -= 8) { - if (copy_from_user(&data, tmp, 8)) - return -EFAULT; - err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); - if (err) - return -EIO; - tmp += 2; - } - - return ((char __user *)tmp) - buf; -} - -static int msr_open(struct inode *inode, struct file *file) -{ - unsigned int cpu = iminor(file->f_path.dentry->d_inode); - struct cpuinfo_x86 *c = &(cpu_data)[cpu]; - - if (cpu >= NR_CPUS || !cpu_online(cpu)) - return -ENXIO; /* No such CPU */ - if (!cpu_has(c, X86_FEATURE_MSR)) - return -EIO; /* MSR not supported */ - - return 0; -} - -/* - * File operations we support - */ -static const struct file_operations msr_fops = { - .owner = THIS_MODULE, - .llseek = msr_seek, - .read = msr_read, - .write = msr_write, - .open = msr_open, -}; - -static int msr_device_create(int i) -{ - int err = 0; - struct device *dev; - - dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, i), "msr%d",i); - if (IS_ERR(dev)) - err = PTR_ERR(dev); - return err; -} - -static int msr_class_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - msr_device_create(cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu)); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __cpuinitdata msr_class_cpu_notifier = -{ - .notifier_call = msr_class_cpu_callback, -}; - -static int __init msr_init(void) -{ - int i, err = 0; - i = 0; - - if (register_chrdev(MSR_MAJOR, "cpu/msr", &msr_fops)) { - printk(KERN_ERR "msr: unable to get major %d for msr\n", - MSR_MAJOR); - err = -EBUSY; - goto out; - } - msr_class = class_create(THIS_MODULE, "msr"); - if (IS_ERR(msr_class)) { - err = PTR_ERR(msr_class); - goto out_chrdev; - } - for_each_online_cpu(i) { - err = msr_device_create(i); - if (err != 0) - goto out_class; - } - register_hotcpu_notifier(&msr_class_cpu_notifier); - - err = 0; - goto out; - -out_class: - i = 0; - for_each_online_cpu(i) - device_destroy(msr_class, MKDEV(MSR_MAJOR, i)); - class_destroy(msr_class); -out_chrdev: - unregister_chrdev(MSR_MAJOR, "cpu/msr"); -out: - return err; -} - -static void __exit msr_exit(void) -{ - int cpu = 0; - for_each_online_cpu(cpu) - device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu)); - class_destroy(msr_class); - unregister_chrdev(MSR_MAJOR, "cpu/msr"); - unregister_hotcpu_notifier(&msr_class_cpu_notifier); -} - -module_init(msr_init); -module_exit(msr_exit) - -MODULE_AUTHOR("H. Peter Anvin "); -MODULE_DESCRIPTION("x86 generic MSR driver"); -MODULE_LICENSE("GPL"); diff --git a/arch/i386/kernel/nmi_32.c b/arch/i386/kernel/nmi_32.c deleted file mode 100644 index c7227e2180f..00000000000 --- a/arch/i386/kernel/nmi_32.c +++ /dev/null @@ -1,468 +0,0 @@ -/* - * linux/arch/i386/nmi.c - * - * NMI watchdog support on APIC systems - * - * Started by Ingo Molnar - * - * Fixes: - * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. - * Mikael Pettersson : Power Management for local APIC NMI watchdog. - * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. - * Pavel Machek and - * Mikael Pettersson : PM converted to driver model. Disable/enable API. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "mach_traps.h" - -int unknown_nmi_panic; -int nmi_watchdog_enabled; - -static cpumask_t backtrace_mask = CPU_MASK_NONE; - -/* nmi_active: - * >0: the lapic NMI watchdog is active, but can be disabled - * <0: the lapic NMI watchdog has not been set up, and cannot - * be enabled - * 0: the lapic NMI watchdog is disabled, but can be enabled - */ -atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ - -unsigned int nmi_watchdog = NMI_DEFAULT; -static unsigned int nmi_hz = HZ; - -static DEFINE_PER_CPU(short, wd_enabled); - -/* local prototypes */ -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); - -static int endflag __initdata = 0; - -#ifdef CONFIG_SMP -/* The performance counters used by NMI_LOCAL_APIC don't trigger when - * the CPU is idle. To make sure the NMI watchdog really ticks on all - * CPUs during the test make them busy. - */ -static __init void nmi_cpu_busy(void *data) -{ - local_irq_enable_in_hardirq(); - /* Intentionally don't use cpu_relax here. This is - to make sure that the performance counter really ticks, - even if there is a simulator or similar that catches the - pause instruction. On a real HT machine this is fine because - all other CPUs are busy with "useless" delay loops and don't - care if they get somewhat less cycles. */ - while (endflag == 0) - mb(); -} -#endif - -static int __init check_nmi_watchdog(void) -{ - unsigned int *prev_nmi_count; - int cpu; - - if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED)) - return 0; - - if (!atomic_read(&nmi_active)) - return 0; - - prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); - if (!prev_nmi_count) - return -1; - - printk(KERN_INFO "Testing NMI watchdog ... "); - - if (nmi_watchdog == NMI_LOCAL_APIC) - smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); - - for_each_possible_cpu(cpu) - prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; - local_irq_enable(); - mdelay((20*1000)/nmi_hz); // wait 20 ticks - - for_each_possible_cpu(cpu) { -#ifdef CONFIG_SMP - /* Check cpu_callin_map here because that is set - after the timer is started. */ - if (!cpu_isset(cpu, cpu_callin_map)) - continue; -#endif - if (!per_cpu(wd_enabled, cpu)) - continue; - if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { - printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", - cpu, - prev_nmi_count[cpu], - nmi_count(cpu)); - per_cpu(wd_enabled, cpu) = 0; - atomic_dec(&nmi_active); - } - } - endflag = 1; - if (!atomic_read(&nmi_active)) { - kfree(prev_nmi_count); - atomic_set(&nmi_active, -1); - return -1; - } - printk("OK.\n"); - - /* now that we know it works we can reduce NMI frequency to - something more reasonable; makes a difference in some configs */ - if (nmi_watchdog == NMI_LOCAL_APIC) - nmi_hz = lapic_adjust_nmi_hz(1); - - kfree(prev_nmi_count); - return 0; -} -/* This needs to happen later in boot so counters are working */ -late_initcall(check_nmi_watchdog); - -static int __init setup_nmi_watchdog(char *str) -{ - int nmi; - - get_option(&str, &nmi); - - if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) - return 0; - - nmi_watchdog = nmi; - return 1; -} - -__setup("nmi_watchdog=", setup_nmi_watchdog); - - -/* Suspend/resume support */ - -#ifdef CONFIG_PM - -static int nmi_pm_active; /* nmi_active before suspend */ - -static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) -{ - /* only CPU0 goes here, other CPUs should be offline */ - nmi_pm_active = atomic_read(&nmi_active); - stop_apic_nmi_watchdog(NULL); - BUG_ON(atomic_read(&nmi_active) != 0); - return 0; -} - -static int lapic_nmi_resume(struct sys_device *dev) -{ - /* only CPU0 goes here, other CPUs should be offline */ - if (nmi_pm_active > 0) { - setup_apic_nmi_watchdog(NULL); - touch_nmi_watchdog(); - } - return 0; -} - - -static struct sysdev_class nmi_sysclass = { - set_kset_name("lapic_nmi"), - .resume = lapic_nmi_resume, - .suspend = lapic_nmi_suspend, -}; - -static struct sys_device device_lapic_nmi = { - .id = 0, - .cls = &nmi_sysclass, -}; - -static int __init init_lapic_nmi_sysfs(void) -{ - int error; - - /* should really be a BUG_ON but b/c this is an - * init call, it just doesn't work. -dcz - */ - if (nmi_watchdog != NMI_LOCAL_APIC) - return 0; - - if (atomic_read(&nmi_active) < 0) - return 0; - - error = sysdev_class_register(&nmi_sysclass); - if (!error) - error = sysdev_register(&device_lapic_nmi); - return error; -} -/* must come after the local APIC's device_initcall() */ -late_initcall(init_lapic_nmi_sysfs); - -#endif /* CONFIG_PM */ - -static void __acpi_nmi_enable(void *__unused) -{ - apic_write_around(APIC_LVT0, APIC_DM_NMI); -} - -/* - * Enable timer based NMIs on all CPUs: - */ -void acpi_nmi_enable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); -} - -static void __acpi_nmi_disable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); -} - -/* - * Disable timer based NMIs on all CPUs: - */ -void acpi_nmi_disable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); -} - -void setup_apic_nmi_watchdog (void *unused) -{ - if (__get_cpu_var(wd_enabled)) - return; - - /* cheap hack to support suspend/resume */ - /* if cpu0 is not active neither should the other cpus */ - if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) - return; - - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - __get_cpu_var(wd_enabled) = 1; /* enable it before to avoid race with handler */ - if (lapic_watchdog_init(nmi_hz) < 0) { - __get_cpu_var(wd_enabled) = 0; - return; - } - /* FALL THROUGH */ - case NMI_IO_APIC: - __get_cpu_var(wd_enabled) = 1; - atomic_inc(&nmi_active); - } -} - -void stop_apic_nmi_watchdog(void *unused) -{ - /* only support LOCAL and IO APICs for now */ - if ((nmi_watchdog != NMI_LOCAL_APIC) && - (nmi_watchdog != NMI_IO_APIC)) - return; - if (__get_cpu_var(wd_enabled) == 0) - return; - if (nmi_watchdog == NMI_LOCAL_APIC) - lapic_watchdog_stop(); - __get_cpu_var(wd_enabled) = 0; - atomic_dec(&nmi_active); -} - -/* - * the best way to detect whether a CPU has a 'hard lockup' problem - * is to check it's local APIC timer IRQ counts. If they are not - * changing then that CPU has some problem. - * - * as these watchdog NMI IRQs are generated on every CPU, we only - * have to check the current processor. - * - * since NMIs don't listen to _any_ locks, we have to be extremely - * careful not to rely on unsafe variables. The printk might lock - * up though, so we have to break up any console locks first ... - * [when there will be more tty-related locks, break them up - * here too!] - */ - -static unsigned int - last_irq_sums [NR_CPUS], - alert_counter [NR_CPUS]; - -void touch_nmi_watchdog(void) -{ - if (nmi_watchdog > 0) { - unsigned cpu; - - /* - * Just reset the alert counters, (other CPUs might be - * spinning on locks we hold): - */ - for_each_present_cpu(cpu) { - if (alert_counter[cpu]) - alert_counter[cpu] = 0; - } - } - - /* - * Tickle the softlockup detector too: - */ - touch_softlockup_watchdog(); -} -EXPORT_SYMBOL(touch_nmi_watchdog); - -extern void die_nmi(struct pt_regs *, const char *msg); - -__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) -{ - - /* - * Since current_thread_info()-> is always on the stack, and we - * always switch the stack NMI-atomically, it's safe to use - * smp_processor_id(). - */ - unsigned int sum; - int touched = 0; - int cpu = smp_processor_id(); - int rc=0; - - /* check for other users first */ - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) { - rc = 1; - touched = 1; - } - - if (cpu_isset(cpu, backtrace_mask)) { - static DEFINE_SPINLOCK(lock); /* Serialise the printks */ - - spin_lock(&lock); - printk("NMI backtrace for cpu %d\n", cpu); - dump_stack(); - spin_unlock(&lock); - cpu_clear(cpu, backtrace_mask); - } - - /* - * Take the local apic timer and PIT/HPET into account. We don't - * know which one is active, when we have highres/dyntick on - */ - sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_cpu(cpu).irqs[0]; - - /* if the none of the timers isn't firing, this cpu isn't doing much */ - if (!touched && last_irq_sums[cpu] == sum) { - /* - * Ayiee, looks like this CPU is stuck ... - * wait a few IRQs (5 seconds) before doing the oops ... - */ - alert_counter[cpu]++; - if (alert_counter[cpu] == 5*nmi_hz) - /* - * die_nmi will return ONLY if NOTIFY_STOP happens.. - */ - die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP"); - } else { - last_irq_sums[cpu] = sum; - alert_counter[cpu] = 0; - } - /* see if the nmi watchdog went off */ - if (!__get_cpu_var(wd_enabled)) - return rc; - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - rc |= lapic_wd_event(nmi_hz); - break; - case NMI_IO_APIC: - /* don't know how to accurately check for this. - * just assume it was a watchdog timer interrupt - * This matches the old behaviour. - */ - rc = 1; - break; - } - return rc; -} - -int do_nmi_callback(struct pt_regs * regs, int cpu) -{ -#ifdef CONFIG_SYSCTL - if (unknown_nmi_panic) - return unknown_nmi_panic_callback(regs, cpu); -#endif - return 0; -} - -#ifdef CONFIG_SYSCTL - -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) -{ - unsigned char reason = get_nmi_reason(); - char buf[64]; - - sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(regs, buf); - return 0; -} - -/* - * proc handler for /proc/sys/kernel/nmi - */ -int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, - void __user *buffer, size_t *length, loff_t *ppos) -{ - int old_state; - - nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; - old_state = nmi_watchdog_enabled; - proc_dointvec(table, write, file, buffer, length, ppos); - if (!!old_state == !!nmi_watchdog_enabled) - return 0; - - if (atomic_read(&nmi_active) < 0 || nmi_watchdog == NMI_DISABLED) { - printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); - return -EIO; - } - - if (nmi_watchdog == NMI_DEFAULT) { - if (lapic_watchdog_ok()) - nmi_watchdog = NMI_LOCAL_APIC; - else - nmi_watchdog = NMI_IO_APIC; - } - - if (nmi_watchdog == NMI_LOCAL_APIC) { - if (nmi_watchdog_enabled) - enable_lapic_nmi_watchdog(); - else - disable_lapic_nmi_watchdog(); - } else { - printk( KERN_WARNING - "NMI watchdog doesn't know what hardware to touch\n"); - return -EIO; - } - return 0; -} - -#endif - -void __trigger_all_cpu_backtrace(void) -{ - int i; - - backtrace_mask = cpu_online_map; - /* Wait for up to 10 seconds for all CPUs to do the backtrace */ - for (i = 0; i < 10 * 1000; i++) { - if (cpus_empty(backtrace_mask)) - break; - mdelay(1); - } -} - -EXPORT_SYMBOL(nmi_active); -EXPORT_SYMBOL(nmi_watchdog); diff --git a/arch/i386/kernel/numaq_32.c b/arch/i386/kernel/numaq_32.c deleted file mode 100644 index 9000d82c6dc..00000000000 --- a/arch/i386/kernel/numaq_32.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Written by: Patricia Gaughen, IBM Corporation - * - * Copyright (C) 2002, IBM Corp. - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Send feedback to - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) - -/* - * Function: smp_dump_qct() - * - * Description: gets memory layout from the quad config table. This - * function also updates node_online_map with the nodes (quads) present. - */ -static void __init smp_dump_qct(void) -{ - int node; - struct eachquadmem *eq; - struct sys_cfg_data *scd = - (struct sys_cfg_data *)__va(SYS_CFG_DATA_PRIV_ADDR); - - nodes_clear(node_online_map); - for_each_node(node) { - if (scd->quads_present31_0 & (1 << node)) { - node_set_online(node); - eq = &scd->eq[node]; - /* Convert to pages */ - node_start_pfn[node] = MB_TO_PAGES( - eq->hi_shrd_mem_start - eq->priv_mem_size); - node_end_pfn[node] = MB_TO_PAGES( - eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); - - memory_present(node, - node_start_pfn[node], node_end_pfn[node]); - node_remap_size[node] = node_memmap_size_bytes(node, - node_start_pfn[node], - node_end_pfn[node]); - } - } -} - -/* - * Unlike Summit, we don't really care to let the NUMA-Q - * fall back to flat mode. Don't compile for NUMA-Q - * unless you really need it! - */ -int __init get_memcfg_numaq(void) -{ - smp_dump_qct(); - return 1; -} - -static int __init numaq_tsc_disable(void) -{ - if (num_online_nodes() > 1) { - printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); - tsc_disable = 1; - } - return 0; -} -arch_initcall(numaq_tsc_disable); diff --git a/arch/i386/kernel/paravirt_32.c b/arch/i386/kernel/paravirt_32.c deleted file mode 100644 index 739cfb207dd..00000000000 --- a/arch/i386/kernel/paravirt_32.c +++ /dev/null @@ -1,392 +0,0 @@ -/* Paravirtualization interfaces - Copyright (C) 2006 Rusty Russell IBM Corporation - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -*/ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* nop stub */ -void _paravirt_nop(void) -{ -} - -static void __init default_banner(void) -{ - printk(KERN_INFO "Booting paravirtualized kernel on %s\n", - paravirt_ops.name); -} - -char *memory_setup(void) -{ - return paravirt_ops.memory_setup(); -} - -/* Simple instruction patching code. */ -#define DEF_NATIVE(name, code) \ - extern const char start_##name[], end_##name[]; \ - asm("start_" #name ": " code "; end_" #name ":") - -DEF_NATIVE(irq_disable, "cli"); -DEF_NATIVE(irq_enable, "sti"); -DEF_NATIVE(restore_fl, "push %eax; popf"); -DEF_NATIVE(save_fl, "pushf; pop %eax"); -DEF_NATIVE(iret, "iret"); -DEF_NATIVE(irq_enable_sysexit, "sti; sysexit"); -DEF_NATIVE(read_cr2, "mov %cr2, %eax"); -DEF_NATIVE(write_cr3, "mov %eax, %cr3"); -DEF_NATIVE(read_cr3, "mov %cr3, %eax"); -DEF_NATIVE(clts, "clts"); -DEF_NATIVE(read_tsc, "rdtsc"); - -DEF_NATIVE(ud2a, "ud2a"); - -static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, - unsigned long addr, unsigned len) -{ - const unsigned char *start, *end; - unsigned ret; - - switch(type) { -#define SITE(x) case PARAVIRT_PATCH(x): start = start_##x; end = end_##x; goto patch_site - SITE(irq_disable); - SITE(irq_enable); - SITE(restore_fl); - SITE(save_fl); - SITE(iret); - SITE(irq_enable_sysexit); - SITE(read_cr2); - SITE(read_cr3); - SITE(write_cr3); - SITE(clts); - SITE(read_tsc); -#undef SITE - - patch_site: - ret = paravirt_patch_insns(ibuf, len, start, end); - break; - - case PARAVIRT_PATCH(make_pgd): - case PARAVIRT_PATCH(make_pte): - case PARAVIRT_PATCH(pgd_val): - case PARAVIRT_PATCH(pte_val): -#ifdef CONFIG_X86_PAE - case PARAVIRT_PATCH(make_pmd): - case PARAVIRT_PATCH(pmd_val): -#endif - /* These functions end up returning exactly what - they're passed, in the same registers. */ - ret = paravirt_patch_nop(); - break; - - default: - ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); - break; - } - - return ret; -} - -unsigned paravirt_patch_nop(void) -{ - return 0; -} - -unsigned paravirt_patch_ignore(unsigned len) -{ - return len; -} - -struct branch { - unsigned char opcode; - u32 delta; -} __attribute__((packed)); - -unsigned paravirt_patch_call(void *insnbuf, - const void *target, u16 tgt_clobbers, - unsigned long addr, u16 site_clobbers, - unsigned len) -{ - struct branch *b = insnbuf; - unsigned long delta = (unsigned long)target - (addr+5); - - if (tgt_clobbers & ~site_clobbers) - return len; /* target would clobber too much for this site */ - if (len < 5) - return len; /* call too long for patch site */ - - b->opcode = 0xe8; /* call */ - b->delta = delta; - BUILD_BUG_ON(sizeof(*b) != 5); - - return 5; -} - -unsigned paravirt_patch_jmp(const void *target, void *insnbuf, - unsigned long addr, unsigned len) -{ - struct branch *b = insnbuf; - unsigned long delta = (unsigned long)target - (addr+5); - - if (len < 5) - return len; /* call too long for patch site */ - - b->opcode = 0xe9; /* jmp */ - b->delta = delta; - - return 5; -} - -unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, - unsigned long addr, unsigned len) -{ - void *opfunc = *((void **)¶virt_ops + type); - unsigned ret; - - if (opfunc == NULL) - /* If there's no function, patch it with a ud2a (BUG) */ - ret = paravirt_patch_insns(insnbuf, len, start_ud2a, end_ud2a); - else if (opfunc == paravirt_nop) - /* If the operation is a nop, then nop the callsite */ - ret = paravirt_patch_nop(); - else if (type == PARAVIRT_PATCH(iret) || - type == PARAVIRT_PATCH(irq_enable_sysexit)) - /* If operation requires a jmp, then jmp */ - ret = paravirt_patch_jmp(opfunc, insnbuf, addr, len); - else - /* Otherwise call the function; assume target could - clobber any caller-save reg */ - ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY, - addr, clobbers, len); - - return ret; -} - -unsigned paravirt_patch_insns(void *insnbuf, unsigned len, - const char *start, const char *end) -{ - unsigned insn_len = end - start; - - if (insn_len > len || start == NULL) - insn_len = len; - else - memcpy(insnbuf, start, insn_len); - - return insn_len; -} - -void init_IRQ(void) -{ - paravirt_ops.init_IRQ(); -} - -static void native_flush_tlb(void) -{ - __native_flush_tlb(); -} - -/* - * Global pages have to be flushed a bit differently. Not a real - * performance problem because this does not happen often. - */ -static void native_flush_tlb_global(void) -{ - __native_flush_tlb_global(); -} - -static void native_flush_tlb_single(unsigned long addr) -{ - __native_flush_tlb_single(addr); -} - -/* These are in entry.S */ -extern void native_iret(void); -extern void native_irq_enable_sysexit(void); - -static int __init print_banner(void) -{ - paravirt_ops.banner(); - return 0; -} -core_initcall(print_banner); - -static struct resource reserve_ioports = { - .start = 0, - .end = IO_SPACE_LIMIT, - .name = "paravirt-ioport", - .flags = IORESOURCE_IO | IORESOURCE_BUSY, -}; - -static struct resource reserve_iomem = { - .start = 0, - .end = -1, - .name = "paravirt-iomem", - .flags = IORESOURCE_MEM | IORESOURCE_BUSY, -}; - -/* - * Reserve the whole legacy IO space to prevent any legacy drivers - * from wasting time probing for their hardware. This is a fairly - * brute-force approach to disabling all non-virtual drivers. - * - * Note that this must be called very early to have any effect. - */ -int paravirt_disable_iospace(void) -{ - int ret; - - ret = request_resource(&ioport_resource, &reserve_ioports); - if (ret == 0) { - ret = request_resource(&iomem_resource, &reserve_iomem); - if (ret) - release_resource(&reserve_ioports); - } - - return ret; -} - -struct paravirt_ops paravirt_ops = { - .name = "bare hardware", - .paravirt_enabled = 0, - .kernel_rpl = 0, - .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ - - .patch = native_patch, - .banner = default_banner, - .arch_setup = paravirt_nop, - .memory_setup = machine_specific_memory_setup, - .get_wallclock = native_get_wallclock, - .set_wallclock = native_set_wallclock, - .time_init = hpet_time_init, - .init_IRQ = native_init_IRQ, - - .cpuid = native_cpuid, - .get_debugreg = native_get_debugreg, - .set_debugreg = native_set_debugreg, - .clts = native_clts, - .read_cr0 = native_read_cr0, - .write_cr0 = native_write_cr0, - .read_cr2 = native_read_cr2, - .write_cr2 = native_write_cr2, - .read_cr3 = native_read_cr3, - .write_cr3 = native_write_cr3, - .read_cr4 = native_read_cr4, - .read_cr4_safe = native_read_cr4_safe, - .write_cr4 = native_write_cr4, - .save_fl = native_save_fl, - .restore_fl = native_restore_fl, - .irq_disable = native_irq_disable, - .irq_enable = native_irq_enable, - .safe_halt = native_safe_halt, - .halt = native_halt, - .wbinvd = native_wbinvd, - .read_msr = native_read_msr_safe, - .write_msr = native_write_msr_safe, - .read_tsc = native_read_tsc, - .read_pmc = native_read_pmc, - .sched_clock = native_sched_clock, - .get_cpu_khz = native_calculate_cpu_khz, - .load_tr_desc = native_load_tr_desc, - .set_ldt = native_set_ldt, - .load_gdt = native_load_gdt, - .load_idt = native_load_idt, - .store_gdt = native_store_gdt, - .store_idt = native_store_idt, - .store_tr = native_store_tr, - .load_tls = native_load_tls, - .write_ldt_entry = write_dt_entry, - .write_gdt_entry = write_dt_entry, - .write_idt_entry = write_dt_entry, - .load_esp0 = native_load_esp0, - - .set_iopl_mask = native_set_iopl_mask, - .io_delay = native_io_delay, - -#ifdef CONFIG_X86_LOCAL_APIC - .apic_write = native_apic_write, - .apic_write_atomic = native_apic_write_atomic, - .apic_read = native_apic_read, - .setup_boot_clock = setup_boot_APIC_clock, - .setup_secondary_clock = setup_secondary_APIC_clock, - .startup_ipi_hook = paravirt_nop, -#endif - .set_lazy_mode = paravirt_nop, - - .pagetable_setup_start = native_pagetable_setup_start, - .pagetable_setup_done = native_pagetable_setup_done, - - .flush_tlb_user = native_flush_tlb, - .flush_tlb_kernel = native_flush_tlb_global, - .flush_tlb_single = native_flush_tlb_single, - .flush_tlb_others = native_flush_tlb_others, - - .alloc_pt = paravirt_nop, - .alloc_pd = paravirt_nop, - .alloc_pd_clone = paravirt_nop, - .release_pt = paravirt_nop, - .release_pd = paravirt_nop, - - .set_pte = native_set_pte, - .set_pte_at = native_set_pte_at, - .set_pmd = native_set_pmd, - .pte_update = paravirt_nop, - .pte_update_defer = paravirt_nop, - -#ifdef CONFIG_HIGHPTE - .kmap_atomic_pte = kmap_atomic, -#endif - -#ifdef CONFIG_X86_PAE - .set_pte_atomic = native_set_pte_atomic, - .set_pte_present = native_set_pte_present, - .set_pud = native_set_pud, - .pte_clear = native_pte_clear, - .pmd_clear = native_pmd_clear, - - .pmd_val = native_pmd_val, - .make_pmd = native_make_pmd, -#endif - - .pte_val = native_pte_val, - .pgd_val = native_pgd_val, - - .make_pte = native_make_pte, - .make_pgd = native_make_pgd, - - .irq_enable_sysexit = native_irq_enable_sysexit, - .iret = native_iret, - - .dup_mmap = paravirt_nop, - .exit_mmap = paravirt_nop, - .activate_mm = paravirt_nop, -}; - -EXPORT_SYMBOL(paravirt_ops); diff --git a/arch/i386/kernel/pci-dma_32.c b/arch/i386/kernel/pci-dma_32.c deleted file mode 100644 index 048f09b6255..00000000000 --- a/arch/i386/kernel/pci-dma_32.c +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Dynamic DMA mapping support. - * - * On i386 there is no hardware dynamic DMA address translation, - * so consistent alloc/free are merely page allocation/freeing. - * The rest of the dynamic DMA mapping interface is implemented - * in asm/pci.h. - */ - -#include -#include -#include -#include -#include -#include -#include - -struct dma_coherent_mem { - void *virt_base; - u32 device_base; - int size; - int flags; - unsigned long *bitmap; -}; - -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) -{ - void *ret; - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; - int order = get_order(size); - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); - - if (mem) { - int page = bitmap_find_free_region(mem->bitmap, mem->size, - order); - if (page >= 0) { - *dma_handle = mem->device_base + (page << PAGE_SHIFT); - ret = mem->virt_base + (page << PAGE_SHIFT); - memset(ret, 0, size); - return ret; - } - if (mem->flags & DMA_MEMORY_EXCLUSIVE) - return NULL; - } - - if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) - gfp |= GFP_DMA; - - ret = (void *)__get_free_pages(gfp, order); - - if (ret != NULL) { - memset(ret, 0, size); - *dma_handle = virt_to_phys(ret); - } - return ret; -} -EXPORT_SYMBOL(dma_alloc_coherent); - -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle) -{ - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; - int order = get_order(size); - - if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; - - bitmap_release_region(mem->bitmap, page, order); - } else - free_pages((unsigned long)vaddr, order); -} -EXPORT_SYMBOL(dma_free_coherent); - -int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags) -{ - void __iomem *mem_base = NULL; - int pages = size >> PAGE_SHIFT; - int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); - - if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) - goto out; - if (!size) - goto out; - if (dev->dma_mem) - goto out; - - /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ - - mem_base = ioremap(bus_addr, size); - if (!mem_base) - goto out; - - dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); - if (!dev->dma_mem) - goto out; - dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!dev->dma_mem->bitmap) - goto free1_out; - - dev->dma_mem->virt_base = mem_base; - dev->dma_mem->device_base = device_addr; - dev->dma_mem->size = pages; - dev->dma_mem->flags = flags; - - if (flags & DMA_MEMORY_MAP) - return DMA_MEMORY_MAP; - - return DMA_MEMORY_IO; - - free1_out: - kfree(dev->dma_mem); - out: - if (mem_base) - iounmap(mem_base); - return 0; -} -EXPORT_SYMBOL(dma_declare_coherent_memory); - -void dma_release_declared_memory(struct device *dev) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - - if(!mem) - return; - dev->dma_mem = NULL; - iounmap(mem->virt_base); - kfree(mem->bitmap); - kfree(mem); -} -EXPORT_SYMBOL(dma_release_declared_memory); - -void *dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; - int pos, err; - - if (!mem) - return ERR_PTR(-EINVAL); - - pos = (device_addr - mem->device_base) >> PAGE_SHIFT; - err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); - if (err != 0) - return ERR_PTR(err); - return mem->virt_base + (pos << PAGE_SHIFT); -} -EXPORT_SYMBOL(dma_mark_declared_memory_occupied); - -#ifdef CONFIG_PCI -/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ - -int forbid_dac; -EXPORT_SYMBOL(forbid_dac); - -static __devinit void via_no_dac(struct pci_dev *dev) -{ - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { - printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n"); - forbid_dac = 1; - } -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); - -static int check_iommu(char *s) -{ - if (!strcmp(s, "usedac")) { - forbid_dac = -1; - return 1; - } - return 0; -} -__setup("iommu=", check_iommu); -#endif diff --git a/arch/i386/kernel/pcspeaker.c b/arch/i386/kernel/pcspeaker.c deleted file mode 100644 index bc1f2d3ea27..00000000000 --- a/arch/i386/kernel/pcspeaker.c +++ /dev/null @@ -1,20 +0,0 @@ -#include -#include -#include - -static __init int add_pcspkr(void) -{ - struct platform_device *pd; - int ret; - - pd = platform_device_alloc("pcspkr", -1); - if (!pd) - return -ENOMEM; - - ret = platform_device_add(pd); - if (ret) - platform_device_put(pd); - - return ret; -} -device_initcall(add_pcspkr); diff --git a/arch/i386/kernel/process_32.c b/arch/i386/kernel/process_32.c deleted file mode 100644 index 84664710b78..00000000000 --- a/arch/i386/kernel/process_32.c +++ /dev/null @@ -1,951 +0,0 @@ -/* - * linux/arch/i386/kernel/process.c - * - * Copyright (C) 1995 Linus Torvalds - * - * Pentium III FXSR, SSE support - * Gareth Hughes , May 2000 - */ - -/* - * This file handles the architecture-dependent parts of process handling.. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_MATH_EMULATION -#include -#endif - -#include - -#include -#include - -asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); - -static int hlt_counter; - -unsigned long boot_option_idle_override = 0; -EXPORT_SYMBOL(boot_option_idle_override); - -DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; -EXPORT_PER_CPU_SYMBOL(current_task); - -DEFINE_PER_CPU(int, cpu_number); -EXPORT_PER_CPU_SYMBOL(cpu_number); - -/* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return ((unsigned long *)tsk->thread.esp)[3]; -} - -/* - * Powermanagement idle function, if any.. - */ -void (*pm_idle)(void); -EXPORT_SYMBOL(pm_idle); -static DEFINE_PER_CPU(unsigned int, cpu_idle_state); - -void disable_hlt(void) -{ - hlt_counter++; -} - -EXPORT_SYMBOL(disable_hlt); - -void enable_hlt(void) -{ - hlt_counter--; -} - -EXPORT_SYMBOL(enable_hlt); - -/* - * We use this if we don't have any better - * idle routine.. - */ -void default_idle(void) -{ - if (!hlt_counter && boot_cpu_data.hlt_works_ok) { - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we - * test NEED_RESCHED: - */ - smp_mb(); - - local_irq_disable(); - if (!need_resched()) - safe_halt(); /* enables interrupts racelessly */ - else - local_irq_enable(); - current_thread_info()->status |= TS_POLLING; - } else { - /* loop is done by the caller */ - cpu_relax(); - } -} -#ifdef CONFIG_APM_MODULE -EXPORT_SYMBOL(default_idle); -#endif - -/* - * On SMP it's slightly faster (but much more power-consuming!) - * to poll the ->work.need_resched flag instead of waiting for the - * cross-CPU IPI to arrive. Use this option with caution. - */ -static void poll_idle (void) -{ - cpu_relax(); -} - -#ifdef CONFIG_HOTPLUG_CPU -#include -/* We don't actually take CPU down, just spin without interrupts. */ -static inline void play_dead(void) -{ - /* This must be done before dead CPU ack */ - cpu_exit_clear(); - wbinvd(); - mb(); - /* Ack it */ - __get_cpu_var(cpu_state) = CPU_DEAD; - - /* - * With physical CPU hotplug, we should halt the cpu - */ - local_irq_disable(); - while (1) - halt(); -} -#else -static inline void play_dead(void) -{ - BUG(); -} -#endif /* CONFIG_HOTPLUG_CPU */ - -/* - * The idle thread. There's no useful work to be - * done, so just try to conserve power and have a - * low exit latency (ie sit in a loop waiting for - * somebody to say that they'd like to reschedule) - */ -void cpu_idle(void) -{ - int cpu = smp_processor_id(); - - current_thread_info()->status |= TS_POLLING; - - /* endless idle loop with no priority at all */ - while (1) { - tick_nohz_stop_sched_tick(); - while (!need_resched()) { - void (*idle)(void); - - if (__get_cpu_var(cpu_idle_state)) - __get_cpu_var(cpu_idle_state) = 0; - - check_pgt_cache(); - rmb(); - idle = pm_idle; - - if (!idle) - idle = default_idle; - - if (cpu_is_offline(cpu)) - play_dead(); - - __get_cpu_var(irq_stat).idle_timestamp = jiffies; - idle(); - } - tick_nohz_restart_sched_tick(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); - } -} - -void cpu_idle_wait(void) -{ - unsigned int cpu, this_cpu = get_cpu(); - cpumask_t map, tmp = current->cpus_allowed; - - set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); - put_cpu(); - - cpus_clear(map); - for_each_online_cpu(cpu) { - per_cpu(cpu_idle_state, cpu) = 1; - cpu_set(cpu, map); - } - - __get_cpu_var(cpu_idle_state) = 0; - - wmb(); - do { - ssleep(1); - for_each_online_cpu(cpu) { - if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) - cpu_clear(cpu, map); - } - cpus_and(map, map, cpu_online_map); - } while (!cpus_empty(map)); - - set_cpus_allowed(current, tmp); -} -EXPORT_SYMBOL_GPL(cpu_idle_wait); - -/* - * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, - * which can obviate IPI to trigger checking of need_resched. - * We execute MONITOR against need_resched and enter optimized wait state - * through MWAIT. Whenever someone changes need_resched, we would be woken - * up from MWAIT (without an IPI). - * - * New with Core Duo processors, MWAIT can take some hints based on CPU - * capability. - */ -void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) -{ - if (!need_resched()) { - __monitor((void *)¤t_thread_info()->flags, 0, 0); - smp_mb(); - if (!need_resched()) - __mwait(eax, ecx); - } -} - -/* Default MONITOR/MWAIT with no hints, used for default C1 state */ -static void mwait_idle(void) -{ - local_irq_enable(); - mwait_idle_with_hints(0, 0); -} - -void __devinit select_idle_routine(const struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_MWAIT)) { - printk("monitor/mwait feature present.\n"); - /* - * Skip, if setup has overridden idle. - * One CPU supports mwait => All CPUs supports mwait - */ - if (!pm_idle) { - printk("using mwait in idle threads.\n"); - pm_idle = mwait_idle; - } - } -} - -static int __init idle_setup(char *str) -{ - if (!strcmp(str, "poll")) { - printk("using polling idle threads.\n"); - pm_idle = poll_idle; -#ifdef CONFIG_X86_SMP - if (smp_num_siblings > 1) - printk("WARNING: polling idle and HT enabled, performance may degrade.\n"); -#endif - } else if (!strcmp(str, "mwait")) - force_mwait = 1; - else - return -1; - - boot_option_idle_override = 1; - return 0; -} -early_param("idle", idle_setup); - -void show_regs(struct pt_regs * regs) -{ - unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; - unsigned long d0, d1, d2, d3, d6, d7; - - printk("\n"); - printk("Pid: %d, comm: %20s\n", current->pid, current->comm); - printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); - print_symbol("EIP is at %s\n", regs->eip); - - if (user_mode_vm(regs)) - printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); - printk(" EFLAGS: %08lx %s (%s %.*s)\n", - regs->eflags, print_tainted(), init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", - regs->eax,regs->ebx,regs->ecx,regs->edx); - printk("ESI: %08lx EDI: %08lx EBP: %08lx", - regs->esi, regs->edi, regs->ebp); - printk(" DS: %04x ES: %04x FS: %04x\n", - 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs); - - cr0 = read_cr0(); - cr2 = read_cr2(); - cr3 = read_cr3(); - cr4 = read_cr4_safe(); - printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); - - get_debugreg(d0, 0); - get_debugreg(d1, 1); - get_debugreg(d2, 2); - get_debugreg(d3, 3); - printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", - d0, d1, d2, d3); - get_debugreg(d6, 6); - get_debugreg(d7, 7); - printk("DR6: %08lx DR7: %08lx\n", d6, d7); - - show_trace(NULL, regs, ®s->esp); -} - -/* - * This gets run with %ebx containing the - * function to call, and %edx containing - * the "args". - */ -extern void kernel_thread_helper(void); - -/* - * Create a kernel thread - */ -int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) -{ - struct pt_regs regs; - - memset(®s, 0, sizeof(regs)); - - regs.ebx = (unsigned long) fn; - regs.edx = (unsigned long) arg; - - regs.xds = __USER_DS; - regs.xes = __USER_DS; - regs.xfs = __KERNEL_PERCPU; - regs.orig_eax = -1; - regs.eip = (unsigned long) kernel_thread_helper; - regs.xcs = __KERNEL_CS | get_kernel_rpl(); - regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; - - /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); -} -EXPORT_SYMBOL(kernel_thread); - -/* - * Free current thread data structures etc.. - */ -void exit_thread(void) -{ - /* The process may have allocated an io port bitmap... nuke it. */ - if (unlikely(test_thread_flag(TIF_IO_BITMAP))) { - struct task_struct *tsk = current; - struct thread_struct *t = &tsk->thread; - int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); - - kfree(t->io_bitmap_ptr); - t->io_bitmap_ptr = NULL; - clear_thread_flag(TIF_IO_BITMAP); - /* - * Careful, clear this in the TSS too: - */ - memset(tss->io_bitmap, 0xff, tss->io_bitmap_max); - t->io_bitmap_max = 0; - tss->io_bitmap_owner = NULL; - tss->io_bitmap_max = 0; - tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; - put_cpu(); - } -} - -void flush_thread(void) -{ - struct task_struct *tsk = current; - - memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); - memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - clear_tsk_thread_flag(tsk, TIF_DEBUG); - /* - * Forget coprocessor state.. - */ - clear_fpu(tsk); - clear_used_math(); -} - -void release_thread(struct task_struct *dead_task) -{ - BUG_ON(dead_task->mm); - release_vm86_irqs(dead_task); -} - -/* - * This gets called before we allocate a new thread and copy - * the current task into it. - */ -void prepare_to_copy(struct task_struct *tsk) -{ - unlazy_fpu(tsk); -} - -int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, - unsigned long unused, - struct task_struct * p, struct pt_regs * regs) -{ - struct pt_regs * childregs; - struct task_struct *tsk; - int err; - - childregs = task_pt_regs(p); - *childregs = *regs; - childregs->eax = 0; - childregs->esp = esp; - - p->thread.esp = (unsigned long) childregs; - p->thread.esp0 = (unsigned long) (childregs+1); - - p->thread.eip = (unsigned long) ret_from_fork; - - savesegment(gs,p->thread.gs); - - tsk = current; - if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { - p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, - IO_BITMAP_BYTES, GFP_KERNEL); - if (!p->thread.io_bitmap_ptr) { - p->thread.io_bitmap_max = 0; - return -ENOMEM; - } - set_tsk_thread_flag(p, TIF_IO_BITMAP); - } - - /* - * Set a new TLS for the child thread? - */ - if (clone_flags & CLONE_SETTLS) { - struct desc_struct *desc; - struct user_desc info; - int idx; - - err = -EFAULT; - if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info))) - goto out; - err = -EINVAL; - if (LDT_empty(&info)) - goto out; - - idx = info.entry_number; - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - goto out; - - desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; - desc->a = LDT_entry_a(&info); - desc->b = LDT_entry_b(&info); - } - - err = 0; - out: - if (err && p->thread.io_bitmap_ptr) { - kfree(p->thread.io_bitmap_ptr); - p->thread.io_bitmap_max = 0; - } - return err; -} - -/* - * fill in the user structure for a core dump.. - */ -void dump_thread(struct pt_regs * regs, struct user * dump) -{ - int i; - -/* changed the size calculations - should hopefully work better. lbt */ - dump->magic = CMAGIC; - dump->start_code = 0; - dump->start_stack = regs->esp & ~(PAGE_SIZE - 1); - dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT; - dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT; - dump->u_dsize -= dump->u_tsize; - dump->u_ssize = 0; - for (i = 0; i < 8; i++) - dump->u_debugreg[i] = current->thread.debugreg[i]; - - if (dump->start_stack < TASK_SIZE) - dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT; - - dump->regs.ebx = regs->ebx; - dump->regs.ecx = regs->ecx; - dump->regs.edx = regs->edx; - dump->regs.esi = regs->esi; - dump->regs.edi = regs->edi; - dump->regs.ebp = regs->ebp; - dump->regs.eax = regs->eax; - dump->regs.ds = regs->xds; - dump->regs.es = regs->xes; - dump->regs.fs = regs->xfs; - savesegment(gs,dump->regs.gs); - dump->regs.orig_eax = regs->orig_eax; - dump->regs.eip = regs->eip; - dump->regs.cs = regs->xcs; - dump->regs.eflags = regs->eflags; - dump->regs.esp = regs->esp; - dump->regs.ss = regs->xss; - - dump->u_fpvalid = dump_fpu (regs, &dump->i387); -} -EXPORT_SYMBOL(dump_thread); - -/* - * Capture the user space registers if the task is not running (in user space) - */ -int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) -{ - struct pt_regs ptregs = *task_pt_regs(tsk); - ptregs.xcs &= 0xffff; - ptregs.xds &= 0xffff; - ptregs.xes &= 0xffff; - ptregs.xss &= 0xffff; - - elf_core_copy_regs(regs, &ptregs); - - return 1; -} - -#ifdef CONFIG_SECCOMP -void hard_disable_TSC(void) -{ - write_cr4(read_cr4() | X86_CR4_TSD); -} -void disable_TSC(void) -{ - preempt_disable(); - if (!test_and_set_thread_flag(TIF_NOTSC)) - /* - * Must flip the CPU state synchronously with - * TIF_NOTSC in the current running context. - */ - hard_disable_TSC(); - preempt_enable(); -} -void hard_enable_TSC(void) -{ - write_cr4(read_cr4() & ~X86_CR4_TSD); -} -#endif /* CONFIG_SECCOMP */ - -static noinline void -__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) -{ - struct thread_struct *next; - - next = &next_p->thread; - - if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { - set_debugreg(next->debugreg[0], 0); - set_debugreg(next->debugreg[1], 1); - set_debugreg(next->debugreg[2], 2); - set_debugreg(next->debugreg[3], 3); - /* no 4 and 5 */ - set_debugreg(next->debugreg[6], 6); - set_debugreg(next->debugreg[7], 7); - } - -#ifdef CONFIG_SECCOMP - if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ - test_tsk_thread_flag(next_p, TIF_NOTSC)) { - /* prev and next are different */ - if (test_tsk_thread_flag(next_p, TIF_NOTSC)) - hard_disable_TSC(); - else - hard_enable_TSC(); - } -#endif - - if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { - /* - * Disable the bitmap via an invalid offset. We still cache - * the previous bitmap owner and the IO bitmap contents: - */ - tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; - return; - } - - if (likely(next == tss->io_bitmap_owner)) { - /* - * Previous owner of the bitmap (hence the bitmap content) - * matches the next task, we dont have to do anything but - * to set a valid offset in the TSS: - */ - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; - return; - } - /* - * Lazy TSS's I/O bitmap copy. We set an invalid offset here - * and we let the task to get a GPF in case an I/O instruction - * is performed. The handler of the GPF will verify that the - * faulting task has a valid I/O bitmap and, it true, does the - * real copy and restart the instruction. This will save us - * redundant copies when the currently switched task does not - * perform any I/O during its timeslice. - */ - tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; -} - -/* - * switch_to(x,yn) should switch tasks from x to y. - * - * We fsave/fwait so that an exception goes off at the right time - * (as a call from the fsave or fwait in effect) rather than to - * the wrong process. Lazy FP saving no longer makes any sense - * with modern CPU's, and this simplifies a lot of things (SMP - * and UP become the same). - * - * NOTE! We used to use the x86 hardware context switching. The - * reason for not using it any more becomes apparent when you - * try to recover gracefully from saved state that is no longer - * valid (stale segment register values in particular). With the - * hardware task-switch, there is no way to fix up bad state in - * a reasonable manner. - * - * The fact that Intel documents the hardware task-switching to - * be slow is a fairly red herring - this code is not noticeably - * faster. However, there _is_ some room for improvement here, - * so the performance issues may eventually be a valid point. - * More important, however, is the fact that this allows us much - * more flexibility. - * - * The return value (in %eax) will be the "prev" task after - * the task-switch, and shows up in ret_from_fork in entry.S, - * for example. - */ -struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) -{ - struct thread_struct *prev = &prev_p->thread, - *next = &next_p->thread; - int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); - - /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ - - __unlazy_fpu(prev_p); - - - /* we're going to use this soon, after a few expensive things */ - if (next_p->fpu_counter > 5) - prefetch(&next->i387.fxsave); - - /* - * Reload esp0. - */ - load_esp0(tss, next); - - /* - * Save away %gs. No need to save %fs, as it was saved on the - * stack on entry. No need to save %es and %ds, as those are - * always kernel segments while inside the kernel. Doing this - * before setting the new TLS descriptors avoids the situation - * where we temporarily have non-reloadable segments in %fs - * and %gs. This could be an issue if the NMI handler ever - * used %fs or %gs (it does not today), or if the kernel is - * running inside of a hypervisor layer. - */ - savesegment(gs, prev->gs); - - /* - * Load the per-thread Thread-Local Storage descriptor. - */ - load_TLS(next, cpu); - - /* - * Restore IOPL if needed. In normal use, the flags restore - * in the switch assembly will handle this. But if the kernel - * is running virtualized at a non-zero CPL, the popf will - * not restore flags, so it must be done in a separate step. - */ - if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) - set_iopl_mask(next->iopl); - - /* - * Now maybe handle debug registers and/or IO bitmaps - */ - if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || - task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) - __switch_to_xtra(prev_p, next_p, tss); - - /* - * Leave lazy mode, flushing any hypercalls made here. - * This must be done before restoring TLS segments so - * the GDT and LDT are properly updated, and must be - * done before math_state_restore, so the TS bit is up - * to date. - */ - arch_leave_lazy_cpu_mode(); - - /* If the task has used fpu the last 5 timeslices, just do a full - * restore of the math state immediately to avoid the trap; the - * chances of needing FPU soon are obviously high now - */ - if (next_p->fpu_counter > 5) - math_state_restore(); - - /* - * Restore %gs if needed (which is common) - */ - if (prev->gs | next->gs) - loadsegment(gs, next->gs); - - x86_write_percpu(current_task, next_p); - - return prev_p; -} - -asmlinkage int sys_fork(struct pt_regs regs) -{ - return do_fork(SIGCHLD, regs.esp, ®s, 0, NULL, NULL); -} - -asmlinkage int sys_clone(struct pt_regs regs) -{ - unsigned long clone_flags; - unsigned long newsp; - int __user *parent_tidptr, *child_tidptr; - - clone_flags = regs.ebx; - newsp = regs.ecx; - parent_tidptr = (int __user *)regs.edx; - child_tidptr = (int __user *)regs.edi; - if (!newsp) - newsp = regs.esp; - return do_fork(clone_flags, newsp, ®s, 0, parent_tidptr, child_tidptr); -} - -/* - * This is trivial, and on the face of it looks like it - * could equally well be done in user mode. - * - * Not so, for quite unobvious reasons - register pressure. - * In user mode vfork() cannot have a stack frame, and if - * done by calling the "clone()" system call directly, you - * do not have enough call-clobbered registers to hold all - * the information you need. - */ -asmlinkage int sys_vfork(struct pt_regs regs) -{ - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.esp, ®s, 0, NULL, NULL); -} - -/* - * sys_execve() executes a new program. - */ -asmlinkage int sys_execve(struct pt_regs regs) -{ - int error; - char * filename; - - filename = getname((char __user *) regs.ebx); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - goto out; - error = do_execve(filename, - (char __user * __user *) regs.ecx, - (char __user * __user *) regs.edx, - ®s); - if (error == 0) { - task_lock(current); - current->ptrace &= ~PT_DTRACE; - task_unlock(current); - /* Make sure we don't return using sysenter.. */ - set_thread_flag(TIF_IRET); - } - putname(filename); -out: - return error; -} - -#define top_esp (THREAD_SIZE - sizeof(unsigned long)) -#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) - -unsigned long get_wchan(struct task_struct *p) -{ - unsigned long ebp, esp, eip; - unsigned long stack_page; - int count = 0; - if (!p || p == current || p->state == TASK_RUNNING) - return 0; - stack_page = (unsigned long)task_stack_page(p); - esp = p->thread.esp; - if (!stack_page || esp < stack_page || esp > top_esp+stack_page) - return 0; - /* include/asm-i386/system.h:switch_to() pushes ebp last. */ - ebp = *(unsigned long *) esp; - do { - if (ebp < stack_page || ebp > top_ebp+stack_page) - return 0; - eip = *(unsigned long *) (ebp+4); - if (!in_sched_functions(eip)) - return eip; - ebp = *(unsigned long *) ebp; - } while (count++ < 16); - return 0; -} - -/* - * sys_alloc_thread_area: get a yet unused TLS descriptor index. - */ -static int get_free_idx(void) -{ - struct thread_struct *t = ¤t->thread; - int idx; - - for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) - if (desc_empty(t->tls_array + idx)) - return idx + GDT_ENTRY_TLS_MIN; - return -ESRCH; -} - -/* - * Set a given TLS descriptor: - */ -asmlinkage int sys_set_thread_area(struct user_desc __user *u_info) -{ - struct thread_struct *t = ¤t->thread; - struct user_desc info; - struct desc_struct *desc; - int cpu, idx; - - if (copy_from_user(&info, u_info, sizeof(info))) - return -EFAULT; - idx = info.entry_number; - - /* - * index -1 means the kernel should try to find and - * allocate an empty descriptor: - */ - if (idx == -1) { - idx = get_free_idx(); - if (idx < 0) - return idx; - if (put_user(idx, &u_info->entry_number)) - return -EFAULT; - } - - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN; - - /* - * We must not get preempted while modifying the TLS. - */ - cpu = get_cpu(); - - if (LDT_empty(&info)) { - desc->a = 0; - desc->b = 0; - } else { - desc->a = LDT_entry_a(&info); - desc->b = LDT_entry_b(&info); - } - load_TLS(t, cpu); - - put_cpu(); - - return 0; -} - -/* - * Get the current Thread-Local Storage area: - */ - -#define GET_BASE(desc) ( \ - (((desc)->a >> 16) & 0x0000ffff) | \ - (((desc)->b << 16) & 0x00ff0000) | \ - ( (desc)->b & 0xff000000) ) - -#define GET_LIMIT(desc) ( \ - ((desc)->a & 0x0ffff) | \ - ((desc)->b & 0xf0000) ) - -#define GET_32BIT(desc) (((desc)->b >> 22) & 1) -#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) -#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) -#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) -#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) -#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) - -asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) -{ - struct user_desc info; - struct desc_struct *desc; - int idx; - - if (get_user(idx, &u_info->entry_number)) - return -EFAULT; - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - memset(&info, 0, sizeof(info)); - - desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; - - info.entry_number = idx; - info.base_addr = GET_BASE(desc); - info.limit = GET_LIMIT(desc); - info.seg_32bit = GET_32BIT(desc); - info.contents = GET_CONTENTS(desc); - info.read_exec_only = !GET_WRITABLE(desc); - info.limit_in_pages = GET_LIMIT_PAGES(desc); - info.seg_not_present = !GET_PRESENT(desc); - info.useable = GET_USEABLE(desc); - - if (copy_to_user(u_info, &info, sizeof(info))) - return -EFAULT; - return 0; -} - -unsigned long arch_align_stack(unsigned long sp) -{ - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() % 8192; - return sp & ~0xf; -} diff --git a/arch/i386/kernel/ptrace_32.c b/arch/i386/kernel/ptrace_32.c deleted file mode 100644 index 7c1b92522e9..00000000000 --- a/arch/i386/kernel/ptrace_32.c +++ /dev/null @@ -1,723 +0,0 @@ -/* ptrace.c */ -/* By Ross Biro 1/23/92 */ -/* - * Pentium III FXSR, SSE support - * Gareth Hughes , May 2000 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * does not yet catch signals sent when the child dies. - * in exit.c or in signal.c. - */ - -/* - * Determines which flags the user has access to [1 = access, 0 = no access]. - * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), NT(14), IOPL(12-13), IF(9). - * Also masks reserved bits (31-22, 15, 5, 3, 1). - */ -#define FLAG_MASK 0x00050dd5 - -/* set's the trap flag. */ -#define TRAP_FLAG 0x100 - -/* - * Offset of eflags on child stack.. - */ -#define EFL_OFFSET offsetof(struct pt_regs, eflags) - -static inline struct pt_regs *get_child_regs(struct task_struct *task) -{ - void *stack_top = (void *)task->thread.esp0; - return stack_top - sizeof(struct pt_regs); -} - -/* - * This routine will get a word off of the processes privileged stack. - * the offset is bytes into the pt_regs structure on the stack. - * This routine assumes that all the privileged stacks are in our - * data space. - */ -static inline int get_stack_long(struct task_struct *task, int offset) -{ - unsigned char *stack; - - stack = (unsigned char *)task->thread.esp0 - sizeof(struct pt_regs); - stack += offset; - return (*((int *)stack)); -} - -/* - * This routine will put a word on the processes privileged stack. - * the offset is bytes into the pt_regs structure on the stack. - * This routine assumes that all the privileged stacks are in our - * data space. - */ -static inline int put_stack_long(struct task_struct *task, int offset, - unsigned long data) -{ - unsigned char * stack; - - stack = (unsigned char *)task->thread.esp0 - sizeof(struct pt_regs); - stack += offset; - *(unsigned long *) stack = data; - return 0; -} - -static int putreg(struct task_struct *child, - unsigned long regno, unsigned long value) -{ - switch (regno >> 2) { - case GS: - if (value && (value & 3) != 3) - return -EIO; - child->thread.gs = value; - return 0; - case DS: - case ES: - case FS: - if (value && (value & 3) != 3) - return -EIO; - value &= 0xffff; - break; - case SS: - case CS: - if ((value & 3) != 3) - return -EIO; - value &= 0xffff; - break; - case EFL: - value &= FLAG_MASK; - value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK; - break; - } - if (regno > FS*4) - regno -= 1*4; - put_stack_long(child, regno, value); - return 0; -} - -static unsigned long getreg(struct task_struct *child, - unsigned long regno) -{ - unsigned long retval = ~0UL; - - switch (regno >> 2) { - case GS: - retval = child->thread.gs; - break; - case DS: - case ES: - case FS: - case SS: - case CS: - retval = 0xffff; - /* fall through */ - default: - if (regno > FS*4) - regno -= 1*4; - retval &= get_stack_long(child, regno); - } - return retval; -} - -#define LDT_SEGMENT 4 - -static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_regs *regs) -{ - unsigned long addr, seg; - - addr = regs->eip; - seg = regs->xcs & 0xffff; - if (regs->eflags & VM_MASK) { - addr = (addr & 0xffff) + (seg << 4); - return addr; - } - - /* - * We'll assume that the code segments in the GDT - * are all zero-based. That is largely true: the - * TLS segments are used for data, and the PNPBIOS - * and APM bios ones we just ignore here. - */ - if (seg & LDT_SEGMENT) { - u32 *desc; - unsigned long base; - - seg &= ~7UL; - - down(&child->mm->context.sem); - if (unlikely((seg >> 3) >= child->mm->context.size)) - addr = -1L; /* bogus selector, access would fault */ - else { - desc = child->mm->context.ldt + seg; - base = ((desc[0] >> 16) | - ((desc[1] & 0xff) << 16) | - (desc[1] & 0xff000000)); - - /* 16-bit code segment? */ - if (!((desc[1] >> 22) & 1)) - addr &= 0xffff; - addr += base; - } - up(&child->mm->context.sem); - } - return addr; -} - -static inline int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) -{ - int i, copied; - unsigned char opcode[15]; - unsigned long addr = convert_eip_to_linear(child, regs); - - copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); - for (i = 0; i < copied; i++) { - switch (opcode[i]) { - /* popf and iret */ - case 0x9d: case 0xcf: - return 1; - /* opcode and address size prefixes */ - case 0x66: case 0x67: - continue; - /* irrelevant prefixes (segment overrides and repeats) */ - case 0x26: case 0x2e: - case 0x36: case 0x3e: - case 0x64: case 0x65: - case 0xf0: case 0xf2: case 0xf3: - continue; - - /* - * pushf: NOTE! We should probably not let - * the user see the TF bit being set. But - * it's more pain than it's worth to avoid - * it, and a debugger could emulate this - * all in user space if it _really_ cares. - */ - case 0x9c: - default: - return 0; - } - } - return 0; -} - -static void set_singlestep(struct task_struct *child) -{ - struct pt_regs *regs = get_child_regs(child); - - /* - * Always set TIF_SINGLESTEP - this guarantees that - * we single-step system calls etc.. This will also - * cause us to set TF when returning to user mode. - */ - set_tsk_thread_flag(child, TIF_SINGLESTEP); - - /* - * If TF was already set, don't do anything else - */ - if (regs->eflags & TRAP_FLAG) - return; - - /* Set TF on the kernel stack.. */ - regs->eflags |= TRAP_FLAG; - - /* - * ..but if TF is changed by the instruction we will trace, - * don't mark it as being "us" that set it, so that we - * won't clear it by hand later. - */ - if (is_setting_trap_flag(child, regs)) - return; - - child->ptrace |= PT_DTRACE; -} - -static void clear_singlestep(struct task_struct *child) -{ - /* Always clear TIF_SINGLESTEP... */ - clear_tsk_thread_flag(child, TIF_SINGLESTEP); - - /* But touch TF only if it was set by us.. */ - if (child->ptrace & PT_DTRACE) { - struct pt_regs *regs = get_child_regs(child); - regs->eflags &= ~TRAP_FLAG; - child->ptrace &= ~PT_DTRACE; - } -} - -/* - * Called by kernel/ptrace.c when detaching.. - * - * Make sure the single step bit is not set. - */ -void ptrace_disable(struct task_struct *child) -{ - clear_singlestep(child); - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); -} - -/* - * Perform get_thread_area on behalf of the traced child. - */ -static int -ptrace_get_thread_area(struct task_struct *child, - int idx, struct user_desc __user *user_desc) -{ - struct user_desc info; - struct desc_struct *desc; - -/* - * Get the current Thread-Local Storage area: - */ - -#define GET_BASE(desc) ( \ - (((desc)->a >> 16) & 0x0000ffff) | \ - (((desc)->b << 16) & 0x00ff0000) | \ - ( (desc)->b & 0xff000000) ) - -#define GET_LIMIT(desc) ( \ - ((desc)->a & 0x0ffff) | \ - ((desc)->b & 0xf0000) ) - -#define GET_32BIT(desc) (((desc)->b >> 22) & 1) -#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3) -#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1) -#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1) -#define GET_PRESENT(desc) (((desc)->b >> 15) & 1) -#define GET_USEABLE(desc) (((desc)->b >> 20) & 1) - - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; - - info.entry_number = idx; - info.base_addr = GET_BASE(desc); - info.limit = GET_LIMIT(desc); - info.seg_32bit = GET_32BIT(desc); - info.contents = GET_CONTENTS(desc); - info.read_exec_only = !GET_WRITABLE(desc); - info.limit_in_pages = GET_LIMIT_PAGES(desc); - info.seg_not_present = !GET_PRESENT(desc); - info.useable = GET_USEABLE(desc); - - if (copy_to_user(user_desc, &info, sizeof(info))) - return -EFAULT; - - return 0; -} - -/* - * Perform set_thread_area on behalf of the traced child. - */ -static int -ptrace_set_thread_area(struct task_struct *child, - int idx, struct user_desc __user *user_desc) -{ - struct user_desc info; - struct desc_struct *desc; - - if (copy_from_user(&info, user_desc, sizeof(info))) - return -EFAULT; - - if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) - return -EINVAL; - - desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; - if (LDT_empty(&info)) { - desc->a = 0; - desc->b = 0; - } else { - desc->a = LDT_entry_a(&info); - desc->b = LDT_entry_b(&info); - } - - return 0; -} - -long arch_ptrace(struct task_struct *child, long request, long addr, long data) -{ - struct user * dummy = NULL; - int i, ret; - unsigned long __user *datap = (unsigned long __user *)data; - - switch (request) { - /* when I and D space are separate, these will need to be fixed. */ - case PTRACE_PEEKTEXT: /* read word at location addr. */ - case PTRACE_PEEKDATA: - ret = generic_ptrace_peekdata(child, addr, data); - break; - - /* read the word at location addr in the USER area. */ - case PTRACE_PEEKUSR: { - unsigned long tmp; - - ret = -EIO; - if ((addr & 3) || addr < 0 || - addr > sizeof(struct user) - 3) - break; - - tmp = 0; /* Default return condition */ - if(addr < FRAME_SIZE*sizeof(long)) - tmp = getreg(child, addr); - if(addr >= (long) &dummy->u_debugreg[0] && - addr <= (long) &dummy->u_debugreg[7]){ - addr -= (long) &dummy->u_debugreg[0]; - addr = addr >> 2; - tmp = child->thread.debugreg[addr]; - } - ret = put_user(tmp, datap); - break; - } - - /* when I and D space are separate, this will have to be fixed. */ - case PTRACE_POKETEXT: /* write the word at location addr. */ - case PTRACE_POKEDATA: - ret = generic_ptrace_pokedata(child, addr, data); - break; - - case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ - ret = -EIO; - if ((addr & 3) || addr < 0 || - addr > sizeof(struct user) - 3) - break; - - if (addr < FRAME_SIZE*sizeof(long)) { - ret = putreg(child, addr, data); - break; - } - /* We need to be very careful here. We implicitly - want to modify a portion of the task_struct, and we - have to be selective about what portions we allow someone - to modify. */ - - ret = -EIO; - if(addr >= (long) &dummy->u_debugreg[0] && - addr <= (long) &dummy->u_debugreg[7]){ - - if(addr == (long) &dummy->u_debugreg[4]) break; - if(addr == (long) &dummy->u_debugreg[5]) break; - if(addr < (long) &dummy->u_debugreg[4] && - ((unsigned long) data) >= TASK_SIZE-3) break; - - /* Sanity-check data. Take one half-byte at once with - * check = (val >> (16 + 4*i)) & 0xf. It contains the - * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits - * 2 and 3 are LENi. Given a list of invalid values, - * we do mask |= 1 << invalid_value, so that - * (mask >> check) & 1 is a correct test for invalid - * values. - * - * R/Wi contains the type of the breakpoint / - * watchpoint, LENi contains the length of the watched - * data in the watchpoint case. - * - * The invalid values are: - * - LENi == 0x10 (undefined), so mask |= 0x0f00. - * - R/Wi == 0x10 (break on I/O reads or writes), so - * mask |= 0x4444. - * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= - * 0x1110. - * - * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. - * - * See the Intel Manual "System Programming Guide", - * 15.2.4 - * - * Note that LENi == 0x10 is defined on x86_64 in long - * mode (i.e. even for 32-bit userspace software, but - * 64-bit kernel), so the x86_64 mask value is 0x5454. - * See the AMD manual no. 24593 (AMD64 System - * Programming)*/ - - if(addr == (long) &dummy->u_debugreg[7]) { - data &= ~DR_CONTROL_RESERVED; - for(i=0; i<4; i++) - if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1) - goto out_tsk; - if (data) - set_tsk_thread_flag(child, TIF_DEBUG); - else - clear_tsk_thread_flag(child, TIF_DEBUG); - } - addr -= (long) &dummy->u_debugreg; - addr = addr >> 2; - child->thread.debugreg[addr] = data; - ret = 0; - } - break; - - case PTRACE_SYSEMU: /* continue and stop at next syscall, which will not be executed */ - case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: /* restart after signal. */ - ret = -EIO; - if (!valid_signal(data)) - break; - if (request == PTRACE_SYSEMU) { - set_tsk_thread_flag(child, TIF_SYSCALL_EMU); - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - } else if (request == PTRACE_SYSCALL) { - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); - } else { - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - } - child->exit_code = data; - /* make sure the single step bit is not set. */ - clear_singlestep(child); - wake_up_process(child); - ret = 0; - break; - -/* - * make the child exit. Best I can do is send it a sigkill. - * perhaps it should be put in the status that it wants to - * exit. - */ - case PTRACE_KILL: - ret = 0; - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - /* make sure the single step bit is not set. */ - clear_singlestep(child); - wake_up_process(child); - break; - - case PTRACE_SYSEMU_SINGLESTEP: /* Same as SYSEMU, but singlestep if not syscall */ - case PTRACE_SINGLESTEP: /* set the trap flag. */ - ret = -EIO; - if (!valid_signal(data)) - break; - - if (request == PTRACE_SYSEMU_SINGLESTEP) - set_tsk_thread_flag(child, TIF_SYSCALL_EMU); - else - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); - - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - set_singlestep(child); - child->exit_code = data; - /* give it a chance to run. */ - wake_up_process(child); - ret = 0; - break; - - case PTRACE_DETACH: - /* detach a process that was attached. */ - ret = ptrace_detach(child, data); - break; - - case PTRACE_GETREGS: { /* Get all gp regs from the child. */ - if (!access_ok(VERIFY_WRITE, datap, FRAME_SIZE*sizeof(long))) { - ret = -EIO; - break; - } - for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { - __put_user(getreg(child, i), datap); - datap++; - } - ret = 0; - break; - } - - case PTRACE_SETREGS: { /* Set all gp regs in the child. */ - unsigned long tmp; - if (!access_ok(VERIFY_READ, datap, FRAME_SIZE*sizeof(long))) { - ret = -EIO; - break; - } - for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) { - __get_user(tmp, datap); - putreg(child, i, tmp); - datap++; - } - ret = 0; - break; - } - - case PTRACE_GETFPREGS: { /* Get the child FPU state. */ - if (!access_ok(VERIFY_WRITE, datap, - sizeof(struct user_i387_struct))) { - ret = -EIO; - break; - } - ret = 0; - if (!tsk_used_math(child)) - init_fpu(child); - get_fpregs((struct user_i387_struct __user *)data, child); - break; - } - - case PTRACE_SETFPREGS: { /* Set the child FPU state. */ - if (!access_ok(VERIFY_READ, datap, - sizeof(struct user_i387_struct))) { - ret = -EIO; - break; - } - set_stopped_child_used_math(child); - set_fpregs(child, (struct user_i387_struct __user *)data); - ret = 0; - break; - } - - case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */ - if (!access_ok(VERIFY_WRITE, datap, - sizeof(struct user_fxsr_struct))) { - ret = -EIO; - break; - } - if (!tsk_used_math(child)) - init_fpu(child); - ret = get_fpxregs((struct user_fxsr_struct __user *)data, child); - break; - } - - case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. */ - if (!access_ok(VERIFY_READ, datap, - sizeof(struct user_fxsr_struct))) { - ret = -EIO; - break; - } - set_stopped_child_used_math(child); - ret = set_fpxregs(child, (struct user_fxsr_struct __user *)data); - break; - } - - case PTRACE_GET_THREAD_AREA: - ret = ptrace_get_thread_area(child, addr, - (struct user_desc __user *) data); - break; - - case PTRACE_SET_THREAD_AREA: - ret = ptrace_set_thread_area(child, addr, - (struct user_desc __user *) data); - break; - - default: - ret = ptrace_request(child, request, addr, data); - break; - } - out_tsk: - return ret; -} - -void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) -{ - struct siginfo info; - - tsk->thread.trap_no = 1; - tsk->thread.error_code = error_code; - - memset(&info, 0, sizeof(info)); - info.si_signo = SIGTRAP; - info.si_code = TRAP_BRKPT; - - /* User-mode eip? */ - info.si_addr = user_mode_vm(regs) ? (void __user *) regs->eip : NULL; - - /* Send us the fakey SIGTRAP */ - force_sig_info(SIGTRAP, &info, tsk); -} - -/* notification of system call entry/exit - * - triggered by current->work.syscall_trace - */ -__attribute__((regparm(3))) -int do_syscall_trace(struct pt_regs *regs, int entryexit) -{ - int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); - /* - * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall - * interception - */ - int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP); - int ret = 0; - - /* do the secure computing check first */ - if (!entryexit) - secure_computing(regs->orig_eax); - - if (unlikely(current->audit_context)) { - if (entryexit) - audit_syscall_exit(AUDITSC_RESULT(regs->eax), - regs->eax); - /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only - * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is - * not used, entry.S will call us only on syscall exit, not - * entry; so when TIF_SYSCALL_AUDIT is used we must avoid - * calling send_sigtrap() on syscall entry. - * - * Note that when PTRACE_SYSEMU_SINGLESTEP is used, - * is_singlestep is false, despite his name, so we will still do - * the correct thing. - */ - else if (is_singlestep) - goto out; - } - - if (!(current->ptrace & PT_PTRACED)) - goto out; - - /* If a process stops on the 1st tracepoint with SYSCALL_TRACE - * and then is resumed with SYSEMU_SINGLESTEP, it will come in - * here. We have to check this and return */ - if (is_sysemu && entryexit) - return 0; - - /* Fake a debug trap */ - if (is_singlestep) - send_sigtrap(current, regs, 0); - - if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu) - goto out; - - /* the 0x80 provides a way for the tracing parent to distinguish - between a syscall stop and SIGTRAP delivery */ - /* Note that the debugger could change the result of test_thread_flag!*/ - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0)); - - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } - ret = is_sysemu; -out: - if (unlikely(current->audit_context) && !entryexit) - audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_eax, - regs->ebx, regs->ecx, regs->edx, regs->esi); - if (ret == 0) - return 0; - - regs->orig_eax = -1; /* force skip of syscall restarting */ - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(regs->eax), regs->eax); - return 1; -} diff --git a/arch/i386/kernel/quirks.c b/arch/i386/kernel/quirks.c deleted file mode 100644 index 6722469c263..00000000000 --- a/arch/i386/kernel/quirks.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * This file contains work-arounds for x86 and x86_64 platform bugs. - */ -#include -#include - -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) - -static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) -{ - u8 config, rev; - u32 word; - - /* BIOS may enable hardware IRQ balancing for - * E7520/E7320/E7525(revision ID 0x9 and below) - * based platforms. - * Disable SW irqbalance/affinity on those platforms. - */ - pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); - if (rev > 0x9) - return; - - /* enable access to config space*/ - pci_read_config_byte(dev, 0xf4, &config); - pci_write_config_byte(dev, 0xf4, config|0x2); - - /* read xTPR register */ - raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); - - if (!(word & (1 << 13))) { - printk(KERN_INFO "Intel E7520/7320/7525 detected. " - "Disabling irq balancing and affinity\n"); -#ifdef CONFIG_IRQBALANCE - irqbalance_disable(""); -#endif - noirqdebug_setup(""); -#ifdef CONFIG_PROC_FS - no_irq_affinity = 1; -#endif - } - - /* put back the original value for config space*/ - if (!(config & 0x2)) - pci_write_config_byte(dev, 0xf4, config); -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); -#endif diff --git a/arch/i386/kernel/reboot_32.c b/arch/i386/kernel/reboot_32.c deleted file mode 100644 index 0d796248866..00000000000 --- a/arch/i386/kernel/reboot_32.c +++ /dev/null @@ -1,413 +0,0 @@ -/* - * linux/arch/i386/kernel/reboot.c - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "mach_reboot.h" -#include -#include - -/* - * Power off function, if any - */ -void (*pm_power_off)(void); -EXPORT_SYMBOL(pm_power_off); - -static int reboot_mode; -static int reboot_thru_bios; - -#ifdef CONFIG_SMP -static int reboot_cpu = -1; -#endif -static int __init reboot_setup(char *str) -{ - while(1) { - switch (*str) { - case 'w': /* "warm" reboot (no memory testing etc) */ - reboot_mode = 0x1234; - break; - case 'c': /* "cold" reboot (with memory testing etc) */ - reboot_mode = 0x0; - break; - case 'b': /* "bios" reboot by jumping through the BIOS */ - reboot_thru_bios = 1; - break; - case 'h': /* "hard" reboot by toggling RESET and/or crashing the CPU */ - reboot_thru_bios = 0; - break; -#ifdef CONFIG_SMP - case 's': /* "smp" reboot by executing reset on BSP or other CPU*/ - if (isdigit(*(str+1))) { - reboot_cpu = (int) (*(str+1) - '0'); - if (isdigit(*(str+2))) - reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0'); - } - /* we will leave sorting out the final value - when we are ready to reboot, since we might not - have set up boot_cpu_id or smp_num_cpu */ - break; -#endif - } - if((str = strchr(str,',')) != NULL) - str++; - else - break; - } - return 1; -} - -__setup("reboot=", reboot_setup); - -/* - * Reboot options and system auto-detection code provided by - * Dell Inc. so their systems "just work". :-) - */ - -/* - * Some machines require the "reboot=b" commandline option, this quirk makes that automatic. - */ -static int __init set_bios_reboot(struct dmi_system_id *d) -{ - if (!reboot_thru_bios) { - reboot_thru_bios = 1; - printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident); - } - return 0; -} - -static struct dmi_system_id __initdata reboot_dmi_table[] = { - { /* Handle problems with rebooting on Dell E520's */ - .callback = set_bios_reboot, - .ident = "Dell E520", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Dell DM061"), - }, - }, - { /* Handle problems with rebooting on Dell 1300's */ - .callback = set_bios_reboot, - .ident = "Dell PowerEdge 1300", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1300/"), - }, - }, - { /* Handle problems with rebooting on Dell 300's */ - .callback = set_bios_reboot, - .ident = "Dell PowerEdge 300", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"), - }, - }, - { /* Handle problems with rebooting on Dell Optiplex 745's SFF*/ - .callback = set_bios_reboot, - .ident = "Dell OptiPlex 745", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"), - DMI_MATCH(DMI_BOARD_NAME, "0WF810"), - }, - }, - { /* Handle problems with rebooting on Dell 2400's */ - .callback = set_bios_reboot, - .ident = "Dell PowerEdge 2400", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), - }, - }, - { /* Handle problems with rebooting on HP laptops */ - .callback = set_bios_reboot, - .ident = "HP Compaq Laptop", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"), - }, - }, - { } -}; - -static int __init reboot_init(void) -{ - dmi_check_system(reboot_dmi_table); - return 0; -} - -core_initcall(reboot_init); - -/* The following code and data reboots the machine by switching to real - mode and jumping to the BIOS reset entry point, as if the CPU has - really been reset. The previous version asked the keyboard - controller to pulse the CPU reset line, which is more thorough, but - doesn't work with at least one type of 486 motherboard. It is easy - to stop this code working; hence the copious comments. */ - -static unsigned long long -real_mode_gdt_entries [3] = -{ - 0x0000000000000000ULL, /* Null descriptor */ - 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ - 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ -}; - -static struct Xgt_desc_struct -real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, -real_mode_idt = { 0x3ff, 0 }, -no_idt = { 0, 0 }; - - -/* This is 16-bit protected mode code to disable paging and the cache, - switch to real mode and jump to the BIOS reset code. - - The instruction that switches to real mode by writing to CR0 must be - followed immediately by a far jump instruction, which set CS to a - valid value for real mode, and flushes the prefetch queue to avoid - running instructions that have already been decoded in protected - mode. - - Clears all the flags except ET, especially PG (paging), PE - (protected-mode enable) and TS (task switch for coprocessor state - save). Flushes the TLB after paging has been disabled. Sets CD and - NW, to disable the cache on a 486, and invalidates the cache. This - is more like the state of a 486 after reset. I don't know if - something else should be done for other chips. - - More could be done here to set up the registers as if a CPU reset had - occurred; hopefully real BIOSs don't assume much. */ - -static unsigned char real_mode_switch [] = -{ - 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ - 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */ - 0x66, 0x0d, 0x00, 0x00, 0x00, 0x60, /* orl $0x60000000,%eax */ - 0x66, 0x0f, 0x22, 0xc0, /* movl %eax,%cr0 */ - 0x66, 0x0f, 0x22, 0xd8, /* movl %eax,%cr3 */ - 0x66, 0x0f, 0x20, 0xc3, /* movl %cr0,%ebx */ - 0x66, 0x81, 0xe3, 0x00, 0x00, 0x00, 0x60, /* andl $0x60000000,%ebx */ - 0x74, 0x02, /* jz f */ - 0x0f, 0x09, /* wbinvd */ - 0x24, 0x10, /* f: andb $0x10,al */ - 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */ -}; -static unsigned char jump_to_bios [] = -{ - 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */ -}; - -/* - * Switch to real mode and then execute the code - * specified by the code and length parameters. - * We assume that length will aways be less that 100! - */ -void machine_real_restart(unsigned char *code, int length) -{ - local_irq_disable(); - - /* Write zero to CMOS register number 0x0f, which the BIOS POST - routine will recognize as telling it to do a proper reboot. (Well - that's what this book in front of me says -- it may only apply to - the Phoenix BIOS though, it's not clear). At the same time, - disable NMIs by setting the top bit in the CMOS address register, - as we're about to do peculiar things to the CPU. I'm not sure if - `outb_p' is needed instead of just `outb'. Use it to be on the - safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.) - */ - - spin_lock(&rtc_lock); - CMOS_WRITE(0x00, 0x8f); - spin_unlock(&rtc_lock); - - /* Remap the kernel at virtual address zero, as well as offset zero - from the kernel segment. This assumes the kernel segment starts at - virtual address PAGE_OFFSET. */ - - memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS); - - /* - * Use `swapper_pg_dir' as our page directory. - */ - load_cr3(swapper_pg_dir); - - /* Write 0x1234 to absolute memory location 0x472. The BIOS reads - this on booting to tell it to "Bypass memory test (also warm - boot)". This seems like a fairly standard thing that gets set by - REBOOT.COM programs, and the previous reset routine did this - too. */ - - *((unsigned short *)0x472) = reboot_mode; - - /* For the switch to real mode, copy some code to low memory. It has - to be in the first 64k because it is running in 16-bit mode, and it - has to have the same physical and virtual address, because it turns - off paging. Copy it near the end of the first page, out of the way - of BIOS variables. */ - - memcpy ((void *) (0x1000 - sizeof (real_mode_switch) - 100), - real_mode_switch, sizeof (real_mode_switch)); - memcpy ((void *) (0x1000 - 100), code, length); - - /* Set up the IDT for real mode. */ - - load_idt(&real_mode_idt); - - /* Set up a GDT from which we can load segment descriptors for real - mode. The GDT is not used in real mode; it is just needed here to - prepare the descriptors. */ - - load_gdt(&real_mode_gdt); - - /* Load the data segment registers, and thus the descriptors ready for - real mode. The base address of each segment is 0x100, 16 times the - selector value being loaded here. This is so that the segment - registers don't have to be reloaded after switching to real mode: - the values are consistent for real mode operation already. */ - - __asm__ __volatile__ ("movl $0x0010,%%eax\n" - "\tmovl %%eax,%%ds\n" - "\tmovl %%eax,%%es\n" - "\tmovl %%eax,%%fs\n" - "\tmovl %%eax,%%gs\n" - "\tmovl %%eax,%%ss" : : : "eax"); - - /* Jump to the 16-bit code that we copied earlier. It disables paging - and the cache, switches to real mode, and jumps to the BIOS reset - entry point. */ - - __asm__ __volatile__ ("ljmp $0x0008,%0" - : - : "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100))); -} -#ifdef CONFIG_APM_MODULE -EXPORT_SYMBOL(machine_real_restart); -#endif - -static void native_machine_shutdown(void) -{ -#ifdef CONFIG_SMP - int reboot_cpu_id; - - /* The boot cpu is always logical cpu 0 */ - reboot_cpu_id = 0; - - /* See if there has been given a command line override */ - if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && - cpu_isset(reboot_cpu, cpu_online_map)) { - reboot_cpu_id = reboot_cpu; - } - - /* Make certain the cpu I'm rebooting on is online */ - if (!cpu_isset(reboot_cpu_id, cpu_online_map)) { - reboot_cpu_id = smp_processor_id(); - } - - /* Make certain I only run on the appropriate processor */ - set_cpus_allowed(current, cpumask_of_cpu(reboot_cpu_id)); - - /* O.K. Now that I'm on the appropriate processor, stop - * all of the others, and disable their local APICs. - */ - - smp_send_stop(); -#endif /* CONFIG_SMP */ - - lapic_shutdown(); - -#ifdef CONFIG_X86_IO_APIC - disable_IO_APIC(); -#endif -} - -void __attribute__((weak)) mach_reboot_fixups(void) -{ -} - -static void native_machine_emergency_restart(void) -{ - if (!reboot_thru_bios) { - if (efi_enabled) { - efi.reset_system(EFI_RESET_COLD, EFI_SUCCESS, 0, NULL); - load_idt(&no_idt); - __asm__ __volatile__("int3"); - } - /* rebooting needs to touch the page at absolute addr 0 */ - *((unsigned short *)__va(0x472)) = reboot_mode; - for (;;) { - mach_reboot_fixups(); /* for board specific fixups */ - mach_reboot(); - /* That didn't work - force a triple fault.. */ - load_idt(&no_idt); - __asm__ __volatile__("int3"); - } - } - if (efi_enabled) - efi.reset_system(EFI_RESET_WARM, EFI_SUCCESS, 0, NULL); - - machine_real_restart(jump_to_bios, sizeof(jump_to_bios)); -} - -static void native_machine_restart(char * __unused) -{ - machine_shutdown(); - machine_emergency_restart(); -} - -static void native_machine_halt(void) -{ -} - -static void native_machine_power_off(void) -{ - if (pm_power_off) { - machine_shutdown(); - pm_power_off(); - } -} - - -struct machine_ops machine_ops = { - .power_off = native_machine_power_off, - .shutdown = native_machine_shutdown, - .emergency_restart = native_machine_emergency_restart, - .restart = native_machine_restart, - .halt = native_machine_halt, -}; - -void machine_power_off(void) -{ - machine_ops.power_off(); -} - -void machine_shutdown(void) -{ - machine_ops.shutdown(); -} - -void machine_emergency_restart(void) -{ - machine_ops.emergency_restart(); -} - -void machine_restart(char *cmd) -{ - machine_ops.restart(cmd); -} - -void machine_halt(void) -{ - machine_ops.halt(); -} diff --git a/arch/i386/kernel/reboot_fixups_32.c b/arch/i386/kernel/reboot_fixups_32.c deleted file mode 100644 index 03e1cce58f4..00000000000 --- a/arch/i386/kernel/reboot_fixups_32.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * linux/arch/i386/kernel/reboot_fixups.c - * - * This is a good place to put board specific reboot fixups. - * - * List of supported fixups: - * geode-gx1/cs5530a - Jaya Kumar - * geode-gx/lx/cs5536 - Andres Salomon - * - */ - -#include -#include -#include -#include - -static void cs5530a_warm_reset(struct pci_dev *dev) -{ - /* writing 1 to the reset control register, 0x44 causes the - cs5530a to perform a system warm reset */ - pci_write_config_byte(dev, 0x44, 0x1); - udelay(50); /* shouldn't get here but be safe and spin-a-while */ - return; -} - -static void cs5536_warm_reset(struct pci_dev *dev) -{ - /* - * 6.6.2.12 Soft Reset (DIVIL_SOFT_RESET) - * writing 1 to the LSB of this MSR causes a hard reset. - */ - wrmsrl(0x51400017, 1ULL); - udelay(50); /* shouldn't get here but be safe and spin a while */ -} - -struct device_fixup { - unsigned int vendor; - unsigned int device; - void (*reboot_fixup)(struct pci_dev *); -}; - -static struct device_fixup fixups_table[] = { -{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, -{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, -}; - -/* - * we see if any fixup is available for our current hardware. if there - * is a fixup, we call it and we expect to never return from it. if we - * do return, we keep looking and then eventually fall back to the - * standard mach_reboot on return. - */ -void mach_reboot_fixups(void) -{ - struct device_fixup *cur; - struct pci_dev *dev; - int i; - - for (i=0; i < ARRAY_SIZE(fixups_table); i++) { - cur = &(fixups_table[i]); - dev = pci_get_device(cur->vendor, cur->device, NULL); - if (!dev) - continue; - - cur->reboot_fixup(dev); - } -} - diff --git a/arch/i386/kernel/relocate_kernel_32.S b/arch/i386/kernel/relocate_kernel_32.S deleted file mode 100644 index f151d6fae46..00000000000 --- a/arch/i386/kernel/relocate_kernel_32.S +++ /dev/null @@ -1,252 +0,0 @@ -/* - * relocate_kernel.S - put the kernel image in place to boot - * Copyright (C) 2002-2004 Eric Biederman - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. - */ - -#include -#include -#include - -/* - * Must be relocatable PIC code callable as a C function - */ - -#define PTR(x) (x << 2) -#define PAGE_ALIGNED (1 << PAGE_SHIFT) -#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */ -#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */ - - .text - .align PAGE_ALIGNED - .globl relocate_kernel -relocate_kernel: - movl 8(%esp), %ebp /* list of pages */ - -#ifdef CONFIG_X86_PAE - /* map the control page at its virtual address */ - - movl PTR(VA_PGD)(%ebp), %edi - movl PTR(VA_CONTROL_PAGE)(%ebp), %eax - andl $0xc0000000, %eax - shrl $27, %eax - addl %edi, %eax - - movl PTR(PA_PMD_0)(%ebp), %edx - orl $PAE_PGD_ATTR, %edx - movl %edx, (%eax) - - movl PTR(VA_PMD_0)(%ebp), %edi - movl PTR(VA_CONTROL_PAGE)(%ebp), %eax - andl $0x3fe00000, %eax - shrl $18, %eax - addl %edi, %eax - - movl PTR(PA_PTE_0)(%ebp), %edx - orl $PAGE_ATTR, %edx - movl %edx, (%eax) - - movl PTR(VA_PTE_0)(%ebp), %edi - movl PTR(VA_CONTROL_PAGE)(%ebp), %eax - andl $0x001ff000, %eax - shrl $9, %eax - addl %edi, %eax - - movl PTR(PA_CONTROL_PAGE)(%ebp), %edx - orl $PAGE_ATTR, %edx - movl %edx, (%eax) - - /* identity map the control page at its physical address */ - - movl PTR(VA_PGD)(%ebp), %edi - movl PTR(PA_CONTROL_PAGE)(%ebp), %eax - andl $0xc0000000, %eax - shrl $27, %eax - addl %edi, %eax - - movl PTR(PA_PMD_1)(%ebp), %edx - orl $PAE_PGD_ATTR, %edx - movl %edx, (%eax) - - movl PTR(VA_PMD_1)(%ebp), %edi - movl PTR(PA_CONTROL_PAGE)(%ebp), %eax - andl $0x3fe00000, %eax - shrl $18, %eax - addl %edi, %eax - - movl PTR(PA_PTE_1)(%ebp), %edx - orl $PAGE_ATTR, %edx - movl %edx, (%eax) - - movl PTR(VA_PTE_1)(%ebp), %edi - movl PTR(PA_CONTROL_PAGE)(%ebp), %eax - andl $0x001ff000, %eax - shrl $9, %eax - addl %edi, %eax - - movl PTR(PA_CONTROL_PAGE)(%ebp), %edx - orl $PAGE_ATTR, %edx - movl %edx, (%eax) -#else - /* map the control page at its virtual address */ - - movl PTR(VA_PGD)(%ebp), %edi - movl PTR(VA_CONTROL_PAGE)(%ebp), %eax - andl $0xffc00000, %eax - shrl $20, %eax - addl %edi, %eax - - movl PTR(PA_PTE_0)(%ebp), %edx - orl $PAGE_ATTR, %edx - movl %edx, (%eax) - - movl PTR(VA_PTE_0)(%ebp), %edi - movl PTR(VA_CONTROL_PAGE)(%ebp), %eax - andl $0x003ff000, %eax - shrl $10, %eax - addl %edi, %eax - - movl PTR(PA_CONTROL_PAGE)(%ebp), %edx - orl $PAGE_ATTR, %edx - movl %edx, (%eax) - - /* identity map the control page at its physical address */ - - movl PTR(VA_PGD)(%ebp), %edi - movl PTR(PA_CONTROL_PAGE)(%ebp), %eax - andl $0xffc00000, %eax - shrl $20, %eax - addl %edi, %eax - - movl PTR(PA_PTE_1)(%ebp), %edx - orl $PAGE_ATTR, %edx - movl %edx, (%eax) - - movl PTR(VA_PTE_1)(%ebp), %edi - movl PTR(PA_CONTROL_PAGE)(%ebp), %eax - andl $0x003ff000, %eax - shrl $10, %eax - addl %edi, %eax - - movl PTR(PA_CONTROL_PAGE)(%ebp), %edx - orl $PAGE_ATTR, %edx - movl %edx, (%eax) -#endif - -relocate_new_kernel: - /* read the arguments and say goodbye to the stack */ - movl 4(%esp), %ebx /* page_list */ - movl 8(%esp), %ebp /* list of pages */ - movl 12(%esp), %edx /* start address */ - movl 16(%esp), %ecx /* cpu_has_pae */ - - /* zero out flags, and disable interrupts */ - pushl $0 - popfl - - /* get physical address of control page now */ - /* this is impossible after page table switch */ - movl PTR(PA_CONTROL_PAGE)(%ebp), %edi - - /* switch to new set of page tables */ - movl PTR(PA_PGD)(%ebp), %eax - movl %eax, %cr3 - - /* setup a new stack at the end of the physical control page */ - lea 4096(%edi), %esp - - /* jump to identity mapped page */ - movl %edi, %eax - addl $(identity_mapped - relocate_kernel), %eax - pushl %eax - ret - -identity_mapped: - /* store the start address on the stack */ - pushl %edx - - /* Set cr0 to a known state: - * 31 0 == Paging disabled - * 18 0 == Alignment check disabled - * 16 0 == Write protect disabled - * 3 0 == No task switch - * 2 0 == Don't do FP software emulation. - * 0 1 == Proctected mode enabled - */ - movl %cr0, %eax - andl $~((1<<31)|(1<<18)|(1<<16)|(1<<3)|(1<<2)), %eax - orl $(1<<0), %eax - movl %eax, %cr0 - - /* clear cr4 if applicable */ - testl %ecx, %ecx - jz 1f - /* Set cr4 to a known state: - * Setting everything to zero seems safe. - */ - movl %cr4, %eax - andl $0, %eax - movl %eax, %cr4 - - jmp 1f -1: - - /* Flush the TLB (needed?) */ - xorl %eax, %eax - movl %eax, %cr3 - - /* Do the copies */ - movl %ebx, %ecx - jmp 1f - -0: /* top, read another word from the indirection page */ - movl (%ebx), %ecx - addl $4, %ebx -1: - testl $0x1, %ecx /* is it a destination page */ - jz 2f - movl %ecx, %edi - andl $0xfffff000, %edi - jmp 0b -2: - testl $0x2, %ecx /* is it an indirection page */ - jz 2f - movl %ecx, %ebx - andl $0xfffff000, %ebx - jmp 0b -2: - testl $0x4, %ecx /* is it the done indicator */ - jz 2f - jmp 3f -2: - testl $0x8, %ecx /* is it the source indicator */ - jz 0b /* Ignore it otherwise */ - movl %ecx, %esi /* For every source page do a copy */ - andl $0xfffff000, %esi - - movl $1024, %ecx - rep ; movsl - jmp 0b - -3: - - /* To be certain of avoiding problems with self-modifying code - * I need to execute a serializing instruction here. - * So I flush the TLB, it's handy, and not processor dependent. - */ - xorl %eax, %eax - movl %eax, %cr3 - - /* set all of the registers to known values */ - /* leave %esp alone */ - - xorl %eax, %eax - xorl %ebx, %ebx - xorl %ecx, %ecx - xorl %edx, %edx - xorl %esi, %esi - xorl %edi, %edi - xorl %ebp, %ebp - ret diff --git a/arch/i386/kernel/scx200_32.c b/arch/i386/kernel/scx200_32.c deleted file mode 100644 index c7d3df23f58..00000000000 --- a/arch/i386/kernel/scx200_32.c +++ /dev/null @@ -1,131 +0,0 @@ -/* linux/arch/i386/kernel/scx200.c - - Copyright (c) 2001,2002 Christer Weinigel - - National Semiconductor SCx200 support. */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -/* Verify that the configuration block really is there */ -#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base)) - -#define NAME "scx200" - -MODULE_AUTHOR("Christer Weinigel "); -MODULE_DESCRIPTION("NatSemi SCx200 Driver"); -MODULE_LICENSE("GPL"); - -unsigned scx200_gpio_base = 0; -long scx200_gpio_shadow[2]; - -unsigned scx200_cb_base = 0; - -static struct pci_device_id scx200_tbl[] = { - { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) }, - { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) }, - { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_XBUS) }, - { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_XBUS) }, - { }, -}; -MODULE_DEVICE_TABLE(pci,scx200_tbl); - -static int __devinit scx200_probe(struct pci_dev *, const struct pci_device_id *); - -static struct pci_driver scx200_pci_driver = { - .name = "scx200", - .id_table = scx200_tbl, - .probe = scx200_probe, -}; - -static DEFINE_MUTEX(scx200_gpio_config_lock); - -static void __devinit scx200_init_shadow(void) -{ - int bank; - - /* read the current values driven on the GPIO signals */ - for (bank = 0; bank < 2; ++bank) - scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank); -} - -static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent) -{ - unsigned base; - - if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE || - pdev->device == PCI_DEVICE_ID_NS_SC1100_BRIDGE) { - base = pci_resource_start(pdev, 0); - printk(KERN_INFO NAME ": GPIO base 0x%x\n", base); - - if (request_region(base, SCx200_GPIO_SIZE, "NatSemi SCx200 GPIO") == 0) { - printk(KERN_ERR NAME ": can't allocate I/O for GPIOs\n"); - return -EBUSY; - } - - scx200_gpio_base = base; - scx200_init_shadow(); - - } else { - /* find the base of the Configuration Block */ - if (scx200_cb_probe(SCx200_CB_BASE_FIXED)) { - scx200_cb_base = SCx200_CB_BASE_FIXED; - } else { - pci_read_config_dword(pdev, SCx200_CBA_SCRATCH, &base); - if (scx200_cb_probe(base)) { - scx200_cb_base = base; - } else { - printk(KERN_WARNING NAME ": Configuration Block not found\n"); - return -ENODEV; - } - } - printk(KERN_INFO NAME ": Configuration Block base 0x%x\n", scx200_cb_base); - } - - return 0; -} - -u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits) -{ - u32 config, new_config; - - mutex_lock(&scx200_gpio_config_lock); - - outl(index, scx200_gpio_base + 0x20); - config = inl(scx200_gpio_base + 0x24); - - new_config = (config & mask) | bits; - outl(new_config, scx200_gpio_base + 0x24); - - mutex_unlock(&scx200_gpio_config_lock); - - return config; -} - -static int __init scx200_init(void) -{ - printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n"); - - return pci_register_driver(&scx200_pci_driver); -} - -static void __exit scx200_cleanup(void) -{ - pci_unregister_driver(&scx200_pci_driver); - release_region(scx200_gpio_base, SCx200_GPIO_SIZE); -} - -module_init(scx200_init); -module_exit(scx200_cleanup); - -EXPORT_SYMBOL(scx200_gpio_base); -EXPORT_SYMBOL(scx200_gpio_shadow); -EXPORT_SYMBOL(scx200_gpio_configure); -EXPORT_SYMBOL(scx200_cb_base); diff --git a/arch/i386/kernel/setup_32.c b/arch/i386/kernel/setup_32.c deleted file mode 100644 index d474cd639bc..00000000000 --- a/arch/i386/kernel/setup_32.c +++ /dev/null @@ -1,653 +0,0 @@ -/* - * linux/arch/i386/kernel/setup.c - * - * Copyright (C) 1995 Linus Torvalds - * - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - * - * Memory region support - * David Parsons , July-August 1999 - * - * Added E820 sanitization routine (removes overlapping memory regions); - * Brian Moyle , February 2001 - * - * Moved CPU detection code to cpu/${cpu}.c - * Patrick Mochel , March 2002 - * - * Provisions for empty E820 memory regions (reported by certain BIOSes). - * Alex Achenbach , December 2002. - * - */ - -/* - * This file handles the architecture-dependent parts of initialization - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include