author | Xavier Leroy <xavier.leroy@inria.fr> | 2008-01-03 09:37:10 +0000
---|---|---
committer | Xavier Leroy <xavier.leroy@inria.fr> | 2008-01-03 09:37:10 +0000
commit | a3c0366f4e41bed30ba2170d782d06fbbfd1b8ba (patch) |
tree | 66860726f88e849cf14c8d454aee8cdc57039623 |
parent | 4235c38b7f172d84ac2a8f6a50f0a58ce32d5b03 (diff) |
PR#4448: replace dense page table by sparse hash table.
Introduction and use of macros Is_in_value_area, Is_in_heap_or_young.
Removal of USE_MMAP_INSTEAD_OF_MALLOC.
git-svn-id: http://caml.inria.fr/svn/ocaml/trunk@8743 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
-rw-r--r-- | asmrun/natdynlink.c | 45
-rw-r--r-- | asmrun/signals_asm.c | 9
-rw-r--r-- | asmrun/startup.c | 41
-rw-r--r-- | byterun/array.c | 4
-rw-r--r-- | byterun/compact.c | 2
-rw-r--r-- | byterun/compare.c | 7
-rw-r--r-- | byterun/config.h | 9
-rw-r--r-- | byterun/extern.c | 4
-rw-r--r-- | byterun/finalise.c | 4
-rw-r--r-- | byterun/freelist.c | 1
-rw-r--r-- | byterun/gc_ctrl.c | 1
-rw-r--r-- | byterun/hash.c | 2
-rw-r--r-- | byterun/instrtrace.c | 4
-rw-r--r-- | byterun/major_gc.c | 32
-rw-r--r-- | byterun/major_gc.h | 17
-rw-r--r-- | byterun/memory.c | 228
-rw-r--r-- | byterun/memory.h | 17
-rw-r--r-- | byterun/minor_gc.c | 14
-rw-r--r-- | byterun/mlvalues.h | 18
-rw-r--r-- | byterun/obj.c | 4
-rw-r--r-- | byterun/startup.c | 4
-rw-r--r-- | byterun/unix.c | 46
-rw-r--r-- | byterun/weak.c | 2
-rwxr-xr-x | configure | 8
24 files changed, 248 insertions, 275 deletions
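
Before the patch itself, here is a minimal standalone sketch of the data structure it introduces: a sparse page table stored as an open-addressing hash table with linear probing, where each one-word entry packs a page-aligned address (the key) together with an 8-bit classification bitmask (the data). This is an illustration only, not the runtime's code: the names (`page_add`, `page_lookup`, `TBL_BITS`) are invented for the sketch, the table has a fixed size with no resizing or removal, and a 64-bit platform is assumed. The real implementation, with dynamic sizing, is in byterun/memory.c below.

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_LOG  12                               /* 4 KiB pages */
#define PAGE_MASK ((uintptr_t)-1 << PAGE_LOG)
#define TBL_BITS  10                               /* demo: 1024 slots */
#define TBL_SIZE  ((uintptr_t)1 << TBL_BITS)
#define TBL_MASK  (TBL_SIZE - 1)

/* Multiplicative Fibonacci hashing: multiply the page number by the
   64-bit HASH_FACTOR from the patch and keep the TBL_BITS high bits. */
#define HASH_FACTOR 11400714819323198486ULL
#define HASH(page)  ((uintptr_t)(((page) * HASH_FACTOR) >> (64 - TBL_BITS)))

static uintptr_t entries[TBL_SIZE];                /* 0 = empty slot */

/* Set classification bits for the page containing addr. */
static void page_add(uintptr_t addr, int kind)
{
  uintptr_t page = addr & PAGE_MASK;
  uintptr_t h = HASH(page >> PAGE_LOG);
  for (;;) {
    if (entries[h] == 0) { entries[h] = page | kind; return; }
    if (((entries[h] ^ page) & PAGE_MASK) == 0) { entries[h] |= kind; return; }
    h = (h + 1) & TBL_MASK;                        /* linear probing */
  }
}

/* Return the classification bits for addr, 0 if its page is unknown. */
static int page_lookup(uintptr_t addr)
{
  uintptr_t h = HASH((addr & PAGE_MASK) >> PAGE_LOG);
  while (entries[h] != 0) {
    if (((entries[h] ^ addr) & PAGE_MASK) == 0) return entries[h] & 0xFF;
    h = (h + 1) & TBL_MASK;
  }
  return 0;
}

int main(void)
{
  char *chunk = malloc(1 << PAGE_LOG);
  page_add((uintptr_t)chunk, 1);                          /* 1 = In_heap */
  printf("chunk: %d\n", page_lookup((uintptr_t)chunk));   /* prints 1 */
  printf("stack: %d\n", page_lookup((uintptr_t)&chunk));  /* prints 0 */
  free(chunk);
  return 0;
}
```

Fibonacci hashing scatters consecutive page numbers across the table, which keeps probe chains short even though heap chunks register many consecutive pages.
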
diff --git a/asmrun/natdynlink.c b/asmrun/natdynlink.c
index 05da896b9..c14c4e5f7 100644
--- a/asmrun/natdynlink.c
+++ b/asmrun/natdynlink.c
@@ -25,45 +25,6 @@ static void *getsym(void *handle, char *module, char *name, int opt){
   return sym;
 }
 
-
-/* Data segments are used by the Is_atom predicate (mlvalues.h)
-   to detect static Caml blocks.
-
-   Code segments are used in signals_asm.c
-
-   TODO: use dichotomic search
-*/
-
-typedef struct segment {
-  void *begin;
-  void *end;
-  struct segment *next;
-} segment;
-
-segment *caml_dyn_data_segments = NULL;
-segment *caml_dyn_code_segments = NULL;
-
-static segment *segment_cons(void *begin, void *end, segment *tl) {
-  segment *lnk = caml_stat_alloc(sizeof(segment));
-  lnk->begin = begin;
-  lnk->end = end;
-  lnk->next = tl;
-  return lnk;
-}
-
-int caml_is_in_data(void *p) {
-  segment *lnk;
-  for (lnk = caml_dyn_data_segments; NULL != lnk; lnk = lnk->next)
-    if (p >= lnk->begin && p <= lnk->end) return 1;
-  return 0;
-}
-
-int caml_is_in_code(void *p) {
-  segment *lnk;
-  for (lnk = caml_dyn_code_segments; NULL != lnk; lnk = lnk->next)
-    if (p >= lnk->begin && p <= lnk->end) return 1;
-  return 0;
-}
-
 extern char caml_globals_map[];
 
 CAMLprim value caml_natdynlink_getmap(value unit)
@@ -120,13 +81,13 @@ CAMLprim value caml_natdynlink_run(void *handle, value symbol) {
   sym = optsym("__data_begin");
   sym2 = optsym("__data_end");
   if (NULL != sym && NULL != sym2)
-    caml_dyn_data_segments = segment_cons(sym,sym2,caml_dyn_data_segments);
+    caml_page_table_add(In_static_data, sym, sym2);
 
   sym = optsym("__code_begin");
   sym2 = optsym("__code_end");
   if (NULL != sym && NULL != sym2)
-    caml_dyn_code_segments = segment_cons(sym,sym2,caml_dyn_code_segments);
-
+    caml_page_table_add(In_code_area, sym, sym2);
+
   entrypoint = optsym("__entry");
   if (NULL != entrypoint) result = caml_callback((value)(&entrypoint), 0);
   else result = Val_unit;
diff --git a/asmrun/signals_asm.c b/asmrun/signals_asm.c
index 85d441dae..f8f542ada 100644
--- a/asmrun/signals_asm.c
+++ b/asmrun/signals_asm.c
@@ -46,12 +46,11 @@ extern void caml_win32_overflow_detection();
 #endif
 
 extern char * caml_code_area_start, * caml_code_area_end;
-CAMLextern int caml_is_in_code(void *);
 
-#define In_code_area(pc) \
+#define Is_in_code_area(pc) \
   ( ((char *)(pc) >= caml_code_area_start && \
      (char *)(pc) <= caml_code_area_end) \
-    || caml_is_in_code((void *)(pc)) )
+    || (Classify_addr(pc) & In_code_area) )
 
 /* This routine is the common entry point for garbage collection
    and signal handling.  It can trigger a callback to Caml code.
@@ -86,7 +85,7 @@ DECLARE_SIGNAL_HANDLER(handle_signal)
        Use the signal context to modify that register too, but only if
        we are inside Caml code (not inside C code). */
 #if defined(CONTEXT_PC) && defined(CONTEXT_YOUNG_LIMIT)
-    if (In_code_area(CONTEXT_PC))
+    if (Is_in_code_area(CONTEXT_PC))
       CONTEXT_YOUNG_LIMIT = (context_reg) caml_young_limit;
 #endif
   }
@@ -192,7 +191,7 @@ DECLARE_SIGNAL_HANDLER(segv_handler)
       && fault_addr < system_stack_top
       && fault_addr >= system_stack_top - limit.rlim_cur - 0x2000
 #ifdef CONTEXT_PC
-      && In_code_area(CONTEXT_PC)
+      && Is_in_code_area(CONTEXT_PC)
 #endif
       ) {
     /* Turn this into a Stack_overflow exception */
diff --git a/asmrun/startup.c b/asmrun/startup.c
index 54610e6c6..a05cf1325 100644
--- a/asmrun/startup.c
+++ b/asmrun/startup.c
@@ -23,6 +23,7 @@
 #include "fail.h"
 #include "gc.h"
 #include "gc_ctrl.h"
+#include "memory.h"
 #include "misc.h"
 #include "mlvalues.h"
 #include "osdeps.h"
@@ -35,33 +36,39 @@ extern int caml_parser_trace;
 
 CAMLexport header_t caml_atom_table[256];
 
-char * caml_static_data_start, * caml_static_data_end;
 char * caml_code_area_start, * caml_code_area_end;
 
 /* Initialize the atom table and the static data and code area limits. */
 
 struct segment { char * begin; char * end; };
 
-static void minmax_table(struct segment *table, char **min, char **max)
-{
-  int i;
-  *min = table[0].begin;
-  *max = table[0].end;
-  for (i = 1; table[i].begin != 0; i++) {
-    if (table[i].begin < *min) *min = table[i].begin;
-    if (table[i].end > *max) *max = table[i].end;
-  }
-}
-
 static void init_atoms(void)
 {
-  int i;
   extern struct segment caml_data_segments[], caml_code_segments[];
+  int i;
 
-  for (i = 0; i < 256; i++) caml_atom_table[i] = Make_header(0, i, Caml_white);
-  minmax_table(caml_data_segments,
-               &caml_static_data_start, &caml_static_data_end);
-  minmax_table(caml_code_segments, &caml_code_area_start, &caml_code_area_end);
+  for (i = 0; i < 256; i++) {
+    caml_atom_table[i] = Make_header(0, i, Caml_white);
+  }
+  if (caml_page_table_add(In_static_data,
+                          caml_atom_table, caml_atom_table + 256) != 0)
+    caml_fatal_error("Fatal error: not enough memory for the initial page table");
+
+  for (i = 0; caml_data_segments[i].begin != 0; i++) {
+    if (caml_page_table_add(In_static_data,
+                            caml_data_segments[i].begin,
+                            caml_data_segments[i].end) != 0)
+      caml_fatal_error("Fatal error: not enough memory for the initial page table");
+  }
+
+  caml_code_area_start = caml_code_segments[0].begin;
+  caml_code_area_end = caml_code_segments[0].end;
+  for (i = 1; caml_code_segments[i].begin != 0; i++) {
+    if (caml_code_segments[i].begin < caml_code_area_start)
+      caml_code_area_start = caml_code_segments[i].begin;
+    if (caml_code_segments[i].end > caml_code_area_end)
+      caml_code_area_end = caml_code_segments[i].end;
+  }
 }
 
 /* Configuration parameters and flags */
diff --git a/byterun/array.c b/byterun/array.c
index a9902b7c6..d8eb3a2b4 100644
--- a/byterun/array.c
+++ b/byterun/array.c
@@ -139,7 +139,7 @@ CAMLprim value caml_make_vect(value len, value init)
     res = Atom(0);
   }
   else if (Is_block(init)
-           && (Is_young(init) || Is_in_heap(init) || Is_atom(init))
+           && Is_in_value_area(init)
            && Tag_val(init) == Double_tag) {
     d = Double_val(init);
     wsize = size * Double_wosize;
@@ -181,7 +181,7 @@ CAMLprim value caml_make_array(value init)
   } else {
     v = Field(init, 0);
     if (Is_long(v)
-        || (!Is_young(v) && !Is_in_heap(v) && !Is_atom(v))
+        || ! Is_in_value_area(v)
         || Tag_val(v) != Double_tag) {
       CAMLreturn (init);
     } else {
diff --git a/byterun/compact.c b/byterun/compact.c
index a6860d529..25a0080b1 100644
--- a/byterun/compact.c
+++ b/byterun/compact.c
@@ -60,7 +60,7 @@ static void invert_pointer_at (word *p)
   /* Use Ecolor (q) == 0 instead of Is_block (q) because q could be an
      inverted pointer for an infix header (with Ecolor == 2). */
-  if (Ecolor (q) == 0 && Is_in_heap (q)){
+  if (Ecolor (q) == 0 && (Classify_addr (q) & In_heap)){
     switch (Ecolor (Hd_val (q))){
     case 0:
     case 3: /* Pointer or header: insert in inverted list. */
diff --git a/byterun/compare.c b/byterun/compare.c
index f6a18b1d4..42b1d9d73 100644
--- a/byterun/compare.c
+++ b/byterun/compare.c
@@ -104,7 +104,7 @@ static intnat compare_val(value v1, value v2, int total)
       if (Is_long(v2))
         return Long_val(v1) - Long_val(v2);
         /* Subtraction above cannot overflow and cannot result in UNORDERED */
-      if ((Is_young(v2) || Is_in_heap(v2) || Is_atom(v2)) &&
+      if (Is_in_value_area(v2) &&
          Tag_val(v2) == Forward_tag) {
        v2 = Forward_val(v2);
        continue;
@@ -112,7 +112,7 @@ static intnat compare_val(value v1, value v2, int total)
       return LESS;                /* v1 long < v2 block */
     }
     if (Is_long(v2)) {
-      if ((Is_young(v1) || Is_in_heap(v1) || Is_atom(v1)) &&
+      if (Is_in_value_area(v1) &&
          Tag_val(v1) == Forward_tag) {
        v1 = Forward_val(v1);
        continue;
@@ -122,8 +122,7 @@ static intnat compare_val(value v1, value v2, int total)
     /* If one of the objects is outside the heap (but is not an atom),
        use address comparison.  Since both addresses are 2-aligned,
        shift lsb off to avoid overflow in subtraction. */
-    if ((!Is_young(v1) && !Is_in_heap(v1) && !Is_atom(v1)) ||
-        (!Is_young(v2) && !Is_in_heap(v2) && !Is_atom(v2))) {
+    if (! Is_in_value_area(v1) || ! Is_in_value_area(v2)) {
       if (v1 == v2) goto next_item;
       return (v1 >> 1) - (v2 >> 1);
         /* Subtraction above cannot result in UNORDERED */
diff --git a/byterun/config.h b/byterun/config.h
index 25681e7f5..00c70978f 100644
--- a/byterun/config.h
+++ b/byterun/config.h
@@ -107,7 +107,7 @@ typedef struct { uint32 l, h; } uint64, int64;
 /* Memory model parameters */
 
 /* The size of a page for memory management (in bytes) is [1 << Page_log].
-   It must be a multiple of [sizeof (value)]. */
+   It must be a multiple of [sizeof (value)] and >= 8. */
 #define Page_log 12             /* A page is 4 kilobytes. */
 
 /* Initial size of stack (bytes). */
@@ -143,12 +143,13 @@ typedef struct { uint32 l, h; } uint64, int64;
 #define Heap_chunk_min (2 * Page_size / sizeof (value))
 
 /* Default size increment when growing the heap. (words)
-   Must be a multiple of [Page_size / sizeof (value)]. */
-#define Heap_chunk_def (15 * Page_size)
+   Must be a multiple of [Page_size / sizeof (value)].
+   (Approx 512 Kb for a 32-bit platform, 1 Mb for a 64-bit platform.) */
+#define Heap_chunk_def (31 * Page_size)
 
 /* Default initial size of the major heap (words);
    same constraints as for Heap_chunk_def. */
-#define Init_heap_def (15 * Page_size)
+#define Init_heap_def (31 * Page_size)
 
 /* Default speed setting for the major GC.  The heap will grow until
diff --git a/byterun/extern.c b/byterun/extern.c
index 8ff896e8a..0578d7dbe 100644
--- a/byterun/extern.c
+++ b/byterun/extern.c
@@ -306,14 +306,14 @@ static void extern_rec(value v)
     writecode32(CODE_INT32, n);
     return;
   }
-  if (Is_young(v) || Is_in_heap(v) || Is_atom(v)) {
+  if (Is_in_value_area(v)) {
     header_t hd = Hd_val(v);
     tag_t tag = Tag_hd(hd);
     mlsize_t sz = Wosize_hd(hd);
 
     if (tag == Forward_tag) {
       value f = Forward_val (v);
-      if (Is_block (f) && (Is_young (f) || Is_in_heap (f))
+      if (Is_block (f) && Is_in_value_area(f)
           && (Tag_val (f) == Forward_tag || Tag_val (f) == Lazy_tag
               || Tag_val (f) == Double_tag)){
         /* Do not short-circuit the pointer. */
diff --git a/byterun/finalise.c b/byterun/finalise.c
index e41131148..dce6edd61 100644
--- a/byterun/finalise.c
+++ b/byterun/finalise.c
@@ -85,7 +85,7 @@ void caml_final_update (void)
     if (Is_white_val (final_table[i].val)){
       if (Tag_val (final_table[i].val) == Forward_tag){
         value fv = Forward_val (final_table[i].val);
-        if (Is_block (fv) && (Is_young (fv) || Is_in_heap (fv))
+        if (Is_block (fv) && Is_in_value_area(fv)
            && (Tag_val (fv) == Forward_tag || Tag_val (fv) == Lazy_tag
                || Tag_val (fv) == Double_tag)){
          /* Do not short-circuit the pointer. */
@@ -206,7 +206,7 @@ void caml_final_empty_young (void)
 /* Put (f,v) in the recent set. */
 CAMLprim value caml_final_register (value f, value v)
 {
-  if (!(Is_block (v) && (Is_in_heap (v) || Is_young (v)))){
+  if (!(Is_block (v) && Is_in_heap_or_young(v))) {
     caml_invalid_argument ("Gc.finalise");
   }
   Assert (old <= young);
diff --git a/byterun/freelist.c b/byterun/freelist.c
index c463d91f7..d91f7f243 100644
--- a/byterun/freelist.c
+++ b/byterun/freelist.c
@@ -17,6 +17,7 @@
 #include "freelist.h"
 #include "gc.h"
 #include "gc_ctrl.h"
+#include "memory.h"
 #include "major_gc.h"
 #include "misc.h"
 #include "mlvalues.h"
diff --git a/byterun/gc_ctrl.c b/byterun/gc_ctrl.c
index 12bfc9b0a..1dce5cb08 100644
--- a/byterun/gc_ctrl.c
+++ b/byterun/gc_ctrl.c
@@ -458,6 +458,7 @@ void caml_init_gc (uintnat minor_size, uintnat major_size,
   caml_gc_message (-1, "### O'Caml runtime: debug mode ###\n", 0);
 #endif
 
+  caml_page_table_initialize(Bsize_wsize(minor_size) + major_heap_size);
   caml_set_minor_heap_size (Bsize_wsize (norm_minsize (minor_size)));
   caml_major_heap_increment = Bsize_wsize (norm_heapincr (major_incr));
   caml_percent_free = norm_pfree (percent_fr);
diff --git a/byterun/hash.c b/byterun/hash.c
index feb4619d1..13709d4a7 100644
--- a/byterun/hash.c
+++ b/byterun/hash.c
@@ -62,7 +62,7 @@ static void hash_aux(value obj)
      We can inspect the block contents. */
   Assert (Is_block (obj));
-  if (Is_young(obj) || Is_in_heap(obj) || Is_atom(obj)) {
+  if (Is_in_value_area(obj)) {
     tag = Tag_val(obj);
     switch (tag) {
     case String_tag:
diff --git a/byterun/instrtrace.c b/byterun/instrtrace.c
index 1932e08f6..3734d8241 100644
--- a/byterun/instrtrace.c
+++ b/byterun/instrtrace.c
@@ -181,9 +181,7 @@ caml_trace_value_file (value v, code_t prog, int proglen, FILE * f)
   fprintf (f, "%#lx", v);
   if (!v)
     return;
-  if (Is_atom (v))
-    fprintf (f, "=atom%ld", v - Atom (0));
-  else if (prog && v % sizeof (int) == 0
+  if (prog && v % sizeof (int) == 0
       && (code_t) v >= prog
       && (code_t) v < (code_t) ((char *) prog + proglen))
     fprintf (f, "=code@%d", (code_t) v - prog);
diff --git a/byterun/major_gc.c b/byterun/major_gc.c
index 54759b26f..18da47721 100644
--- a/byterun/major_gc.c
+++ b/byterun/major_gc.c
@@ -31,9 +31,7 @@ uintnat caml_percent_free;
 intnat caml_major_heap_increment;
-CAMLexport char *caml_heap_start, *caml_heap_end;
-CAMLexport page_table_entry *caml_page_table;
-asize_t caml_page_low, caml_page_high;
+CAMLexport char *caml_heap_start;
 char *caml_gc_sweep_hp;
 int caml_gc_phase;        /* always Phase_mark, Phase_sweep, or Phase_idle */
 static value *gray_vals;
@@ -143,7 +141,7 @@ static void mark_slice (intnat work)
         hd = Hd_val (child);
         if (Tag_hd (hd) == Forward_tag){
           value f = Forward_val (child);
-          if (Is_block (f) && (Is_young (f) || Is_in_heap (f))
+          if (Is_block (f) && Is_in_value_area(f)
               && (Tag_val (f) == Forward_tag || Tag_val (f) == Lazy_tag
                   || Tag_val (f) == Double_tag)){
             /* Do not short-circuit the pointer. */
@@ -214,7 +212,7 @@ static void mark_slice (intnat work)
            && Is_block (curfield) && Is_in_heap (curfield)){
          if (Tag_val (curfield) == Forward_tag){
            value f = Forward_val (curfield);
-           if (Is_block (f) && (Is_young (f) || Is_in_heap (f))){
+           if (Is_block (f) && Is_in_value_area(f)) {
              if (Tag_val (f) == Forward_tag || Tag_val (f) == Lazy_tag
                  || Tag_val (f) == Double_tag){
                /* Do not short-circuit the pointer. */
@@ -441,10 +439,6 @@ asize_t caml_round_heap_chunk_size (asize_t request)
 
 void caml_init_major_heap (asize_t heap_size)
 {
-  asize_t i;
-  asize_t page_table_size;
-  page_table_entry *page_table_block;
-
   caml_stat_heap_size = clip_heap_chunk_size (heap_size);
   caml_stat_top_heap_size = caml_stat_heap_size;
   Assert (caml_stat_heap_size % Page_size == 0);
@@ -452,23 +446,11 @@ void caml_init_major_heap (asize_t heap_size)
   if (caml_heap_start == NULL)
     caml_fatal_error ("Fatal error: not enough memory for the initial heap.\n");
   Chunk_next (caml_heap_start) = NULL;
-  caml_heap_end = caml_heap_start + caml_stat_heap_size;
-  Assert ((uintnat) caml_heap_end % Page_size == 0);
-
   caml_stat_heap_chunks = 1;
 
-  caml_page_low = Page (caml_heap_start);
-  caml_page_high = Page (caml_heap_end);
-
-  page_table_size = caml_page_high - caml_page_low;
-  page_table_block =
-    (page_table_entry *) malloc (page_table_size * sizeof (page_table_entry));
-  if (page_table_block == NULL){
-    caml_fatal_error ("Fatal error: not enough memory for the initial heap.\n");
-  }
-  caml_page_table = page_table_block - caml_page_low;
-  for (i = Page (caml_heap_start); i < Page (caml_heap_end); i++){
-    caml_page_table [i] = In_heap;
+  if (caml_page_table_add(In_heap, caml_heap_start,
+                          caml_heap_start + caml_stat_heap_size) != 0) {
+    caml_fatal_error ("Fatal error: not enough memory for the initial page table.\n");
   }
 
   caml_fl_init_merge ();
@@ -478,7 +460,7 @@ void caml_init_major_heap (asize_t heap_size)
   gray_vals_size = 2048;
   gray_vals = (value *) malloc (gray_vals_size * sizeof (value));
   if (gray_vals == NULL)
-    caml_fatal_error ("Fatal error: not enough memory for the initial heap.\n");
+    caml_fatal_error ("Fatal error: not enough memory for the gray cache.\n");
   gray_vals_cur = gray_vals;
   gray_vals_end = gray_vals + gray_vals_size;
   heap_is_pure = 1;
diff --git a/byterun/major_gc.h b/byterun/major_gc.h
index 47aa5e59f..5607a2675 100644
--- a/byterun/major_gc.h
+++ b/byterun/major_gc.h
@@ -42,27 +42,10 @@ extern uintnat caml_fl_size_at_phase_change;
 #define Phase_sweep 1
 #define Phase_idle 2
 
-#ifdef __alpha
-typedef int page_table_entry;
-#else
-typedef char page_table_entry;
-#endif
-
 CAMLextern char *caml_heap_start;
-CAMLextern char *caml_heap_end;
 extern uintnat total_heap_size;
-CAMLextern page_table_entry *caml_page_table;
-extern asize_t caml_page_low, caml_page_high;
 extern char *caml_gc_sweep_hp;
 
-#define In_heap 1
-#define Not_in_heap 0
-#define Page(p) ((uintnat) (p) >> Page_log)
-#define Is_in_heap(p) \
-  (Assert (Is_block ((value) (p))), \
-   (addr)(p) >= (addr)caml_heap_start && (addr)(p) < (addr)caml_heap_end \
-   && caml_page_table [Page (p)])
-
 void caml_init_major_heap (asize_t);           /* size in bytes */
 asize_t caml_round_heap_chunk_size (asize_t);  /* size in bytes */
 void caml_darken (value, value *);
diff --git a/byterun/memory.c b/byterun/memory.c
index 660263e2e..934610c9c 100644
--- a/byterun/memory.c
+++ b/byterun/memory.c
@@ -27,10 +27,159 @@
 #include "mlvalues.h"
 #include "signals.h"
 
-#ifdef USE_MMAP_INSTEAD_OF_MALLOC
-extern char * caml_aligned_mmap (asize_t size, int modulo, void ** block);
-extern void caml_aligned_munmap (char * addr, asize_t size);
+/* Page table management */
+
+#define Page(p) ((uintnat) (p) >> Page_log)
+#define Page_mask ((uintnat) -1 << Page_log)
+
+/* The page table is represented sparsely as a hash table
+   with linear probing */
+
+struct page_table {
+  mlsize_t size;                /* size == 1 << (wordsize - shift) */
+  int shift;
+  mlsize_t mask;                /* mask == size - 1 */
+  mlsize_t occupancy;
+  uintnat * entries;            /* [size] */
+};
+
+static struct page_table caml_page_table;
+
+/* Page table entries are the logical 'or' of
+   - the key: address of a page (low Page_log bits = 0)
+   - the data: a 8-bit integer */
+
+#define Page_entry_matches(entry,addr) \
+  ((((entry) ^ (addr)) & Page_mask) == 0)
+
+/* Multiplicative Fibonacci hashing
+   (Knuth, TAOCP vol 3, section 6.4, page 518).
+   HASH_FACTOR is (sqrt(5) - 1) / 2 * 2^wordsize. */
+#ifdef ARCH_SIXTYFOUR
+#define HASH_FACTOR 11400714819323198486UL
+#else
+#define HASH_FACTOR 2654435769UL
+#endif
+#define Hash(v) (((v) * HASH_FACTOR) >> caml_page_table.shift)
+
+int caml_page_table_lookup(void * addr)
+{
+  uintnat h, e;
+
+  h = Hash(Page(addr));
+  /* The first hit is almost always successful, so optimize for this case */
+  e = caml_page_table.entries[h];
+  if (Page_entry_matches(e, (uintnat)addr)) return e & 0xFF;
+  while(1) {
+    if (e == 0) return 0;
+    h = (h + 1) & caml_page_table.mask;
+    e = caml_page_table.entries[h];
+    if (Page_entry_matches(e, (uintnat)addr)) return e & 0xFF;
+  }
+}
+
+int caml_page_table_initialize(mlsize_t bytesize)
+{
+  uintnat pagesize = Page(bytesize);
+
+  caml_page_table.size = 1;
+  caml_page_table.shift = 8 * sizeof(uintnat);
+  /* Aim for initial load factor between 1/4 and 1/2 */
+  while (caml_page_table.size < 2 * pagesize) {
+    caml_page_table.size <<= 1;
+    caml_page_table.shift -= 1;
+  }
+  caml_page_table.mask = caml_page_table.size - 1;
+  caml_page_table.occupancy = 0;
+  caml_page_table.entries = calloc(caml_page_table.size, sizeof(uintnat));
+  if (caml_page_table.entries == NULL)
+    return -1;
+  else
+    return 0;
+}
+
+static int caml_page_table_resize(void)
+{
+  struct page_table old = caml_page_table;
+  uintnat * new_entries;
+  uintnat i, h;
+
+  caml_gc_message (0x08, "Growing page table to %lu entries\n",
+                   caml_page_table.size);
+
+  new_entries = calloc(2 * old.size, sizeof(uintnat));
+  if (new_entries == NULL) {
+    caml_gc_message (0x08, "No room for growing page table\n", 0);
+    return -1;
+  }
+
+  caml_page_table.size = 2 * old.size;
+  caml_page_table.shift = old.shift - 1;
+  caml_page_table.mask = caml_page_table.size - 1;
+  caml_page_table.occupancy = old.occupancy;
+  caml_page_table.entries = new_entries;
+
+  for (i = 0; i < old.size; i++) {
+    uintnat e = old.entries[i];
+    if (e == 0) continue;
+    h = Hash(Page(e));
+    while (caml_page_table.entries[h] != 0)
+      h = (h + 1) & caml_page_table.mask;
+    caml_page_table.entries[h] = e;
+  }
+
+  free(old.entries);
+  return 0;
+}
+
+static int caml_page_table_modify(uintnat page, int toclear, int toset)
+{
+  uintnat h;
+
+  Assert ((page & ~Page_mask) == 0);
+
+  /* Resize to keep load factor below 1/2 */
+  if (caml_page_table.occupancy * 2 >= caml_page_table.size) {
+    if (caml_page_table_resize() != 0) return -1;
+  }
+  h = Hash(Page(page));
+  while (1) {
+    if (caml_page_table.entries[h] == 0) {
+      caml_page_table.entries[h] = page | toset;
+      caml_page_table.occupancy++;
+      break;
+    }
+    if (Page_entry_matches(caml_page_table.entries[h], page)) {
+      caml_page_table.entries[h] =
+        (caml_page_table.entries[h] & ~toclear) | toset;
+      break;
+    }
+    h = (h + 1) & caml_page_table.mask;
+  }
+  return 0;
+}
+
+int caml_page_table_add(int kind, void * start, void * end)
+{
+  uintnat pstart = (uintnat) start & Page_mask;
+  uintnat pend = ((uintnat) end - 1) & Page_mask;
+  uintnat p;
+
+  for (p = pstart; p <= pend; p += Page_size)
+    if (caml_page_table_modify(p, 0, kind) != 0) return -1;
+  return 0;
+}
+
+int caml_page_table_remove(int kind, void * start, void * end)
+{
+  uintnat pstart = (uintnat) start & Page_mask;
+  uintnat pend = ((uintnat) end - 1) & Page_mask;
+  uintnat p;
+
+  for (p = pstart; p <= pend; p += Page_size)
+    if (caml_page_table_modify(p, kind, 0) != 0) return -1;
+  return 0;
+}
 
 /* Allocate a block of the requested size, to be passed to
    [caml_add_to_heap] later.
@@ -44,13 +193,8 @@ char *caml_alloc_for_heap (asize_t request)
   char *mem;
   void *block;
   Assert (request % Page_size == 0);
-#ifdef USE_MMAP_INSTEAD_OF_MALLOC
-  mem = caml_aligned_mmap (request + sizeof (heap_chunk_head),
-                           sizeof (heap_chunk_head), &block);
-#else
   mem = caml_aligned_malloc (request + sizeof (heap_chunk_head),
                              sizeof (heap_chunk_head), &block);
-#endif
   if (mem == NULL) return NULL;
   mem += sizeof (heap_chunk_head);
   Chunk_size (mem) = request;
@@ -63,12 +207,7 @@ char *caml_alloc_for_heap (asize_t request)
 */
 void caml_free_for_heap (char *mem)
 {
-#ifdef USE_MMAP_INSTEAD_OF_MALLOC
-  caml_aligned_munmap (Chunk_block (mem),
-                       Chunk_size (mem) + sizeof (heap_chunk_head));
-#else
   free (Chunk_block (mem));
-#endif
 }
 
 /* Take a chunk of memory as argument, which must be the result of a
@@ -82,7 +221,6 @@ void caml_free_for_heap (char *mem)
 */
 int caml_add_to_heap (char *m)
 {
-  asize_t i;
   Assert (Chunk_size (m) % Page_size == 0);
 #ifdef DEBUG
   /* Should check the contents of the block. */
@@ -91,56 +229,9 @@ int caml_add_to_heap (char *m)
   caml_gc_message (0x04, "Growing heap to %luk bytes\n",
                    (caml_stat_heap_size + Chunk_size (m)) / 1024);
 
-  /* Extend the page table as needed. */
-  if (Page (m) < caml_page_low){
-    page_table_entry *block, *new_page_table;
-    asize_t new_page_low = Page (m);
-    asize_t new_size = caml_page_high - new_page_low;
-
-    caml_gc_message (0x08, "Growing page table to %lu entries\n", new_size);
-    block = malloc (new_size * sizeof (page_table_entry));
-    if (block == NULL){
-      caml_gc_message (0x08, "No room for growing page table\n", 0);
-      return -1;
-    }
-    new_page_table = block - new_page_low;
-    for (i = new_page_low; i < caml_page_low; i++){
-      new_page_table [i] = Not_in_heap;
-    }
-    for (i = caml_page_low; i < caml_page_high; i++){
-      new_page_table [i] = caml_page_table [i];
-    }
-    free (caml_page_table + caml_page_low);
-    caml_page_table = new_page_table;
-    caml_page_low = new_page_low;
-  }
-  if (Page (m + Chunk_size (m)) > caml_page_high){
-    page_table_entry *block, *new_page_table;
-    asize_t new_page_high = Page (m + Chunk_size (m));
-    asize_t new_size = new_page_high - caml_page_low;
-
-    caml_gc_message (0x08, "Growing page table to %lu entries\n", new_size);
-    block = malloc (new_size * sizeof (page_table_entry));
-    if (block == NULL){
-      caml_gc_message (0x08, "No room for growing page table\n", 0);
-      return -1;
-    }
-    new_page_table = block - caml_page_low;
-    for (i = caml_page_low; i < caml_page_high; i++){
-      new_page_table [i] = caml_page_table [i];
-    }
-    for (i = caml_page_high; i < new_page_high; i++){
-      new_page_table [i] = Not_in_heap;
-    }
-    free (caml_page_table + caml_page_low);
-    caml_page_table = new_page_table;
-    caml_page_high = new_page_high;
-  }
-
-  /* Mark the pages as being in the heap. */
-  for (i = Page (m); i < Page (m + Chunk_size (m)); i++){
-    caml_page_table [i] = In_heap;
-  }
+  /* Register block in page table */
+  if (caml_page_table_add(In_heap, m, m + Chunk_size(m)) != 0)
+    return -1;
 
   /* Chain this heap chunk. */
   {
@@ -157,10 +248,6 @@ int caml_add_to_heap (char *m)
     ++ caml_stat_heap_chunks;
   }
 
-  /* Update the heap bounds as needed. */
-  /* already done:   if (m < caml_heap_start) heap_start = m; */
-  if (m + Chunk_size (m) > caml_heap_end) caml_heap_end = m + Chunk_size (m);
-
   caml_stat_heap_size += Chunk_size (m);
   if (caml_stat_heap_size > caml_stat_top_heap_size){
     caml_stat_top_heap_size = caml_stat_heap_size;
@@ -201,7 +288,6 @@ static char *expand_heap (mlsize_t request)
 void caml_shrink_heap (char *chunk)
 {
   char **cp;
-  asize_t i;
 
   /* Never deallocate the first block, because caml_heap_start is both the
      first block and the base address for page numbers, and we don't
@@ -232,9 +318,7 @@ void caml_shrink_heap (char *chunk)
   *cp = Chunk_next (chunk);
 
   /* Remove the pages of [chunk] from the page table. */
-  for (i = Page (chunk); i < Page (chunk + Chunk_size (chunk)); i++){
-    caml_page_table [i] = Not_in_heap;
-  }
+  caml_page_table_remove(In_heap, chunk, chunk + Chunk_size (chunk));
 
   /* Free the [malloc] block that contains [chunk]. */
   caml_free_for_heap (chunk);
diff --git a/byterun/memory.h b/byterun/memory.h
index d369b14b9..f640aa7cd 100644
--- a/byterun/memory.h
+++ b/byterun/memory.h
@@ -49,6 +49,23 @@ color_t caml_allocation_color (void *hp);
 
 /* <private> */
 
+#define Not_in_heap 0
+#define In_heap 1
+#define In_young 2
+#define In_static_data 4
+#define In_code_area 8
+
+#define Classify_addr(a) (caml_page_table_lookup((void *)(a)))
+#define Is_in_value_area(a) \
+  (Classify_addr(a) & (In_heap | In_young | In_static_data))
+#define Is_in_heap(a) (Classify_addr(a) & In_heap)
+#define Is_in_heap_or_young(a) (Classify_addr(a) & (In_heap | In_young))
+
+int caml_page_table_lookup(void * addr);
+int caml_page_table_add(int kind, void * start, void * end);
+int caml_page_table_remove(int kind, void * start, void * end);
+int caml_page_table_initialize(mlsize_t bytesize);
+
 #ifdef DEBUG
 #define DEBUG_clear(result, wosize) do{ \
     uintnat caml__DEBUG_i; \
diff --git a/byterun/minor_gc.c b/byterun/minor_gc.c
index 396aff504..16526f85e 100644
--- a/byterun/minor_gc.c
+++ b/byterun/minor_gc.c
@@ -29,6 +29,7 @@
 #include "weak.h"
 
 asize_t caml_minor_heap_size;
+static void *caml_young_base = NULL;
 CAMLexport char *caml_young_start = NULL, *caml_young_end = NULL;
 CAMLexport char *caml_young_ptr = NULL, *caml_young_limit = NULL;
@@ -71,16 +72,23 @@ static void clear_table (struct caml_ref_table *tbl)
 void caml_set_minor_heap_size (asize_t size)
 {
   char *new_heap;
+  void *new_heap_base;
 
   Assert (size >= Minor_heap_min);
   Assert (size <= Minor_heap_max);
   Assert (size % sizeof (value) == 0);
   if (caml_young_ptr != caml_young_end) caml_minor_collection ();
   Assert (caml_young_ptr == caml_young_end);
-  new_heap = (char *) caml_stat_alloc (size);
+  new_heap = caml_aligned_malloc(size, 0, &new_heap_base);
+  if (new_heap == NULL) caml_raise_out_of_memory();
+  if (caml_page_table_add(In_young, new_heap, new_heap + size) != 0)
+    caml_raise_out_of_memory();
+
   if (caml_young_start != NULL){
-    caml_stat_free (caml_young_start);
+    caml_page_table_remove(In_young, caml_young_start, caml_young_end);
+    free (caml_young_base);
   }
+  caml_young_base = new_heap_base;
   caml_young_start = new_heap;
   caml_young_end = new_heap + size;
   caml_young_limit = caml_young_start;
@@ -146,7 +154,7 @@ void caml_oldify_one (value v, value *p)
       tag_t ft = 0;
 
       Assert (tag == Forward_tag);
-      if (Is_block (f) && (Is_young (f) || Is_in_heap (f))){
+      if (Is_block (f) && Is_in_value_area(f)) {
        ft = Tag_val (Hd_val (f) == 0 ? Field (f, 0) : f);
      }
      if (ft == Forward_tag || ft == Lazy_tag || ft == Double_tag){
diff --git a/byterun/mlvalues.h b/byterun/mlvalues.h
index 4d8a690ba..110c808eb 100644
--- a/byterun/mlvalues.h
+++ b/byterun/mlvalues.h
@@ -268,24 +268,6 @@ CAMLextern int64 caml_Int64_val(value v);
 CAMLextern header_t caml_atom_table[];
 #define Atom(tag) (Val_hp (&(caml_atom_table [(tag)])))
 
-/* Is_atom tests whether a well-formed block is statically allocated
-   outside the heap. For the bytecode system, only zero-sized block (Atoms)
-   fall in this class. For the native-code generator, data
-   emitted by the code generator (as described in the table
-   caml_data_segments) are also atoms. */
-
-#ifndef NATIVE_CODE
-#define Is_atom(v) ((v) >= Atom(0) && (v) <= Atom(255))
-#else
-CAMLextern char * caml_static_data_start, * caml_static_data_end;
-CAMLextern int caml_is_in_data(void *);
-#define Is_atom(v) \
-  ( ( (char *)(v) >= caml_static_data_start \
-      &&(char *)(v) < caml_static_data_end ) \
-    || ((v) >= Atom(0) && (v) <= Atom(255)) \
-    || (caml_is_in_data((void *)v)) )
-#endif
-
 /* Booleans are integers 0 or 1 */
 
 #define Val_bool(x) Val_int((x) != 0)
diff --git a/byterun/obj.c b/byterun/obj.c
index ee16ba575..3ee12201e 100644
--- a/byterun/obj.c
+++ b/byterun/obj.c
@@ -67,7 +67,7 @@ CAMLprim value caml_obj_tag(value arg)
 {
   if (Is_long (arg)){
     return Val_int (1000);
-  }else if (Is_young (arg) || Is_in_heap (arg) || Is_atom (arg)){
+  }else if (Is_in_value_area (arg)){
     return Val_int(Tag_val(arg));
   }else{
     return Val_int (1001);
@@ -171,7 +171,7 @@ CAMLprim value caml_obj_truncate (value v, value newsize)
 
 CAMLprim value caml_lazy_follow_forward (value v)
 {
-  if (Is_block (v) && (Is_young (v) || Is_in_heap (v))
+  if (Is_block (v) && Is_in_value_area(v)
       && Tag_val (v) == Forward_tag){
     return Forward_val (v);
   }else{
diff --git a/byterun/startup.c b/byterun/startup.c
index c2cea2c4b..b639cf571 100644
--- a/byterun/startup.c
+++ b/byterun/startup.c
@@ -72,6 +72,10 @@ static void init_atoms(void)
 {
   int i;
   for(i = 0; i < 256; i++) caml_atom_table[i] = Make_header(0, i, Caml_white);
+  if (caml_page_table_add(In_static_data,
+                          caml_atom_table, caml_atom_table + 256) != 0) {
+    caml_fatal_error("Fatal error: not enough memory for the initial page table");
+  }
 }
 
 /* Read the trailer of a bytecode file */
diff --git a/byterun/unix.c b/byterun/unix.c
index 0eb979f0a..00b27de74 100644
--- a/byterun/unix.c
+++ b/byterun/unix.c
@@ -387,52 +387,6 @@ char * caml_dlerror(void)
 
 #endif
 
-#ifdef USE_MMAP_INSTEAD_OF_MALLOC
-
-/* The code below supports the use of mmap() rather than malloc()
-   for allocating the chunks composing the major heap.
-   This code is needed on 64-bit Linux platforms, where the native
-   malloc() implementation can return pointers several *exabytes* apart,
-   (some coming from mmap(), other from sbrk()); this makes the
-   page table *way* too large. */
-
-#include <sys/mman.h>
-
-char *caml_aligned_mmap (asize_t size, int modulo, void **block)
-{
-  char *raw_mem;
-  uintnat aligned_mem;
-  Assert (modulo < Page_size);
-  raw_mem = (char *) mmap(NULL, size + Page_size, PROT_READ | PROT_WRITE,
-                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-  if (raw_mem == MAP_FAILED) return NULL;
-  *block = raw_mem;
-  raw_mem += modulo;      /* Address to be aligned */
-  aligned_mem = (((uintnat) raw_mem / Page_size + 1) * Page_size);
-#ifdef DEBUG
-  {
-    uintnat *p;
-    uintnat *p0 = (void *) *block,
-            *p1 = (void *) (aligned_mem - modulo),
-            *p2 = (void *) (aligned_mem - modulo + size),
-            *p3 = (void *) ((char *) *block + size + Page_size);
-
-    for (p = p0; p < p1; p++) *p = Debug_filler_align;
-    for (p = p1; p < p2; p++) *p = Debug_uninit_align;
-    for (p = p2; p < p3; p++) *p = Debug_filler_align;
-  }
-#endif
-  return (char *) (aligned_mem - modulo);
-}
-
-void caml_aligned_munmap (char * addr, asize_t size)
-{
-  int retcode = munmap (addr, size + Page_size);
-  Assert(retcode == 0);
-}
-
-#endif
-
 /* Add to [contents] the (short) names of the files contained in
    the directory named [dirname].  No entries are added for [.] and [..].
    Return 0 on success, -1 on error; set errno in the case of error. */
diff --git a/byterun/weak.c b/byterun/weak.c
index 01a35ec89..1f121af3a 100644
--- a/byterun/weak.c
+++ b/byterun/weak.c
@@ -114,7 +114,7 @@ CAMLprim value caml_weak_get_copy (value ar, value n)
   v = Field (ar, offset);
   if (v == caml_weak_none) CAMLreturn (None_val);
-  if (Is_block (v) && (Is_young (v) || Is_in_heap (v))){
+  if (Is_block (v) && Is_in_heap_or_young(v)) {
     elt = caml_alloc (Wosize_val (v), Tag_val (v));
     /* The GC may erase or move v during this call to caml_alloc. */
     v = Field (ar, offset);
diff --git a/configure b/configure
--- a/configure
+++ b/configure
@@ -598,14 +598,6 @@
 if test -z "$mkmaindll"; then
   mkmaindll=$mksharedlib
 fi
 
-# Further machine-specific hacks
-
-case "$host" in
-  ia64-*-linux*|alpha*-*-linux*|x86_64-*-linux*)
-    echo "Will use mmap() instead of malloc() for allocation of major heap chunks."
-    echo "#define USE_MMAP_INSTEAD_OF_MALLOC" >> s.h;;
-esac
-
 # Configure the native-code compiler
 
 arch=none
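
A closing note on the entry encoding used in byterun/memory.c above: since a page-aligned address has its low Page_log bits equal to zero, the key (the page address) and the data (the In_heap / In_young / In_static_data / In_code_area bitmask) fit in a single word, and membership plus classification costs one XOR, one mask, and one byte extraction. Below is a small worked example of the Page_entry_matches test; the addresses are hypothetical, chosen only to make the arithmetic visible.

```c
#include <stdint.h>
#include <stdio.h>

#define PAGE_LOG  12
#define PAGE_MASK ((uintptr_t)-1 << PAGE_LOG)
#define IN_HEAP   1   /* same value as In_heap in byterun/memory.h */

int main(void)
{
  uintptr_t page  = (uintptr_t)0x7f3a2000;   /* hypothetical page address */
  uintptr_t entry = page | IN_HEAP;          /* key and data share one word */
  uintptr_t addr  = (uintptr_t)0x7f3a2abc;   /* an address inside that page */

  /* Page_entry_matches: XOR cancels the common page bits; the mask then
     discards both the in-page offset of addr and the kind byte of entry. */
  printf("matches: %d\n", ((entry ^ addr) & PAGE_MASK) == 0);  /* prints 1 */
  printf("kind:    %d\n", (int)(entry & 0xFF));                /* prints 1 */
  return 0;
}
```
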