LOGIN   :::   RECOVER PASS   :::   GET ACCOUNT    
Browse
  • Projects
  • Code (CVS)
  • Forums
  • News
  • Articles
  • Polls
  •  
    OpenCores
  • FAQ
  • CVS HowTo
  • Mission
  • Media
  • Tools
  • Advertise
  • Mirrors
  • Logos
  • Contact us
  • Job Opportunity
  •  
    Tools
  • Search
      
  • Download Cores (CVSGet)
  •  
    More
  • Wishbone
  • Perlilog
  • EDA tools
  • OpenTech CD
  •  
    Navigation: All forums > Cvs-checkins > Message List > Message Post

    Message

    Reply | Reply all
    Date Prev | Date Next | Thread Prev | Thread Next Date Index | Thread Index

    From: cvs at opencores.org<cvs@o...>
    Date: Tue Dec 20 12:47:05 CET 2005
    Subject: [cvs-checkins] MODIFIED: or1k ...
    Top
    Date: 00/05/12 20:12:47

    Added: or1k/rc203soc/sw/uClinux/mm Makefile filemap.c kmalloc.c
    memory.c mlock.c mmap.c mprotect.c mremap.c
    page_alloc.c page_io.c swap.c swap_state.c
    swapfile.c vmalloc.c vmscan.c
    Log:
    First Import of RC20x uClinux


    Revision Changes Path
    1.1 or1k/rc203soc/sw/uClinux/mm/Makefile

    http://www.opencores.org/cvsweb.shtml/or1k/rc203soc/sw/uClinux/mm/Makefile?rev=1.1&content-type=text/x-cvsweb-markup

    Index: Makefile
    ===================================================================
    #
    # Makefile for the linux memory manager.
    #
    # Note! Dependencies are done automagically by 'make dep', which also
    # removes any old dependencies. DON'T put your own dependencies here
    # unless it's something special (ie not a .c file).
    #
    # Note 2! The CFLAGS definition is now in the main makefile...

    O_TARGET := mm.o
    O_OBJS := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
    kmalloc.o vmalloc.o \
    swap.o vmscan.o page_io.o page_alloc.o swap_state.o swapfile.o

    include $(TOPDIR)/Rules.make



    1.1 or1k/rc203soc/sw/uClinux/mm/filemap.c

    http://www.opencores.org/cvsweb.shtml/or1k/rc203soc/sw/uClinux/mm/filemap.c?rev=1.1&content-type=text/x-cvsweb-markup

    Index: filemap.c
    ===================================================================
    /*
    * linux/mm/filemap.c
    *
    * Copyright (C) 1994, 1995 Linus Torvalds
    */

    /*
    * This file handles the generic file mmap semantics used by
    * most "normal" filesystems (but you don't /have/ to use this:
    * the NFS filesystem does this differently, for example)
    */
    #include <linux/config.h> /* CONFIG_READA_SMALL */
    #include <linux/stat.h>
    #include <linux/sched.h>
    #include <linux/kernel.h>
    #include <linux/mm.h>
    #include <linux/shm.h>
    #include <linux/errno.h>
    #include <linux/mman.h>
    #include <linux/string.h>
    #include <linux/malloc.h>
    #include <linux/fs.h>
    #include <linux/locks.h>
    #include <linux/pagemap.h>
    #include <linux/swap.h>

    #include <asm/segment.h>
    #include <asm/system.h>
    #include <asm/pgtable.h>

    /*
    * Shared mappings implemented 30.11.1994. It's not fully working yet,
    * though.
    *
    * Shared mappings now work. 15.8.1995 Bruno.
    */

    unsigned long page_cache_size = 0;
    struct page * page_hash_table[PAGE_HASH_SIZE];

    /*
    * Simple routines for both non-shared and shared mappings.
    */

    #define release_page(page) __free_page((page))

    /*
    * Invalidate the pages of an inode, removing all pages that aren't
    * locked down (those are sure to be up-to-date anyway, so we shouldn't
    * invalidate them).
    */
    void invalidate_inode_pages(struct inode * inode)
    {
    struct page ** p;
    struct page * page;

    p = &inode->i_pages;
    while ((page = *p) != NULL) {
    if (PageLocked(page)) {
    p = &page->next;
    continue;
    } inode->i_nrpages--; if ((*p = page->next) != NULL) (*p)->prev = page->prev; page->dirty = 0; page->next = NULL; page->prev = NULL; remove_page_from_hash_queue(page); page->inode = NULL; __free_page(page); continue; } } /* * Truncate the page cache at a set offset, removing the pages * that are beyond that offset (and zeroing out partial pages). */ void truncate_inode_pages(struct inode * inode, unsigned long start) { struct page ** p; struct page * page; repeat: p = &inode->i_pages; while ((page = *p) != NULL) { unsigned long offset = page->offset; /* page wholly truncated - free it */ if (offset >= start) { if (PageLocked(page)) { __wait_on_page(page); goto repeat; } inode->i_nrpages--; if ((*p = page->next) != NULL) (*p)->prev = page->prev; page->dirty = 0; page->next = NULL; page->prev = NULL; remove_page_from_hash_queue(page); page->inode = NULL; __free_page(page); continue; } p = &page->next; offset = start - offset; /* partial truncate, clear end of page */ if (offset < PAGE_SIZE) { unsigned long address = page_address(page); memset((void *) (offset + address), 0, PAGE_SIZE - offset); flush_page_to_ram(address); } } } int shrink_mmap(int priority, int dma, int free_buf) { static int clock = 0; struct page * page; unsigned long limit = MAP_NR(high_memory); struct buffer_head *tmp, *bh; int count_max, count_min; count_max = (limit<<1) >> (priority>>1); count_min = (limit<<1) >> (priority); page = mem_map + clock; do { count_max--; if (page->inode || page->buffers) count_min--; if (PageLocked(page)) goto next; if (dma && !PageDMA(page)) goto next; /* First of all, regenerate the page's referenced bit from any buffers in the page */ bh = page->buffers; if (bh) { tmp = bh; do { if (buffer_touched(tmp)) { clear_bit(BH_Touched, &tmp->b_state); set_bit(PG_referenced, &page->flags); } tmp = tmp->b_this_page; } while (tmp != bh); } /* We can't throw away shared pages, but we do mark them as referenced. This relies on the fact that no page is currently in both the page cache and the buffer cache; we'd have to modify the following test to allow for that case. */ switch (page->count) { case 1: /* If it has been referenced recently, don't free it */ if (clear_bit(PG_referenced, &page->flags)) { /* age this page potential used */ if (priority < 4) age_page(page); break; } /* is it a page cache page? */ if (page->inode) { remove_page_from_hash_queue(page); remove_page_from_inode_queue(page); __free_page(page); return 1; } /* is it a buffer cache page? */ if (free_buf && bh && try_to_free_buffer(bh, &bh, 6)) return 1; break; default: /* more than one users: we can't throw it away */ set_bit(PG_referenced, &page->flags); /* fall through */ case 0: /* nothing */ } next: page++; clock++; if (clock >= limit) { clock = 0; page = mem_map; } } while (count_max > 0 && count_min > 0); return 0; } /* * This is called from try_to_swap_out() when we try to get rid of some * pages.. If we're unmapping the last occurrence of this page, we also * free it from the page hash-queues etc, as we don't want to keep it * in-core unnecessarily. */ unsigned long page_unuse(unsigned long page) { struct page * p = mem_map + MAP_NR(page); int count = p->count; if (count != 2) return count; if (!p->inode) return count; remove_page_from_hash_queue(p); remove_page_from_inode_queue(p); free_page(page); return 1; } /* * Update a page cache copy, when we're doing a "write()" system call * See also "update_vm_cache()". */ void update_vm_cache(struct inode * inode, unsigned long pos, const char * buf, int count) { unsigned long offset, len; offset = (pos & ~PAGE_MASK); pos = pos & PAGE_MASK; len = PAGE_SIZE - offset; do { struct page * page; if (len > count) len = count; page = find_page(inode, pos); if (page) { wait_on_page(page); memcpy((void *) (offset + page_address(page)), buf, len); release_page(page); } count -= len; buf += len; len = PAGE_SIZE; offset = 0; pos += PAGE_SIZE; } while (count); } static inline void add_to_page_cache(struct page * page, struct inode * inode, unsigned long offset, struct page **hash) { page->count++; page->flags &= ~((1 << PG_uptodate) | (1 << PG_error)); page->offset = offset; add_page_to_inode_queue(inode, page); __add_page_to_hash_queue(page, hash); } /* * Try to read ahead in the file. "page_cache" is a potentially free page * that we could use for the cache (if it is 0 we can try to create one, * this is all overlapped with the IO on the previous page finishing anyway) */ static unsigned long try_to_read_ahead(struct inode * inode, unsigned long offset, unsigned long page_cache) { struct page * page; struct page ** hash; offset &= PAGE_MASK; switch (page_cache) { case 0: page_cache = __get_free_page(GFP_KERNEL); if (!page_cache) break; default: if (offset >= inode->i_size) break; hash = page_hash(inode, offset); page = __find_page(inode, offset, *hash); if (!page) { /* * Ok, add the new page to the hash-queues... */ page = mem_map + MAP_NR(page_cache); add_to_page_cache(page, inode, offset, hash); inode->i_op->readpage(inode, page); page_cache = 0; } release_page(page); } return page_cache; } /* * Wait for IO to complete on a locked page. * * This must be called with the caller "holding" the page, * ie with increased "page->count" so that the page won't * go away during the wait.. */ void __wait_on_page(struct page *page) { struct wait_queue wait = { current, NULL }; add_wait_queue(&page->wait, &wait); repeat: run_task_queue(&tq_disk); current->state = TASK_UNINTERRUPTIBLE; if (PageLocked(page)) { schedule(); goto repeat; } remove_wait_queue(&page->wait, &wait); current->state = TASK_RUNNING; } #if 0 #define PROFILE_READAHEAD #define DEBUG_READAHEAD #endif /* * Read-ahead profiling information * -------------------------------- * Every PROFILE_MAXREADCOUNT, the following information is written * to the syslog: * Percentage of asynchronous read-ahead. * Average of read-ahead fields context value. * If DEBUG_READAHEAD is defined, a snapshot of these fields is written * to the syslog. */ #ifdef PROFILE_READAHEAD #define PROFILE_MAXREADCOUNT 1000 static unsigned long total_reada; static unsigned long total_async; static unsigned long total_ramax; static unsigned long total_ralen; static unsigned long total_rawin; static void profile_readahead(int async, struct file *filp) { unsigned long flags; ++total_reada; if (async) ++total_async; total_ramax += filp->f_ramax; total_ralen += filp->f_ralen; total_rawin += filp->f_rawin; if (total_reada > PROFILE_MAXREADCOUNT) { save_flags(flags); cli(); if (!(total_reada > PROFILE_MAXREADCOUNT)) { restore_flags(flags); return; } printk("Readahead average: max=%ld, len=%ld, win=%ld, async=%ld%%\n", total_ramax/total_reada, total_ralen/total_reada, total_rawin/total_reada, (total_async*100)/total_reada); #ifdef DEBUG_READAHEAD printk("Readahead snapshot: max=%ld, len=%ld, win=%ld, raend=%ld\n", filp->f_ramax, filp->f_ralen, filp->f_rawin, filp->f_raend); #endif total_reada = 0; total_async = 0; total_ramax = 0; total_ralen = 0; total_rawin = 0; restore_flags(flags); } } #endif /* defined PROFILE_READAHEAD */ /* * Read-ahead context: * ------------------- * The read ahead context fields of the "struct file" are the following: * - f_raend : position of the first byte after the last page we tried to * read ahead. * - f_ramax : current read-ahead maximum size. * - f_ralen : length of the current IO read block we tried to read-ahead. * - f_rawin : length of the current read-ahead window. * if last read-ahead was synchronous then * f_rawin = f_ralen * otherwise (was asynchronous) * f_rawin = previous value of f_ralen + f_ralen * * Read-ahead limits: * ------------------ * MIN_READAHEAD : minimum read-ahead size when read-ahead. * MAX_READAHEAD : maximum read-ahead size when read-ahead. * * Synchronous read-ahead benefits: * -------------------------------- * Using reasonable IO xfer length from peripheral devices increase system * performances. * Reasonable means, in this context, not too large but not too small. * The actual maximum value is: * MAX_READAHEAD + PAGE_SIZE = 76k is CONFIG_READA_SMALL is undefined * and 32K if defined (4K page size assumed). * * Asynchronous read-ahead benefits: * --------------------------------- * Overlapping next read request and user process execution increase system * performance. * * Read-ahead risks: * ----------------- * We have to guess which further data are needed by the user process. * If these data are often not really needed, it's bad for system * performances. * However, we know that files are often accessed sequentially by * application programs and it seems that it is possible to have some good * strategy in that guessing. * We only try to read-ahead files that seems to be read sequentially. * * Asynchronous read-ahead risks: * ------------------------------ * In order to maximize overlapping, we must start some asynchronous read * request from the device, as soon as possible. * We must be very careful about: * - The number of effective pending IO read requests. * ONE seems to be the only reasonable value. * - The total memory pool usage for the file access stream. * This maximum memory usage is implicitly 2 IO read chunks: * 2*(MAX_READAHEAD + PAGE_SIZE) = 156K if CONFIG_READA_SMALL is undefined, * 64k if defined (4K page size assumed). */ #define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK) #ifdef CONFIG_READA_SMALL /* small readahead */ #define MAX_READAHEAD PageAlignSize(4096*7) #define MIN_READAHEAD PageAlignSize(4096*2) #else /* large readahead */ #define MAX_READAHEAD PageAlignSize(4096*18) #define MIN_READAHEAD PageAlignSize(4096*3) #endif static inline unsigned long generic_file_readahead(int reada_ok, struct file * filp, struct inode * inode, unsigned long ppos, struct page * page, unsigned long page_cache) { unsigned long max_ahead, ahead; unsigned long raend; raend = filp->f_raend & PAGE_MASK; max_ahead = 0; /* * The current page is locked. * If the current position is inside the previous read IO request, do not * try to reread previously read ahead pages. * Otherwise decide or not to read ahead some pages synchronously. * If we are not going to read ahead, set the read ahead context for this * page only. */ if (PageLocked(page)) { if (!filp->f_ralen || ppos >= raend || ppos + filp->f_ralen < raend) { raend = ppos; if (raend < inode->i_size) max_ahead = filp->f_ramax; filp->f_rawin = 0; filp->f_ralen = PAGE_SIZE; if (!max_ahead) { filp->f_raend = ppos + filp->f_ralen; filp->f_rawin += filp->f_ralen; } } } /* * The current page is not locked. * If we were reading ahead and, * if the current max read ahead size is not zero and, * if the current position is inside the last read-ahead IO request, * it is the moment to try to read ahead asynchronously. * We will later force unplug device in order to force asynchronous read IO. */ else if (reada_ok && filp->f_ramax && raend >= PAGE_SIZE && ppos <= raend && ppos + filp->f_ralen >= raend) { /* * Add ONE page to max_ahead in order to try to have about the same IO max size * as synchronous read-ahead (MAX_READAHEAD + 1)*PAGE_SIZE. * Compute the position of the last page we have tried to read in order to * begin to read ahead just at the next page. */ raend -= PAGE_SIZE; if (raend < inode->i_size) max_ahead = filp->f_ramax + PAGE_SIZE; if (max_ahead) { filp->f_rawin = filp->f_ralen; filp->f_ralen = 0; reada_ok = 2; } } /* * Try to read ahead pages. * We hope that ll_rw_blk() plug/unplug, coalescence, requests sort and the * scheduler, will work enough for us to avoid too bad actuals IO requests. */ ahead = 0; while (ahead < max_ahead) { ahead += PAGE_SIZE; page_cache = try_to_read_ahead(inode, raend + ahead, page_cache); } /* * If we tried to read ahead some pages, * If we tried to read ahead asynchronously, * Try to force unplug of the device in order to start an asynchronous * read IO request. * Update the read-ahead context. * Store the length of the current read-ahead window. * Double the current max read ahead size. * That heuristic avoid to do some large IO for files that are not really * accessed sequentially. */ if (ahead) { if (reada_ok == 2) { run_task_queue(&tq_disk); } filp->f_ralen += ahead; filp->f_rawin += filp->f_ralen; filp->f_raend = raend + ahead + PAGE_SIZE; filp->f_ramax += filp->f_ramax; if (filp->f_ramax > MAX_READAHEAD) filp->f_ramax = MAX_READAHEAD; #ifdef PROFILE_READAHEAD profile_readahead((reada_ok == 2), filp); #endif } return page_cache; } /* * This is a generic file read routine, and uses the * inode->i_op->readpage() function for the actual low-level * stuff. * * This is really ugly. But the goto's actually try to clarify some * of the logic when it comes to error handling etc. */ int generic_file_read(struct inode * inode, struct file * filp, char * buf, int count) { int error, read; unsigned long pos, ppos, page_cache; int reada_ok; error = 0; read = 0; page_cache = 0; pos = filp->f_pos; ppos = pos & PAGE_MASK; /* * If the current position is outside the previous read-ahead window, * we reset the current read-ahead context and set read ahead max to zero * (will be set to just needed value later), * otherwise, we assume that the file accesses are sequential enough to * continue read-ahead. */ if (ppos > filp->f_raend || ppos + filp->f_rawin < filp->f_raend) { reada_ok = 0; filp->f_raend = 0; filp->f_ralen = 0; filp->f_ramax = 0; filp->f_rawin = 0; } else { reada_ok = 1; } /* * Adjust the current value of read-ahead max. * If the read operation stay in the first half page, force no readahead. * Otherwise try to increase read ahead max just enough to do the read request. * Then, at least MIN_READAHEAD if read ahead is ok, * and at most MAX_READAHEAD in all cases. */ if (pos + count <= (PAGE_SIZE >> 1)) { filp->f_ramax = 0; } else { unsigned long needed; needed = ((pos + count) & PAGE_MASK) - ppos; if (filp->f_ramax < needed) filp->f_ramax = needed; if (reada_ok && filp->f_ramax < MIN_READAHEAD) filp->f_ramax = MIN_READAHEAD; if (filp->f_ramax > MAX_READAHEAD) filp->f_ramax = MAX_READAHEAD; } for (;;) { struct page *page, **hash; if (pos >= inode->i_size) break; /* * Try to find the data in the page cache.. */ hash = page_hash(inode, pos & PAGE_MASK); page = __find_page(inode, pos & PAGE_MASK, *hash); if (!page) goto no_cached_page; found_page: /* * Try to read ahead only if the current page is filled or being filled. * Otherwise, if we were reading ahead, decrease max read ahead size to * the minimum value. * In this context, that seems to may happen only on some read error or if * the page has been rewritten. */ if (PageUptodate(page) || PageLocked(page)) page_cache = generic_file_readahead(reada_ok, filp, inode, pos & PAGE_MASK, page, page_cache); else if (reada_ok && filp->f_ramax > MIN_READAHEAD) filp->f_ramax = MIN_READAHEAD; wait_on_page(page); if (!PageUptodate(page)) goto page_read_error; success: /* * Ok, we have the page, it's up-to-date and ok, * so now we can finally copy it to user space... */ { unsigned long offset, nr; offset = pos & ~PAGE_MASK; nr = PAGE_SIZE - offset; if (nr > count) nr = count; if (nr > inode->i_size - pos) nr = inode->i_size - pos; memcpy_tofs(buf, (void *) (page_address(page) + offset), nr); release_page(page); buf += nr; pos += nr; read += nr; count -= nr; if (count) { /* * to prevent hogging the CPU on well-cached systems, * schedule if needed, it's safe to do it here: */ if (need_resched) schedule(); continue; } break; } no_cached_page: /* * Ok, it wasn't cached, so we need to create a new * page.. */ if (!page_cache) { page_cache = __get_free_page(GFP_KERNEL); /* * That could have slept, so go around to the * very beginning.. */ if (page_cache) continue; error = -ENOMEM; break; } /* * Ok, add the new page to the hash-queues... */ page = mem_map + MAP_NR(page_cache); page_cache = 0; add_to_page_cache(page, inode, pos & PAGE_MASK, hash); /* * Error handling is tricky. If we get a read error, * the cached page stays in the cache (but uptodate=0), * and the next process that accesses it will try to * re-read it. This is needed for NFS etc, where the * identity of the reader can decide if we can read the * page or not.. */ /* * We have to read the page. * If we were reading ahead, we had previously tried to read this page, * That means that the page has probably been removed from the cache before * the application process needs it, or has been rewritten. * Decrease max readahead size to the minimum value in that situation. */ if (reada_ok && filp->f_ramax > MIN_READAHEAD) filp->f_ramax = MIN_READAHEAD; error = inode->i_op->readpage(inode, page); if (!error) goto found_page; release_page(page); break; page_read_error: /* * We found the page, but it wasn't up-to-date. * Try to re-read it _once_. We do this synchronously, * because this happens only if there were errors. */ error = inode->i_op->readpage(inode, page); if (!error) { wait_on_page(page); if (PageUptodate(page) && !PageError(page)) goto success; error = -EIO; /* Some unspecified error occurred.. */ } release_page(page); break; } filp->f_pos = pos; filp->f_reada = 1; if (page_cache) free_page(page_cache); UPDATE_ATIME(inode) if (!read) read = error; return read; } /* * Semantics for shared and private memory areas are different past the end * of the file. A shared mapping past the last page of the file is an error * and results in a SIGBUS, while a private mapping just maps in a zero page. * * The goto's are kind of ugly, but this streamlines the normal case of having * it in the page cache, and handles the special cases reasonably without * having a lot of duplicated code. */ static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address, int no_share) { unsigned long offset; struct page * page, **hash; struct inode * inode = area->vm_inode; unsigned long old_page, new_page; new_page = 0; offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset; if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm) goto no_page; /* * Do we have something in the page cache already? */ hash = page_hash(inode, offset); page = __find_page(inode, offset, *hash); if (!page) goto no_cached_page; found_page: /* * Ok, found a page in the page cache, now we need to check * that it's up-to-date. First check whether we'll need an * extra page -- better to overlap the allocation with the I/O. */ if (no_share && !new_page) { new_page = __get_free_page(GFP_KERNEL); if (!new_page) goto failure; } if (PageLocked(page)) goto page_locked_wait; if (!PageUptodate(page)) goto page_read_error; success: /* * Found the page, need to check sharing and possibly * copy it over to another page.. */ old_page = page_address(page); if (!no_share) { /* * Ok, we can share the cached page directly.. Get rid * of any potential extra pages. */ if (new_page) free_page(new_page); flush_page_to_ram(old_page); return old_page; } /* * No sharing ... copy to the new page. */ memcpy((void *) new_page, (void *) old_page, PAGE_SIZE); flush_page_to_ram(new_page); release_page(page); return new_page; no_cached_page: new_page = __get_free_page(GFP_KERNEL); if (!new_page) goto no_page; /* * During getting the above page we might have slept, * so we need to re-check the situation with the page * cache.. The page we just got may be useful if we * can't share, so don't get rid of it here. */ page = find_page(inode, offset); if (page) goto found_page; /* * Now, create a new page-cache page from the page we got */ page = mem_map + MAP_NR(new_page); new_page = 0; add_to_page_cache(page, inode, offset, hash); if (inode->i_op->readpage(inode, page) != 0) goto failure; /* * Do a very limited read-ahead if appropriate */ if (PageLocked(page)) new_page = try_to_read_ahead(inode, offset + PAGE_SIZE, 0); goto found_page; page_locked_wait: __wait_on_page(page); if (PageUptodate(page)) goto success; page_read_error: /* * Umm, take care of errors if the page isn't up-to-date. * Try to re-read it _once_. We do this synchronously, * because there really aren't any performance issues here * and we need to check for errors. */ if (inode->i_op->readpage(inode, page) != 0) goto failure; wait_on_page(page); if (PageError(page)) goto failure; if (PageUptodate(page)) goto success; /* * Uhhuh.. Things didn't work out. Return zero to tell the * mm layer so, possibly freeing the page cache page first. */ failure: release_page(page); if (new_page) free_page(new_page); no_page: return 0; } /* * Tries to write a shared mapped page to its backing store. May return -EIO * if the disk is full. */ static inline int do_write_page(struct inode * inode, struct file * file, const char * page, unsigned long offset) { int old_fs, retval; unsigned long size; size = offset + PAGE_SIZE; /* refuse to extend file size.. */ if (S_ISREG(inode->i_mode)) { if (size > inode->i_size) size = inode->i_size; /* Ho humm.. We should have tested for this earlier */ if (size < offset) return -EIO; } size -= offset; old_fs = get_fs(); set_fs(KERNEL_DS); retval = -EIO; if (size == file->f_op->write(inode, file, (const char *) page, size)) retval = 0; set_fs(old_fs); return retval; } static int filemap_write_page(struct vm_area_struct * vma, unsigned long offset, unsigned long page) { int result; struct file file; struct inode * inode; struct buffer_head * bh; bh = mem_map[MAP_NR(page)].buffers; if (bh) { /* whee.. just mark the buffer heads dirty */ struct buffer_head * tmp = bh; do { mark_buffer_dirty(tmp, 0); tmp = tmp->b_this_page; } while (tmp != bh); return 0; } inode = vma->vm_inode; file.f_op = inode->i_op->default_file_ops; if (!file.f_op->write) return -EIO; file.f_mode = 3; file.f_flags = 0; file.f_count = 1; file.f_inode = inode; file.f_pos = offset; file.f_reada = 0; down(&inode->i_sem); result = do_write_page(inode, &file, (const char *) page, offset); up(&inode->i_sem); return result; } /* * Swapping to a shared file: while we're busy writing out the page * (and the page still exists in memory), we save the page information * in the page table, so that "filemap_swapin()" can re-use the page * immediately if it is called while we're busy swapping it out.. * * Once we've written it all out, we mark the page entry "empty", which * will result in a normal page-in (instead of a swap-in) from the now * up-to-date disk file. */ int filemap_swapout(struct vm_area_struct * vma, unsigned long offset, pte_t *page_table) { int error; unsigned long page = pte_page(*page_table); unsigned long entry = SWP_ENTRY(SHM_SWP_TYPE, MAP_NR(page)); flush_cache_page(vma, (offset + vma->vm_start - vma->vm_offset)); set_pte(page_table, __pte(entry)); flush_tlb_page(vma, (offset + vma->vm_start - vma->vm_offset)); error = filemap_write_page(vma, offset, page); if (pte_val(*page_table) == entry) pte_clear(page_table); return error; } /* * filemap_swapin() is called only if we have something in the page * tables that is non-zero (but not present), which we know to be the * page index of a page that is busy being swapped out (see above). * So we just use it directly.. */ static pte_t filemap_swapin(struct vm_area_struct * vma, unsigned long offset, unsigned long entry) { unsigned long page = SWP_OFFSET(entry); mem_map[page].count++; page = (page << PAGE_SHIFT) + PAGE_OFFSET; return mk_pte(page,vma->vm_page_prot); } static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma, unsigned long address, unsigned int flags) { pte_t pte = *ptep; unsigned long page; int error; if (pte_none(pte)) return 0; if (!(flags & MS_INVALIDATE)) { if (!pte_present(pte)) return 0; if (!pte_dirty(pte)) return 0; flush_page_to_ram(pte_page(pte)); flush_cache_page(vma, address); set_pte(ptep, pte_mkclean(pte)); flush_tlb_page(vma, address); page = pte_page(pte); mem_map[MAP_NR(page)].count++; } else { flush_cache_page(vma, address); pte_clear(ptep); flush_tlb_page(vma, address); if (!pte_present(pte)) { swap_free(pte_val(pte)); return 0; } page = pte_page(pte); if (!pte_dirty(pte) || flags == MS_INVALIDATE) { free_page(page); return 0; } } error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page); free_page(page); return error; } static inline int filemap_sync_pte_range(pmd_t * pmd, unsigned long address, unsigned long size, struct vm_area_struct *vma, unsigned long offset, unsigned int flags) { pte_t * pte; unsigned long end; int error; if (pmd_none(*pmd)) return 0; if (pmd_bad(*pmd)) { printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd)); pmd_clear(pmd); return 0; } pte = pte_offset(pmd, address); offset += address & PMD_MASK; address &= ~PMD_MASK; end = address + size; if (end > PMD_SIZE) end = PMD_SIZE; error = 0; do { error |= filemap_sync_pte(pte, vma, address + offset, flags); address += PAGE_SIZE; pte++; } while (address < end); return error; } static inline int filemap_sync_pmd_range(pgd_t * pgd, unsigned long address, unsigned long size, struct vm_area_struct *vma, unsigned int flags) { pmd_t * pmd; unsigned long offset, end; int error; if (pgd_none(*pgd)) return 0; if (pgd_bad(*pgd)) { printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd)); pgd_clear(pgd); return 0; } pmd = pmd_offset(pgd, address); offset = address & PGDIR_MASK; address &= ~PGDIR_MASK; end = address + size; if (end > PGDIR_SIZE) end = PGDIR_SIZE; error = 0; do { error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address < end); return error; } static int filemap_sync(struct vm_area_struct * vma, unsigned long address, size_t size, unsigned int flags) { pgd_t * dir; unsigned long end = address + size; int error = 0; dir = pgd_offset(vma->vm_mm, address); flush_cache_range(vma->vm_mm, end - size, end); while (address < end) { error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags); address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } flush_tlb_range(vma->vm_mm, end - size, end); return error; } /* * This handles (potentially partial) area unmaps.. */ static void filemap_unmap(struct vm_area_struct *vma, unsigned long start, size_t len) { filemap_sync(vma, start, len, MS_ASYNC); } /* * Shared mappings need to be able to do the right thing at * close/unmap/sync. They will also use the private file as * backing-store for swapping.. */ static struct vm_operations_struct file_shared_mmap = { NULL, /* no special open */ NULL, /* no special close */ filemap_unmap, /* unmap - we need to sync the pages */ NULL, /* no special protect */ filemap_sync, /* sync */ NULL, /* advise */ filemap_nopage, /* nopage */ NULL, /* wppage */ filemap_swapout, /* swapout */ filemap_swapin, /* swapin */ }; /* * Private mappings just need to be able to load in the map. * * (This is actually used for shared mappings as well, if we * know they can't ever get write permissions..) */ static struct vm_operations_struct file_private_mmap = { NULL, /* open */ NULL, /* close */ NULL, /* unmap */ NULL, /* protect */ NULL, /* sync */ NULL, /* advise */ filemap_nopage, /* nopage */ NULL, /* wppage */ NULL, /* swapout */ NULL, /* swapin */ }; /* This is used for a general mmap of a disk file */ int generic_file_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma) { struct vm_operations_struct * ops; if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) { ops = &file_shared_mmap; /* share_page() can only guarantee proper page sharing if * the offsets are all page aligned. */ if (vma->vm_offset & (PAGE_SIZE - 1)) return -EINVAL; } else { ops = &file_private_mmap; if (vma->vm_offset & (inode->i_sb->s_blocksize - 1)) return -EINVAL; } if (!inode->i_sb || !S_ISREG(inode->i_mode)) return -EACCES; if (!inode->i_op || !inode->i_op->readpage) return -ENOEXEC; UPDATE_ATIME(inode) vma->vm_inode = inode; inode->i_count++; vma->vm_ops = ops; return 0; } /* * The msync() system call. */ static int msync_interval(struct vm_area_struct * vma, unsigned long start, unsigned long end, int flags) { if (vma->vm_inode && vma->vm_ops && vma->vm_ops->sync) { int error; error = vma->vm_ops->sync(vma, start, end-start, flags); if (error) return error; if (flags & MS_SYNC) return file_fsync(vma->vm_inode, NULL); return 0; } return 0; } asmlinkage int sys_msync(unsigned long start, size_t len, int flags) { unsigned long end; struct vm_area_struct * vma; int unmapped_error, error; if (start & ~PAGE_MASK) return -EINVAL; len = (len + ~PAGE_MASK) & PAGE_MASK; end = start + len; if (end < start) return -EINVAL; if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC)) return -EINVAL; if (end == start) return 0; /* * If the interval [start,end) covers some unmapped address ranges, * just ignore them, but return -EFAULT at the end. */ vma = find_vma(current->mm, start); unmapped_error = 0; for (;;) { /* Still start < end. */ if (!vma) return -EFAULT; /* Here start < vma->vm_end. */ if (start < vma->vm_start) { unmapped_error = -EFAULT; start = vma->vm_start; } /* Here vma->vm_start <= start < vma->vm_end. */ if (end <= vma->vm_end) { if (start < end) { error = msync_interval(vma, start, end, flags); if (error) return error; } return unmapped_error; } /* Here vma->vm_start <= start < vma->vm_end < end. */ error = msync_interval(vma, start, vma->vm_end, flags); if (error) return error; start = vma->vm_end; vma = vma->vm_next; } } 1.1 or1k/rc203soc/sw/uClinux/mm/kmalloc.c http://www.opencores.org/cvsweb.shtml/or1k/rc203soc/sw/uClinux/mm/kmalloc.c?rev=1.1&content-type=text/x-cvsweb-markup Index: kmalloc.c =================================================================== /* * linux/mm/kmalloc.c * * Copyright (C) 1991, 1992 Linus Torvalds & Roger Wolff. * * Written by R.E. Wolff Sept/Oct '93. * */ /* * Modified by Alex Bligh (alex@c...) 4 Apr 1994 to use multiple * pages. So for 'page' throughout, read 'area'. * * Largely rewritten.. Linus */ #include <linux/mm.h> #include <linux/delay.h> #include <linux/interrupt.h> #include <asm/system.h> #include <asm/dma.h> /* Define this if you want slow routines that try to trip errors */ #undef SADISTIC_KMALLOC /* Private flags. */ #define MF_USED 0xffaa0055 #define MF_DMA 0xff00aa55 #define MF_FREE 0x0055ffaa /* * Much care has gone into making these routines in this file reentrant. * * The fancy bookkeeping of nbytesmalloced and the like are only used to * report them to the user (oooohhhhh, aaaaahhhhh....) are not * protected by cli(). (If that goes wrong. So what?) * * These routines restore the interrupt status to allow calling with ints * off. */ /* * A block header. This is in front of every malloc-block, whether free or not. */ struct block_header { unsigned long bh_flags; union { unsigned long ubh_length; struct block_header *fbh_next; } vp; }; #define bh_length vp.ubh_length #define bh_next vp.fbh_next #define BH(p) ((struct block_header *)(p)) /* * The page descriptor is at the front of every page that malloc has in use. */ struct page_descriptor { struct page_descriptor *next; struct block_header *firstfree; int order; int nfree; }; #define PAGE_DESC(p) ((struct page_descriptor *)(((unsigned long)(p)) & PAGE_MASK)) /* * A size descriptor describes a specific class of malloc sizes. * Each class of sizes has its own freelist. */ struct size_descriptor { struct page_descriptor *firstfree; struct page_descriptor *dmafree; /* DMA-able memory */ int nblocks; int nmallocs; int nfrees; int nbytesmalloced; int npages; unsigned long gfporder; /* number of pages in the area required */ }; /* * For now it is unsafe to allocate bucket sizes between n and * n-sizeof(page_descriptor) where n is PAGE_SIZE * any power of two * * The blocksize and sizes arrays _must_ match! */ #if PAGE_SIZE == 4096 static const unsigned int blocksize[] = { 32, 64, 128, 252, 508, 1020, 2040, 4096 - 16, 8192 - 16, 16384 - 16, 32768 - 16, 65536 - 16, 131072 - 16, 0 }; static struct size_descriptor sizes[] = { {NULL, NULL, 127, 0, 0, 0, 0, 0}, {NULL, NULL, 63, 0, 0, 0, 0, 0}, {NULL, NULL, 31, 0, 0, 0, 0, 0}, {NULL, NULL, 16, 0, 0, 0, 0, 0}, {NULL, NULL, 8, 0, 0, 0, 0, 0}, {NULL, NULL, 4, 0, 0, 0, 0, 0}, {NULL, NULL, 2, 0, 0, 0, 0, 0}, {NULL, NULL, 1, 0, 0, 0, 0, 0}, {NULL, NULL, 1, 0, 0, 0, 0, 1}, {NULL, NULL, 1, 0, 0, 0, 0, 2}, {NULL, NULL, 1, 0, 0, 0, 0, 3}, {NULL, NULL, 1, 0, 0, 0, 0, 4}, {NULL, NULL, 1, 0, 0, 0, 0, 5}, {NULL, NULL, 0, 0, 0, 0, 0, 0} }; #elif PAGE_SIZE == 8192 static const unsigned int blocksize[] = { 64, 128, 248, 504, 1016, 2040, 4080, 8192 - 32, 16384 - 32, 32768 - 32, 65536 - 32, 131072 - 32, 262144 - 32, 0 }; struct size_descriptor sizes[] = { {NULL, NULL, 127, 0, 0, 0, 0, 0}, {NULL, NULL, 63, 0, 0, 0, 0, 0}, {NULL, NULL, 31, 0, 0, 0, 0, 0}, {NULL, NULL, 16, 0, 0, 0, 0, 0}, {NULL, NULL, 8, 0, 0, 0, 0, 0}, {NULL, NULL, 4, 0, 0, 0, 0, 0}, {NULL, NULL, 2, 0, 0, 0, 0, 0}, {NULL, NULL, 1, 0, 0, 0, 0, 0}, {NULL, NULL, 1, 0, 0, 0, 0, 1}, {NULL, NULL, 1, 0, 0, 0, 0, 2}, {NULL, NULL, 1, 0, 0, 0, 0, 3}, {NULL, NULL, 1, 0, 0, 0, 0, 4}, {NULL, NULL, 1, 0, 0, 0, 0, 5}, {NULL, NULL, 0, 0, 0, 0, 0, 0} }; #else #error you need to make a version for your pagesize #endif #define NBLOCKS(order) (sizes[order].nblocks) #define BLOCKSIZE(order) (blocksize[order]) #define AREASIZE(order) (PAGE_SIZE<<(sizes[order].gfporder)) /* * Create a small cache of page allocations: this helps a bit with * those pesky 8kB+ allocations for NFS when we're temporarily * out of memory.. * * This is a _truly_ small cache, we just cache one single page * order (for orders 0, 1 and 2, that is 4, 8 and 16kB on x86). */ #define MAX_CACHE_ORDER 3 struct page_descriptor * kmalloc_cache[MAX_CACHE_ORDER]; static inline struct page_descriptor * get_kmalloc_pages(unsigned long priority, unsigned long order, int dma) { return (struct page_descriptor *) __get_free_pages(priority, order, dma); } static inline void free_kmalloc_pages(struct page_descriptor * page, unsigned long order, int dma) { if (!dma && order < MAX_CACHE_ORDER) { page = xchg(kmalloc_cache+order, page); if (!page) return; } free_pages((unsigned long) page, order); } long kmalloc_init(long start_mem, long end_mem) { int order; /* * Check the static info array. Things will blow up terribly if it's * incorrect. This is a late "compile time" check..... */ for (order = 0; BLOCKSIZE(order); order++) { if ((NBLOCKS(order) * BLOCKSIZE(order) + sizeof(struct page_descriptor)) > AREASIZE(order)) { printk("Cannot use %d bytes out of %d in order = %d block mallocs\n", (int) (NBLOCKS(order) * BLOCKSIZE(order) + sizeof(struct page_descriptor)), (int) AREASIZE(order), BLOCKSIZE(order)); panic("This only happens if someone messes with kmalloc"); } } return start_mem; } /* * Ugh, this is ugly, but we want the default case to run * straight through, which is why we have the ugly goto's */ void *kmalloc(size_t size, int priority) { unsigned long flags; unsigned long type; int order, dma; struct block_header *p; struct page_descriptor *page, **pg; struct size_descriptor *bucket = sizes; /* Get order */ order = 0; { unsigned int realsize = size + sizeof(struct block_header); for (;;) { int ordersize = BLOCKSIZE(order); if (realsize <= ordersize) break; order++; bucket++; if (ordersize) continue; printk("kmalloc of too large a block (%d bytes).\n", (int) size); return NULL; } } dma = 0; type = MF_USED; pg = &bucket->firstfree; if (priority & GFP_DMA) { dma = 1; type = MF_DMA; pg = &bucket->dmafree; } priority &= GFP_LEVEL_MASK; /* Sanity check... */ if (intr_count && priority != GFP_ATOMIC) { static int count = 0; if (++count < 5) { printk("kmalloc called nonatomically from interrupt %p\n", __builtin_return_address(0)); priority = GFP_ATOMIC; } } save_flags(flags); cli(); page = *pg; if (!page) goto no_bucket_page; p = page->firstfree; if (p->bh_flags != MF_FREE) goto not_free_on_freelist; found_it: page->firstfree = p->bh_next; page->nfree--; if (!page->nfree) *pg = page->next; restore_flags(flags); bucket->nmallocs++; bucket->nbytesmalloced += size; p->bh_flags = type; /* As of now this block is officially in use */ p->bh_length = size; #ifdef SADISTIC_KMALLOC memset(p+1, 0xf0, size); #endif return p + 1; /* Pointer arithmetic: increments past header */ no_bucket_page: /* * If we didn't find a page already allocated for this * bucket size, we need to get one.. * * This can be done with ints on: it is private to this invocation */ restore_flags(flags); { int i, sz; /* sz is the size of the blocks we're dealing with */ sz = BLOCKSIZE(order); page = get_kmalloc_pages(priority, bucket->gfporder, dma); if (!page) goto no_free_page; found_cached_page: bucket->npages++; page->order = order; /* Loop for all but last block: */ i = (page->nfree = bucket->nblocks) - 1; p = BH(page + 1); while (i > 0) { i--; p->bh_flags = MF_FREE; p->bh_next = BH(((long) p) + sz); p = p->bh_next; } /* Last block: */ p->bh_flags = MF_FREE; p->bh_next = NULL; p = BH(page+1); } /* * Now we're going to muck with the "global" freelist * for this size: this should be uninterruptible */ cli(); page->next = *pg; *pg = page; goto found_it; no_free_page: /* * No free pages, check the kmalloc cache of * pages to see if maybe we have something available */ if (!dma && order < MAX_CACHE_ORDER) { page = xchg(kmalloc_cache+order, page); if (page) goto found_cached_page; } { static unsigned long last = 0; if (priority != GFP_BUFFER && priority != GFP_IO && (last + 10 * HZ < jiffies)) { last = jiffies; printk("Couldn't get a free page.....\n"); } return NULL; } not_free_on_freelist: restore_flags(flags); printk("Problem: block on freelist at %08lx isn't free.\n", (long) p); return NULL; } void kfree(void *__ptr) { int dma; unsigned long flags; unsigned int order; struct page_descriptor *page, **pg; struct size_descriptor *bucket; if (!__ptr) goto null_kfree; #define ptr ((struct block_header *) __ptr) page = PAGE_DESC(ptr); __ptr = ptr - 1; if (~PAGE_MASK & (unsigned long)page->next) goto bad_order; order = page->order; if (order >= sizeof(sizes) / sizeof(sizes[0])) goto bad_order; bucket = sizes + order; dma = 0; pg = &bucket->firstfree; if (ptr->bh_flags == MF_DMA) { dma = 1; ptr->bh_flags = MF_USED; pg = &bucket->dmafree; } if (ptr->bh_flags != MF_USED) goto bad_order; ptr->bh_flags = MF_FREE; /* As of now this block is officially free */ #ifdef SADISTIC_KMALLOC memset(ptr+1, 0xe0, ptr->bh_length); #endif save_flags(flags); cli(); bucket->nfrees++; bucket->nbytesmalloced -= ptr->bh_length; ptr->bh_next = page->firstfree; page->firstfree = ptr; if (!page->nfree++) { /* Page went from full to one free block: put it on the freelist. */ if (bucket->nblocks == 1) goto free_page; page->next = *pg; *pg = page; } /* If page is completely free, free it */ if (page->nfree == bucket->nblocks) { for (;;) { struct page_descriptor *tmp = *pg; if (!tmp) goto not_on_freelist; if (tmp == page) break; pg = &tmp->next; } *pg = page->next; free_page: bucket->npages--; free_kmalloc_pages(page, bucket->gfporder, dma); } restore_flags(flags); null_kfree: return; bad_order: printk("kfree of non-kmalloced memory: %p, next= %p, order=%d\n", ptr+1, page->next, page->order); return; not_on_freelist: restore_flags(flags); printk("Ooops. page %p doesn't show on freelist.\n", page); } 1.1 or1k/rc203soc/sw/uClinux/mm/memory.c http://www.opencores.org/cvsweb.shtml/or1k/rc203soc/sw/uClinux/mm/memory.c?rev=1.1&content-type=text/x-cvsweb-markup Index: memory.c =================================================================== /* * linux/mm/memory.c * * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds */ /* * demand-loading started 01.12.91 - seems it is high on the list of * things wanted, and it should be easy to implement. - Linus */ /* * Ok, demand-loading was easy, shared pages a little bit tricker. Shared * pages started 02.12.91, seems to work. - Linus. * * Tested sharing by executing about 30 /bin/sh: under the old kernel it * would have taken more than the 6M I have free, but it worked well as * far as I could see. * * Also corrected some "invalidate()"s - I wasn't doing enough of them. */ /* * Real VM (paging to/from disk) started 18.12.91. Much more work and * thought has to go into this. Oh, well.. * 19.12.91 - works, somewhat. Sometimes I get faults, don't know why. * Found it. Everything seems to work now. * 20.12.91 - Ok, making the swap-device changeable like the root. */ /* * 05.04.94 - Multi-page memory management added for v1.1. * Idea by Alex Bligh (alex@c...) */ #include <linux/signal.h> #include <linux/sched.h> #include <linux/head.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/types.h> #include <linux/ptrace.h> #include <linux/mman.h> #include <linux/mm.h> #include <linux/swap.h> #include <asm/system.h> #include <asm/segment.h> #include <asm/pgtable.h> #include <asm/string.h> unsigned long high_memory = 0; /* * We special-case the C-O-W ZERO_PAGE, because it's such * a common occurrence (no need to read the page to know * that it's zero - better for the cache and memory subsystem). */ static inline void copy_page(unsigned long from, unsigned long to) { if (from == ZERO_PAGE) { memset((void *) to, 0, PAGE_SIZE); return; } memcpy((void *) to, (void *) from, PAGE_SIZE); } #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) mem_map_t * mem_map = NULL; /* * oom() prints a message (so that the user knows why the process died), * and gives the process an untrappable SIGKILL. */ void oom(struct task_struct * task) { printk("\nOut of memory for %s.\n", task->comm); task->sig->action[SIGKILL-1].sa_handler = NULL; task->blocked &= ~(1<<(SIGKILL-1)); send_sig(SIGKILL,task,1); } /* * Note: this doesn't free the actual pages themselves. That * has been handled earlier when unmapping all the memory regions. */ static inline void free_one_pmd(pmd_t * dir) { pte_t * pte; if (pmd_none(*dir)) return; if (pmd_bad(*dir)) { printk("free_one_pmd: bad directory entry %08lx\n", pmd_val(*dir)); pmd_clear(dir); return; } pte = pte_offset(dir, 0); pmd_clear(dir); pte_free(pte); } static inline void free_one_pgd(pgd_t * dir) { int j; pmd_t * pmd; if (pgd_none(*dir)) return; if (pgd_bad(*dir)) { printk("free_one_pgd: bad directory entry %08lx\n", pgd_val(*dir)); pgd_clear(dir); return; } pmd = pmd_offset(dir, 0); pgd_clear(dir); for (j = 0; j < PTRS_PER_PMD ; j++) free_one_pmd(pmd+j); pmd_free(pmd); } /* * This function clears all user-level page tables of a process - this * is needed by execve(), so that old pages aren't in the way. */ void clear_page_tables(struct task_struct * tsk) { int i; pgd_t * page_dir; page_dir = tsk->mm->pgd; if (!page_dir || page_dir == swapper_pg_dir) { printk("%s trying to clear kernel page-directory: not good\n", tsk->comm); return; } flush_cache_mm(tsk->mm); for (i = 0 ; i < USER_PTRS_PER_PGD ; i++) free_one_pgd(page_dir + i); flush_tlb_mm(tsk->mm); } /* * This function frees up all page tables of a process when it exits. It * is the same as "clear_page_tables()", except it also changes the process' * page table directory to the kernel page tables and then frees the old * page table directory. */ void free_page_tables(struct mm_struct * mm) { int i; pgd_t * page_dir; page_dir = mm->pgd; if (!page_dir || page_dir == swapper_pg_dir) { printk("Trying to free kernel page-directory: not good\n"); return; } for (i = 0 ; i < USER_PTRS_PER_PGD ; i++) free_one_pgd(page_dir + i); pgd_free(page_dir); } int new_page_tables(struct task_struct * tsk) { pgd_t * page_dir, * new_pg; if (!(new_pg = pgd_alloc())) return -ENOMEM; page_dir = pgd_offset(&init_mm, 0); flush_cache_mm(tsk->mm); memcpy(new_pg + USER_PTRS_PER_PGD, page_dir + USER_PTRS_PER_PGD, (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof (pgd_t)); flush_tlb_mm(tsk->mm); SET_PAGE_DIR(tsk, new_pg); tsk->mm->pgd = new_pg; return 0; } static inline void copy_one_pte(pte_t * old_pte, pte_t * new_pte, int cow) { pte_t pte = *old_pte; unsigned long page_nr; if (pte_none(pte)) return; if (!pte_present(pte)) { swap_duplicate(pte_val(pte)); set_pte(new_pte, pte); return; } page_nr = MAP_NR(pte_page(pte)); if (page_nr >= MAP_NR(high_memory) || PageReserved(mem_map+page_nr)) { set_pte(new_pte, pte); return; } if (cow) pte = pte_wrprotect(pte); if (delete_from_swap_cache(page_nr)) pte = pte_mkdirty(pte); set_pte(new_pte, pte_mkold(pte)); set_pte(old_pte, pte); mem_map[page_nr].count++; } static inline int copy_pte_range(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long address, unsigned long size, int cow) { pte_t * src_pte, * dst_pte; unsigned long end; if (pmd_none(*src_pmd)) return 0; if (pmd_bad(*src_pmd)) { printk("copy_pte_range: bad pmd (%08lx)\n", pmd_val(*src_pmd)); pmd_clear(src_pmd); return 0; } src_pte = pte_offset(src_pmd, address); if (pmd_none(*dst_pmd)) { if (!pte_alloc(dst_pmd, 0)) return -ENOMEM; } dst_pte = pte_offset(dst_pmd, address); address &= ~PMD_MASK; end = address + size; if (end >= PMD_SIZE) end = PMD_SIZE; do { /* I would like to switch arguments here, to make it * consistent with copy_xxx_range and memcpy syntax. */ copy_one_pte(src_pte++, dst_pte++, cow); address += PAGE_SIZE; } while (address < end); return 0; } static inline int copy_pmd_range(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long address, unsigned long size, int cow) { pmd_t * src_pmd, * dst_pmd; unsigned long end; int error = 0; if (pgd_none(*src_pgd)) return 0; if (pgd_bad(*src_pgd)) { printk("copy_pmd_range: bad pgd (%08lx)\n", pgd_val(*src_pgd)); pgd_clear(src_pgd); return 0; } src_pmd = pmd_offset(src_pgd, address); if (pgd_none(*dst_pgd)) { if (!pmd_alloc(dst_pgd, 0)) return -ENOMEM; } dst_pmd = pmd_offset(dst_pgd, address); address &= ~PGDIR_MASK; end = address + size; if (end > PGDIR_SIZE) end = PGDIR_SIZE; do { error = copy_pte_range(dst_pmd++, src_pmd++, address, end - address, cow); if (error) break; address = (address + PMD_SIZE) & PMD_MASK; } while (address < end); return error; } /* * copy one vm_area from one task to the other. Assumes the page tables * already present in the new task to be cleared in the whole range * covered by this vma. */ int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma) { pgd_t * src_pgd, * dst_pgd; unsigned long address = vma->vm_start; unsigned long end = vma->vm_end; int error = 0, cow; cow = (vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE; src_pgd = pgd_offset(src, address); dst_pgd = pgd_offset(dst, address); flush_cache_range(src, vma->vm_start, vma->vm_end); flush_cache_range(dst, vma->vm_start, vma->vm_end); while (address < end) { error = copy_pmd_range(dst_pgd++, src_pgd++, address, end - address, cow); if (error) break; address = (address + PGDIR_SIZE) & PGDIR_MASK; } /* Note that the src ptes get c-o-w treatment, so they change too. */ flush_tlb_range(src, vma->vm_start, vma->vm_end); flush_tlb_range(dst, vma->vm_start, vma->vm_end); return error; } static inline void free_pte(pte_t page) { if (pte_present(page)) { unsigned long addr = pte_page(page); if (addr >= high_memory || PageReserved(mem_map+MAP_NR(addr))) return; free_page(addr); if (current->mm->rss <= 0) return; current->mm->rss--; return; } swap_free(pte_val(page)); } static inline void forget_pte(pte_t page) { if (!pte_none(page)) { printk("forget_pte: old mapping existed!\n"); free_pte(page); } } static inline void zap_pte_range(pmd_t * pmd, unsigned long address, unsigned long size) { pte_t * pte; if (pmd_none(*pmd)) return; if (pmd_bad(*pmd)) { printk("zap_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd)); pmd_clear(pmd); return; } pte = pte_offset(pmd, address); address &= ~PMD_MASK; if (address + size > PMD_SIZE) size = PMD_SIZE - address; size >>= PAGE_SHIFT; for (;;) { pte_t page; if (!size) break; page = *pte; pte++; size--; if (pte_none(page)) continue; pte_clear(pte-1); free_pte(page); } } static inline void zap_pmd_range(pgd_t * dir, unsigned long address, unsigned long size) { pmd_t * pmd; unsigned long end; if (pgd_none(*dir)) return; if (pgd_bad(*dir)) { printk("zap_pmd_range: bad pgd (%08lx)\n", pgd_val(*dir)); pgd_clear(dir); return; } pmd = pmd_offset(dir, address); address &= ~PGDIR_MASK; end = address + size; if (end > PGDIR_SIZE) end = PGDIR_SIZE; do { zap_pte_range(pmd, address, end - address); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address < end); } /* * remove user pages in a given range. */ int zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size) { pgd_t * dir; unsigned long end = address + size; dir = pgd_offset(mm, address); flush_cache_range(mm, end - size, end); while (address < end) { zap_pmd_range(dir, address, end - address); address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } flush_tlb_range(mm, end - size, end); return 0; } static inline void zeromap_pte_range(pte_t * pte, unsigned long address, unsigned long size, pte_t zero_pte) { unsigned long end; address &= ~PMD_MASK; end = address + size; if (end > PMD_SIZE) end = PMD_SIZE; do { pte_t oldpage = *pte; set_pte(pte, zero_pte); forget_pte(oldpage); address += PAGE_SIZE; pte++; } while (address < end); } static inline int zeromap_pmd_range(pmd_t * pmd, unsigned long address, unsigned long size, pte_t zero_pte) { unsigned long end; address &= ~PGDIR_MASK; end = address + size; if (end > PGDIR_SIZE) end = PGDIR_SIZE; do { pte_t * pte = pte_alloc(pmd, address); if (!pte) return -ENOMEM; zeromap_pte_range(pte, address, end - address, zero_pte); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address < end); return 0; } int zeromap_page_range(unsigned long address, unsigned long size, pgprot_t prot) { int error = 0; pgd_t * dir; unsigned long beg = address; unsigned long end = address + size; pte_t zero_pte; zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE, prot)); dir = pgd_offset(current->mm, address); flush_cache_range(current->mm, beg, end); while (address < end) { pmd_t *pmd = pmd_alloc(dir, address); error = -ENOMEM; if (!pmd) break; error = zeromap_pmd_range(pmd, address, end - address, zero_pte); if (error) break; address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } flush_tlb_range(current->mm, beg, end); return error; } /* * maps a range of physical memory into the requested pages. the old * mappings are removed. any references to nonexistent pages results * in null mappings (currently treated as "copy-on-access") */ static inline void remap_pte_range(pte_t * pte, unsigned long address, unsigned long size, unsigned long offset, pgprot_t prot) { unsigned long end; address &= ~PMD_MASK; end = address + size; if (end > PMD_SIZE) end = PMD_SIZE; do { pte_t oldpage = *pte; pte_clear(pte); if (offset >= high_memory || PageReserved(mem_map+MAP_NR(offset))) set_pte(pte, mk_pte(offset, prot)); forget_pte(oldpage); address += PAGE_SIZE; offset += PAGE_SIZE; pte++; } while (address < end); } static inline int remap_pmd_range(pmd_t * pmd, unsigned long address, unsigned long size, unsigned long offset, pgprot_t prot) { unsigned long end; address &= ~PGDIR_MASK; end = address + size; if (end > PGDIR_SIZE) end = PGDIR_SIZE; offset -= address; do { pte_t * pte = pte_alloc(pmd, address); if (!pte) return -ENOMEM; remap_pte_range(pte, address, end - address, address + offset, prot); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address < end); return 0; } int remap_page_range(unsigned long from, unsigned long offset, unsigned long size, pgprot_t prot) { int error = 0; pgd_t * dir; unsigned long beg = from; unsigned long end = from + size; offset -= from; dir = pgd_offset(current->mm, from); flush_cache_range(current->mm, beg, end); while (from < end) { pmd_t *pmd = pmd_alloc(dir, from); error = -ENOMEM; if (!pmd) break; error = remap_pmd_range(pmd, from, end - from, offset + from, prot); if (error) break; from = (from + PGDIR_SIZE) & PGDIR_MASK; dir++; } flush_tlb_range(current->mm, beg, end); return error; } /* * sanity-check function.. */ static void put_page(pte_t * page_table, pte_t pte) { if (!pte_none(*page_table)) { free_page(pte_page(pte)); return; } /* no need for flush_tlb */ set_pte(page_table, pte); } /* * This routine is used to map in a page into an address space: needed by * execve() for the initial stack and environment pages. */ unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address) { pgd_t * pgd; pmd_t * pmd; pte_t * pte; if (page >= high_memory) printk("put_dirty_page: trying to put page %08lx at %08lx\n",page,address); if (mem_map[MAP_NR(page)].count != 1) printk("mem_map disagrees with %08lx at %08lx\n",page,address); pgd = pgd_offset(tsk->mm,address); pmd = pmd_alloc(pgd, address); if (!pmd) { free_page(page); oom(tsk); return 0; } pte = pte_alloc(pmd, address); if (!pte) { free_page(page); oom(tsk); return 0; } if (!pte_none(*pte)) { printk("put_dirty_page: page already exists\n"); free_page(page); return 0; } flush_page_to_ram(page); set_pte(pte, pte_mkwrite(pte_mkdirty(mk_pte(page, PAGE_COPY)))); /* no need for invalidate */ return page; } /* * This routine handles present pages, when users try to write * to a shared page. It is done by copying the page to a new address * and decrementing the shared-page counter for the old page. * * Goto-purists beware: the only reason for goto's here is that it results * in better assembly code.. The "default" path will see no jumps at all. * * Note that this routine assumes that the protection checks have been * done by the caller (the low-level page fault routine in most cases). * Thus we can safely just mark it writable once we've done any necessary * COW. * * We also mark the page dirty at this point even though the page will * change only once the write actually happens. This avoids a few races, * and potentially makes it more efficient. */ void do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long address, int write_access) { pgd_t *page_dir; pmd_t *page_middle; pte_t *page_table, pte; unsigned long old_page, new_page; new_page = __get_free_page(GFP_KERNEL); page_dir = pgd_offset(vma->vm_mm, address); if (pgd_none(*page_dir)) goto end_wp_page; if (pgd_bad(*page_dir)) goto bad_wp_pagedir; page_middle = pmd_offset(page_dir, address); if (pmd_none(*page_middle)) goto end_wp_page; if (pmd_bad(*page_middle)) goto bad_wp_pagemiddle; page_table = pte_offset(page_middle, address); pte = *page_table; if (!pte_present(pte)) goto end_wp_page; if (pte_write(pte)) goto end_wp_page; old_page = pte_page(pte); if (old_page >= high_memory) goto bad_wp_page; tsk->min_flt++; /* * Do we need to copy? */ if (mem_map[MAP_NR(old_page)].count != 1) { if (new_page) { if (PageReserved(mem_map + MAP_NR(old_page))) ++vma->vm_mm->rss; copy_page(old_page,new_page); flush_page_to_ram(old_page); flush_page_to_ram(new_page); flush_cache_page(vma, address); set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)))); free_page(old_page); flush_tlb_page(vma, address); return; } flush_cache_page(vma, address); set_pte(page_table, BAD_PAGE); flush_tlb_page(vma, address); free_page(old_page); oom(tsk); return; } flush_cache_page(vma, address); set_pte(page_table, pte_mkdirty(pte_mkwrite(pte))); flush_tlb_page(vma, address); if (new_page) free_page(new_page); return; bad_wp_page: printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page); send_sig(SIGKILL, tsk, 1); goto end_wp_page; bad_wp_pagemiddle: printk("do_wp_page: bogus page-middle at address %08lx (%08lx)\n", address, pmd_val(*page_middle)); send_sig(SIGKILL, tsk, 1); goto end_wp_page; bad_wp_pagedir: printk("do_wp_page: bogus page-dir entry at address %08lx (%08lx)\n", address, pgd_val(*page_dir)); send_sig(SIGKILL, tsk, 1); end_wp_page: if (new_page) free_page(new_page); return; } /* * Ugly, ugly, but the goto's result in better assembly.. */ int verify_area(int type, const void * addr, unsigned long size) { struct vm_area_struct * vma; unsigned long start = (unsigned long) addr; /* If the current user space is mapped to kernel space (for the * case where we use a fake user buffer with get_fs/set_fs()) we * don't expect to find the address in the user vm map. */ if (!size || get_fs() == KERNEL_DS) return 0; vma = find_vma(current->mm, start); if (!vma) goto bad_area; if (vma->vm_start > start) goto check_stack; good_area: if (type == VERIFY_WRITE) goto check_write; for (;;) { struct vm_area_struct * next; if (!(vma->vm_flags & VM_READ)) goto bad_area; if (vma->vm_end - start >= size) return 0; next = vma->vm_next; if (!next || vma->vm_end != next->vm_start) goto bad_area; vma = next; } check_write: if (!(vma->vm_flags & VM_WRITE)) goto bad_area; if (!wp_works_ok) goto check_wp_fault_by_hand; for (;;) { if (vma->vm_end - start >= size) break; if (!vma->vm_next || vma->vm_end != vma->vm_next->vm_start) goto bad_area; vma = vma->vm_next; if (!(vma->vm_flags & VM_WRITE)) goto bad_area; } return 0; check_wp_fault_by_hand: size--; size += start & ~PAGE_MASK; size >>= PAGE_SHIFT; start &= PAGE_MASK; for (;;) { do_wp_page(current, vma, start, 1); if (!size) break; size--; start += PAGE_SIZE; if (start < vma->vm_end) continue; vma = vma->vm_next; if (!vma || vma->vm_start != start) goto bad_area; if (!(vma->vm_flags & VM_WRITE)) goto bad_area;; } return 0; check_stack: if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; if (expand_stack(vma, start) == 0) goto good_area; bad_area: return -EFAULT; } /* * This function zeroes out partial mmap'ed pages at truncation time.. */ static void partial_clear(struct vm_area_struct *vma, unsigned long address) { pgd_t *page_dir; pmd_t *page_middle; pte_t *page_table, pte; page_dir = pgd_offset(vma->vm_mm, address); if (pgd_none(*page_dir)) return; if (pgd_bad(*page_dir)) { printk("bad page table directory entry %p:[%lx]\n", page_dir, pgd_val(*page_dir)); pgd_clear(page_dir); return; } page_middle = pmd_offset(page_dir, address); if (pmd_none(*page_middle)) return; if (pmd_bad(*page_middle)) { printk("bad page table directory entry %p:[%lx]\n", page_dir, pgd_val(*page_dir)); pmd_clear(page_middle); return; } page_table = pte_offset(page_middle, address); pte = *page_table; if (!pte_present(pte)) return; flush_cache_page(vma, address); address &= ~PAGE_MASK; address += pte_page(pte); if (address >= high_memory) return; memset((void *) address, 0, PAGE_SIZE - (address & ~PAGE_MASK)); flush_page_to_ram(pte_page(pte)); } /* * Handle all mappings that got truncated by a "truncate()" * system call. * * NOTE! We have to be ready to update the memory sharing * between the file and the memory map for a potential last * incomplete page. Ugly, but necessary. */ void vmtruncate(struct inode * inode, unsigned long offset) { struct vm_area_struct * mpnt; truncate_inode_pages(inode, offset); if (!inode->i_mmap) return; mpnt = inode->i_mmap; do { unsigned long start = mpnt->vm_start; unsigned long len = mpnt->vm_end - start; unsigned long diff; /* mapping wholly truncated? */ if (mpnt->vm_offset >= offset) { zap_page_range(mpnt->vm_mm, start, len); continue; } /* mapping wholly unaffected? */ diff = offset - mpnt->vm_offset; if (diff >= len) continue; /* Ok, partially affected.. */ start += diff; len = (len - diff) & PAGE_MASK; if (start & ~PAGE_MASK) { partial_clear(mpnt, start); start = (start + ~PAGE_MASK) & PAGE_MASK; } zap_page_range(mpnt->vm_mm, start, len); } while ((mpnt = mpnt->vm_next_share) != inode->i_mmap); } static inline void do_swap_page(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long address, pte_t * page_table, pte_t entry, int write_access) { pte_t page; if (!vma->vm_ops || !vma->vm_ops->swapin) { swap_in(tsk, vma, page_table, pte_val(entry), write_access); flush_page_to_ram(pte_page(*page_table)); return; } page = vma->vm_ops->swapin(vma, address - vma->vm_start + vma->vm_offset, pte_val(entry)); if (pte_val(*page_table) != pte_val(entry)) { free_page(pte_page(page)); return; } if (mem_map[MAP_NR(pte_page(page))].count > 1 && !(vma->vm_flags & VM_SHARED)) page = pte_wrprotect(page); ++vma->vm_mm->rss; ++tsk->maj_flt; flush_page_to_ram(pte_page(page)); set_pte(page_table, page); return; } /* * do_no_page() tries to create a new page mapping. It aggressively * tries to share with existing pages, but makes a separate copy if * the "write_access" parameter is true in order to avoid the next * page fault. * * As this is called only for pages that do not currently exist, we * do not need to flush old virtual caches or the TLB. */ void do_no_page(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long address, int write_access) { pgd_t * pgd; pmd_t * pmd; pte_t * page_table; pte_t entry; unsigned long page; pgd = pgd_offset(tsk->mm, address); pmd = pmd_alloc(pgd, address); if (!pmd) goto no_memory; page_table = pte_alloc(pmd, address); if (!page_table) goto no_memory; entry = *page_table; if (pte_present(entry)) goto is_present; if (!pte_none(entry)) goto swap_page; address &= PAGE_MASK; if (!vma->vm_ops || !vma->vm_ops->nopage) goto anonymous_page; /* * The third argument is "no_share", which tells the low-level code * to copy, not share the page even if sharing is possible. It's * essentially an early COW detection */ page = vma->vm_ops->nopage(vma, address, (vma->vm_flags & VM_SHARED)?0:write_access); if (!page) goto sigbus; ++tsk->maj_flt; ++vma->vm_mm->rss; /* * This silly early PAGE_DIRTY setting removes a race * due to the bad i386 page protection. But it's valid * for other architectures too. * * Note that if write_access is true, we either now have * a exclusive copy of the page, or this is a shared mapping, * so we can make it writable and dirty to avoid having to * handle that later. */ flush_page_to_ram(page); entry = mk_pte(page, vma->vm_page_prot); if (write_access) { entry = pte_mkwrite(pte_mkdirty(entry)); } else if (mem_map[MAP_NR(page)].count > 1 && !(vma->vm_flags & VM_SHARED)) entry = pte_wrprotect(entry); put_page(page_table, entry); /* no need to invalidate: a not-present page shouldn't be cached */ return; anonymous_page: entry = pte_wrprotect(mk_pte(ZERO_PAGE, vma->vm_page_prot)); if (write_access) { unsigned long page = __get_free_page(GFP_KERNEL); if (!page) goto sigbus; memset((void *) page, 0, PAGE_SIZE); entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); vma->vm_mm->rss++; tsk->min_flt++; flush_page_to_ram(page); } put_page(page_table, entry); return; sigbus: force_sig(SIGBUS, current); put_page(page_table, BAD_PAGE); /* no need to invalidate, wasn't present */ return; swap_page: do_swap_page(tsk, vma, address, page_table, entry, write_access); return; no_memory: oom(tsk); is_present: return; } /* * The above separate functions for the no-page and wp-page * cases will go away (they mostly do the same thing anyway), * and we'll instead use only a general "handle_mm_fault()". * * These routines also need to handle stuff like marking pages dirty * and/or accessed for architectures that don't do it in hardware (most * RISC architectures). The early dirtying is also good on the i386. * * There is also a hook called "update_mmu_cache()" that architectures * with external mmu caches can use to update those (ie the Sparc or * PowerPC hashed page tables that act as extended TLBs). */ static inline void handle_pte_fault(struct vm_area_struct * vma, unsigned long address, int write_access, pte_t * pte) { if (!pte_present(*pte)) { do_no_page(current, vma, address, write_access); return; } set_pte(pte, pte_mkyoung(*pte)); flush_tlb_page(vma, address); if (!write_access) return; if (pte_write(*pte)) { set_pte(pte, pte_mkdirty(*pte)); flush_tlb_page(vma, address); return; } do_wp_page(current, vma, address, write_access); } void handle_mm_fault(struct vm_area_struct * vma, unsigned long address, int write_access) { pgd_t *pgd; pmd_t *pmd; pte_t *pte; pgd = pgd_offset(vma->vm_mm, address); pmd = pmd_alloc(pgd, address); if (!pmd) goto no_memory; pte = pte_alloc(pmd, address); if (!pte) goto no_memory; handle_pte_fault(vma, address, write_access, pte); update_mmu_cache(vma, address, *pte); return; no_memory: oom(current); } 1.1 or1k/rc203soc/sw/uClinux/mm/mlock.c http://www.opencores.org/cvsweb.shtml/or1k/rc203soc/sw/uClinux/mm/mlock.c?rev=1.1&content-type=text/x-cvsweb-markup Index: mlock.c =================================================================== /* * linux/mm/mlock.c * * (C) Copyright 1995 Linus Torvalds */ #include <linux/stat.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/shm.h> #include <linux/errno.h> #include <linux/mman.h> #include <linux/string.h> #include <linux/malloc.h> #include <asm/segment.h> #include <asm/system.h> #include <asm/pgtable.h> static inline int mlock_fixup_all(struct vm_area_struct * vma, int newflags) { vma->vm_flags = newflags; return 0; } static inline int mlock_fixup_start(struct vm_area_struct * vma, unsigned long end, int newflags) { struct vm_area_struct * n; n = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL); if (!n) return -EAGAIN; *n = *vma; vma->vm_start = end; n->vm_end = end; vma->vm_offset += vma->vm_start - n->vm_start; n->vm_flags = newflags; if (n->vm_inode) n->vm_inode->i_count++; if (n->vm_ops && n->vm_ops->open) n->vm_ops->open(n); insert_vm_struct(current->mm, n); return 0; } static inline int mlock_fixup_end(struct vm_area_struct * vma, unsigned long start, int newflags) { struct vm_area_struct * n; n = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL); if (!n) return -EAGAIN; *n = *vma; vma->vm_end = start; n->vm_start = start; n->vm_offset += n->vm_start - vma->vm_start; n->vm_flags = newflags; if (n->vm_inode) n->vm_inode->i_count++; if (n->vm_ops && n->vm_ops->open) n->vm_ops->open(n); insert_vm_struct(current->mm, n); return 0; } static inline int mlock_fixup_middle(struct vm_area_struct * vma, unsigned long start, unsigned long end, int newflags) { struct vm_area_struct * left, * right; left = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL); if (!left) return -EAGAIN; right = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL); if (!right) { kfree(left); return -EAGAIN; } *left = *vma; *right = *vma; left->vm_end = start; vma->vm_start = start; vma->vm_end = end; right->vm_start = end; vma->vm_offset += vma->vm_start - left->vm_start; right->vm_offset += right->vm_start - left->vm_start; vma->vm_flags = newflags; if (vma->vm_inode) vma->vm_inode->i_count += 2; if (vma->vm_ops && vma->vm_ops->open) { vma->vm_ops->open(left); vma->vm_ops->open(right); } insert_vm_struct(current->mm, left); insert_vm_struct(current->mm, right); return 0; } static int mlock_fixup(struct vm_area_struct * vma, unsigned long start, unsigned long end, unsigned int newflags) { int pages, retval; if (newflags == vma->vm_flags) return 0; if (start == vma->vm_start) { if (end == vma->vm_end) retval = mlock_fixup_all(vma, newflags); else retval = mlock_fixup_start(vma, end, newflags); } else { if (end == vma->vm_end) retval = mlock_fixup_end(vma, start, newflags); else retval = mlock_fixup_middle(vma, start, end, newflags); } if (!retval) { /* keep track of amount of locked VM */ pages = (end - start) >> PAGE_SHIFT; if (!(newflags & VM_LOCKED)) pages = -pages; vma->vm_mm->locked_vm += pages; if ((newflags & VM_LOCKED) && (newflags & VM_READ)) while (start < end) { int c = get_user((int *) start); __asm__ __volatile__("": :"r" (c)); start += PAGE_SIZE; } } return retval; } static int do_mlock(unsigned long start, size_t len, int on) { unsigned long nstart, end, tmp; struct vm_area_struct * vma, * next; int error; if (!suser()) return -EPERM; len = (len + ~PAGE_MASK) & PAGE_MASK; end = start + len; if (end < start) return -EINVAL; if (end == start) return 0; vma = find_vma(current->mm, start); if (!vma || vma->vm_start > start) return -ENOMEM; for (nstart = start ; ; ) { unsigned int newflags; /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ newflags = vma->vm_flags | VM_LOCKED; if (!on) newflags &= ~VM_LOCKED; if (vma->vm_end >= end) { error = mlock_fixup(vma, nstart, end, newflags); break; } tmp = vma->vm_end; next = vma->vm_next; error = mlock_fixup(vma, nstart, tmp, newflags); if (error) break; nstart = tmp; vma = next; if (!vma || vma->vm_start != nstart) { error = -ENOMEM; break; } } merge_segments(current->mm, start, end); return error; } asmlinkage int sys_mlock(unsigned long start, size_t len) { unsigned long locked; unsigned long lock_limit; len = (len + (start & ~PAGE_MASK) + ~PAGE_MASK) & PAGE_MASK; start &= PAGE_MASK; locked = len >> PAGE_SHIFT; locked += current->mm->locked_vm; lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur; lock_limit >>= PAGE_SHIFT; /* check against resource limits */ if (locked > lock_limit) return -ENOMEM; /* we may lock at most half of physical memory... */ /* (this check is pretty bogus, but doesn't hurt) */ if (locked > (MAP_NR(high_memory) >> 1)) return -ENOMEM; return do_mlock(start, len, 1); } asmlinkage int sys_munlock(unsigned long start, size_t len) { len = (len + (start & ~PAGE_MASK) + ~PAGE_MASK) & PAGE_MASK; start &= PAGE_MASK; return do_mlock(start, len, 0); } static int do_mlockall(int flags) { int error; unsigned int def_flags; struct vm_area_struct * vma; if (!suser()) return -EPERM; def_flags = 0; if (flags & MCL_FUTURE) def_flags = VM_LOCKED; current->mm->def_flags = def_flags; error = 0; for (vma = current->mm->mmap; vma ; vma = vma->vm_next) { unsigned int newflags; newflags = vma->vm_flags | VM_LOCKED; if (!(flags & MCL_CURRENT)) newflags &= ~VM_LOCKED; error = mlock_fixup(vma, vma->vm_start, vma->vm_end, newflags); if (error) break; } merge_segments(current->mm, 0, TASK_SIZE); return error; } asmlinkage int sys_mlockall(int flags) { unsigned long lock_limit; if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE))) return -EINVAL; lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur; lock_limit >>= PAGE_SHIFT; if (current->mm->total_vm > lock_limit) return -ENOMEM; /* we may lock at most half of physical memory... */ /* (this check is pretty bogus, but doesn't hurt) */ if (current->mm->total_vm > (MAP_NR(high_memory) >> 1)) return -ENOMEM; return do_mlockall(flags); } asmlinkage int sys_munlockall(void) { return do_mlockall(0); } 1.1 or1k/rc203soc/sw/uClinux/mm/mmap.c http://www.opencores.org/cvsweb.shtml/or1k/rc203soc/sw/uClinux/mm/mmap.c?rev=1.1&content-type=text/x-cvsweb-markup Index: mmap.c =================================================================== /* * linux/mm/mmap.c * * Written by obz. */ #include <linux/stat.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/shm.h> #include <linux/errno.h> #include <linux/mman.h> #include <linux/string.h> #include <linux/malloc.h> #include <linux/pagemap.h> #include <linux/swap.h> #include <asm/segment.h> #include <asm/system.h> #include <asm/pgtable.h> /* * description of effects of mapping type and prot in current implementation. * this is due to the limited x86 page protection hardware. The expected * behavior is in parens: * * map_type prot * PROT_NONE PROT_READ PROT_WRITE PROT_EXEC * MAP_SHARED r: (no) no r: (yes) yes r: (no) yes r: (no) yes * w: (no) no w: (no) no w: (yes) yes w: (no) no * x: (no) no x: (no) yes x: (no) yes x: (yes) yes * * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes * w: (no) no w: (no) no w: (copy) copy w: (no) no * x: (no) no x: (no) yes x: (no) yes x: (yes) yes * */ pgprot_t protection_map[16] = { __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111, __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111 }; /* * Check that a process has enough memory to allocate a * new virtual mapping. */ int vm_enough_memory(long pages) { /* * stupid algorithm to decide if we have enough memory: while * simple, it hopefully works in most obvious cases.. Easy to * fool it, but this should catch most mistakes. */ long freepages; freepages = buffermem >> PAGE_SHIFT; freepages += page_cache_size; if (freepages <= (MAP_NR(high_memory) >> 4) + 48) freepages >>= 1; freepages += nr_free_pages; freepages += nr_swap_pages; freepages -= MAP_NR(high_memory) >> 4; return freepages > pages; } asmlinkage unsigned long sys_brk(unsigned long brk) { unsigned long rlim; unsigned long newbrk, oldbrk; struct mm_struct *mm = current->mm; if (brk < mm->end_code) return mm->brk; newbrk = PAGE_ALIGN(brk); oldbrk = PAGE_ALIGN(mm->brk); if (oldbrk == newbrk) return mm->brk = brk; /* * Always allow shrinking brk */ if (brk <= mm->brk) { mm->brk = brk; do_munmap(newbrk, oldbrk-newbrk); return brk; } /* * Check against rlimit and stack.. */ rlim = current->rlim[RLIMIT_DATA].rlim_cur; if (rlim >= RLIM_INFINITY) rlim = ~0; if (brk - mm->end_code > rlim) return mm->brk; /* * Check against existing mmap mappings. */ if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE)) return mm->brk; /* * Check if we have enough memory.. */ if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) return mm->brk; /* * Ok, looks good - let it rip. */ if(do_mmap(NULL, oldbrk, newbrk-oldbrk, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_PRIVATE, 0) != oldbrk) return mm->brk; return mm->brk = brk; } /* * Combine the mmap "prot" and "flags" argument into one "vm_flags" used * internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits * into "VM_xxx". */ static inline unsigned long vm_flags(unsigned long prot, unsigned long flags) { #define _trans(x,bit1,bit2) \ ((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0) unsigned long prot_bits, flag_bits; prot_bits = _trans(prot, PROT_READ, VM_READ) | _trans(prot, PROT_WRITE, VM_WRITE) | _trans(prot, PROT_EXEC, VM_EXEC); flag_bits = _trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) | _trans(flags, MAP_DENYWRITE, VM_DENYWRITE) | _trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE); return prot_bits | flag_bits; #undef _trans } unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long off) { struct mm_struct * mm = current->mm; struct vm_area_struct * vma; if ((len = PAGE_ALIGN(len)) == 0) return addr; if (len > MAX_USER_ADDR || addr > MAX_USER_ADDR-len) return -EINVAL; /* offset overflow? */ if (off + len < off) return -EINVAL; /* mlock MCL_FUTURE? */ if (mm->def_flags & VM_LOCKED) { unsigned long locked = mm->locked_vm << PAGE_SHIFT; locked += len; if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN; } /* * do simple checking here so the lower-level routines won't have * to. we assume access permissions have been handled by the open * of the memory object, so we don't do any here. */ if (file != NULL) { switch (flags & MAP_TYPE) { case MAP_SHARED: if ((prot & PROT_WRITE) && !(file->f_mode & 2)) return -EACCES; /* * make sure there are no mandatory locks on the file. */ if (locks_verify_locked(file->f_inode)) return -EAGAIN; /* cevans -- whoops another append-only file flaw */ if (IS_APPEND(file->f_inode) && (file->f_mode & 2)) return -EACCES; /* fall through */ case MAP_PRIVATE: if (!(file->f_mode & 1)) return -EACCES; break; default: return -EINVAL; } if (flags & MAP_DENYWRITE) { if (file->f_inode->i_writecount > 0) return -ETXTBSY; } } else if ((flags & MAP_TYPE) != MAP_PRIVATE) return -EINVAL; /* * obtain the address to map to. we verify (or select) it and ensure * that it represents a valid section of the address space. */ if (flags & MAP_FIXED) { if (addr & ~PAGE_MASK) return -EINVAL; } else { addr = get_unmapped_area(addr, len); if (!addr) return -ENOMEM; } /* * determine the object being mapped and call the appropriate * specific mapper. the address has already been validated, but * not unmapped, but the maps are removed from the list. */ if (file && (!file->f_op || !file->f_op->mmap)) return -ENODEV; vma = (struct vm_area_struct *)kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL); if (!vma) return -ENOMEM; vma->vm_mm = mm; vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_flags = vm_flags(prot,flags) | mm->def_flags; if (file) { if (file->f_mode & 1) vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; if (flags & MAP_SHARED) { vma->vm_flags |= VM_SHARED | VM_MAYSHARE; /* * This looks strange, but when we don't have the file open * for writing, we can demote the shared mapping to a simpler * private mapping. That also takes care of a security hole * with ptrace() writing to a shared mapping without write * permissions. * * We leave the VM_MAYSHARE bit on, just to get correct output * from /proc/xxx/maps.. */ if (!(file->f_mode & 2)) vma->vm_flags &= ~(VM_MAYWRITE | VM_SHARED); } } else vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f]; vma->vm_ops = NULL; vma->vm_offset = off; vma->vm_inode = NULL; vma->vm_pte = 0; do_munmap(addr, len); /* Clear old maps */ /* Check against address space limit. */ if ((mm->total_vm << PAGE_SHIFT) + len > current->rlim[RLIMIT_AS].rlim_cur) { kfree(vma); return -ENOMEM; } /* Private writable mapping? Check memory availability.. */ if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE) { if (!vm_enough_memory(len >> PAGE_SHIFT)) { kfree(vma); return -ENOMEM; } } if (file) { int error = file->f_op->mmap(file->f_inode, file, vma); if (error) { kfree(vma); return error; } } flags = vma->vm_flags; insert_vm_struct(mm, vma); merge_segments(mm, vma->vm_start, vma->vm_end); /* merge_segments might have merged our vma, so we can't use it any more */ mm->total_vm += len >> PAGE_SHIFT; if (flags & VM_LOCKED) { unsigned long start = addr; mm->locked_vm += len >> PAGE_SHIFT; do { char c = get_user((char *) start); len -= PAGE_SIZE; start += PAGE_SIZE; __asm__ __volatile__("": :"r" (c)); } while (len > 0); } return addr; } /* * Get an address range which is currently unmapped. * For mmap() without MAP_FIXED and shmat() with addr=0. * Return value 0 means ENOMEM. */ unsigned long get_unmapped_area(unsigned long addr, unsigned long len) { struct vm_area_struct * vmm; if (len > MAX_USER_ADDR) return 0; if (!addr) addr = MMAP_SEARCH_START; addr = PAGE_ALIGN(addr); for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) { /* At this point: (!vmm || addr < vmm->vm_end). */ if (MAX_USER_ADDR - len < addr) return 0; if (!vmm || addr + len <= vmm->vm_start) return addr; addr = vmm->vm_end; } } /* * Searching a VMA in the linear list task->mm->mmap is horribly slow. * Use an AVL (Adelson-Velskii and Landis) tree to speed up this search * from O(n) to O(log n), where n is the number of VMAs of the task * (typically around 6, but may reach 3000 in some cases). * Written by Bruno Haible <haible@m...>. */ /* We keep the list and tree sorted by address. */ #define vm_avl_key vm_end #define vm_avl_key_t unsigned long /* typeof(vma->avl_key) */ /* * task->mm->mmap_avl is the AVL tree corresponding to task->mm->mmap * or, more exactly, its root. * A vm_area_struct has the following fields: * vm_avl_left left son of a tree node * vm_avl_right right son of a tree node * vm_avl_height 1+max(heightof(left),heightof(right)) * The empty tree is represented as NULL. */ /* Since the trees are balanced, their height will never be large. */ #define avl_maxheight 41 /* why this? a small exercise */ #define heightof(tree) ((tree) == avl_empty ? 0 : (tree)->vm_avl_height) /* * Consistency and balancing rules: * 1. tree->vm_avl_height == 1+max(heightof(tree->vm_avl_left),heightof(tree->vm_avl_right)) * 2. abs( heightof(tree->vm_avl_left) - heightof(tree->vm_avl_right) ) <= 1 * 3. foreach node in tree->vm_avl_left: node->vm_avl_key <= tree->vm_avl_key, * foreach node in tree->vm_avl_right: node->vm_avl_key >= tree->vm_avl_key. */ /* Look up the nodes at the left and at the right of a given node. */ static inline void avl_neighbours (struct vm_area_struct * node, struct vm_area_struct * tree, struct vm_area_struct ** to_the_left, struct vm_area_struct ** to_the_right) { vm_avl_key_t key = node->vm_avl_key; *to_the_left = *to_the_right = NULL; for (;;) { if (tree == avl_empty) { printk("avl_neighbours: node not found in the tree\n"); return; } if (key == tree->vm_avl_key) break; if (key < tree->vm_avl_key) { *to_the_right = tree; tree = tree->vm_avl_left; } else { *to_the_left = tree; tree = tree->vm_avl_right; } } if (tree != node) { printk("avl_neighbours: node not exactly found in the tree\n"); return; } if (tree->vm_avl_left != avl_empty) { struct vm_area_struct * node; for (node = tree->vm_avl_left; node->vm_avl_right != avl_empty; node = node->vm_avl_right) continue; *to_the_left = node; } if (tree->vm_avl_right != avl_empty) { struct vm_area_struct * node; for (node = tree->vm_avl_right; node->vm_avl_left != avl_empty; node = node->vm_avl_left) continue; *to_the_right = node; } if ((*to_the_left && ((*to_the_left)->vm_next != node)) || (node->vm_next != *to_the_right)) printk("avl_neighbours: tree inconsistent with list\n"); } /* * Rebalance a tree. * After inserting or deleting a node of a tree we have a sequence of subtrees * nodes[0]..nodes[k-1] such that * nodes[0] is the root and nodes[i+1] = nodes[i]->{vm_avl_left|vm_avl_right}. */ static inline void avl_rebalance (struct vm_area_struct *** nodeplaces_ptr, int count) { for ( ; count > 0 ; count--) { struct vm_area_struct ** nodeplace = *--nodeplaces_ptr; struct vm_area_struct * node = *nodeplace; struct vm_area_struct * nodeleft = node->vm_avl_left; struct vm_area_struct * noderight = node->vm_avl_right; int heightleft = heightof(nodeleft); int heightright = heightof(noderight); if (heightright + 1 < heightleft) { /* */ /* * */ /* / \ */ /* n+2 n */ /* */ struct vm_area_struct * nodeleftleft = nodeleft->vm_avl_left; struct vm_area_struct * nodeleftright = nodeleft->vm_avl_right; int heightleftright = heightof(nodeleftright); if (heightof(nodeleftleft) >= heightleftright) { /* */ /* * n+2|n+3 */ /* / \ / \ */ /* n+2 n --> / n+1|n+2 */ /* / \ | / \ */ /* n+1 n|n+1 n+1 n|n+1 n */ /* */ node->vm_avl_left = nodeleftright; nodeleft->vm_avl_right = node; nodeleft->vm_avl_height = 1 + (node->vm_avl_height = 1 + heightleftright); *nodeplace = nodeleft; } else { /* */ /* * n+2 */ /* / \ / \ */ /* n+2 n --> n+1 n+1 */ /* / \ / \ / \ */ /* n n+1 n L R n */ /* / \ */ /* L R */ /* */ nodeleft->vm_avl_right = nodeleftright->vm_avl_left; node->vm_avl_left = nodeleftright->vm_avl_right; nodeleftright->vm_avl_left = nodeleft; nodeleftright->vm_avl_right = node; nodeleft->vm_avl_height = node->vm_avl_height = heightleftright; nodeleftright->vm_avl_height = heightleft; *nodeplace = nodeleftright; } } else if (heightleft + 1 < heightright) { /* similar to the above, just interchange 'left' <--> 'right' */ struct vm_area_struct * noderightright = noderight->vm_avl_right; struct vm_area_struct * noderightleft = noderight->vm_avl_left; int heightrightleft = heightof(noderightleft); if (heightof(noderightright) >= heightrightleft) { node->vm_avl_right = noderightleft; noderight->vm_avl_left = node; noderight->vm_avl_height = 1 + (node->vm_avl_height = 1 + heightrightleft); *nodeplace = noderight; } else { noderight->vm_avl_left = noderightleft->vm_avl_right; node->vm_avl_right = noderightleft->vm_avl_left; noderightleft->vm_avl_right = noderight; noderightleft->vm_avl_left = node; noderight->vm_avl_height = node->vm_avl_height = heightrightleft; noderightleft->vm_avl_height = heightright; *nodeplace = noderightleft; } } else { int height = (heightleft<heightright ? heightright : heightleft) + 1; if (height == node->vm_avl_height) break; node->vm_avl_height = height; } } } /* Insert a node into a tree. */ static inline void avl_insert (struct vm_area_struct * new_node, struct vm_area_struct ** ptree) { vm_avl_key_t key = new_node->vm_avl_key; struct vm_area_struct ** nodeplace = ptree; struct vm_area_struct ** stack[avl_maxheight]; int stack_count = 0; struct vm_area_struct *** stack_ptr = &stack[0]; /* = &stack[stackcount] */ for (;;) { struct vm_area_struct * node = *nodeplace; if (node == avl_empty) break; *stack_ptr++ = nodeplace; stack_count++; if (key < node->vm_avl_key) nodeplace = &node->vm_avl_left; else nodeplace = &node->vm_avl_right; } new_node->vm_avl_left = avl_empty; new_node->vm_avl_right = avl_empty; new_node->vm_avl_height = 1; *nodeplace = new_node; avl_rebalance(stack_ptr,stack_count); } /* Insert a node into a tree, and * return the node to the left of it and the node to the right of it. */ static inline void avl_insert_neighbours (struct vm_area_struct * new_node, struct vm_area_struct ** ptree, struct vm_area_struct ** to_the_left, struct vm_area_struct ** to_the_right) { vm_avl_key_t key = new_node->vm_avl_key; struct vm_area_struct ** nodeplace = ptree; struct vm_area_struct ** stack[avl_maxheight]; int stack_count = 0; struct vm_area_struct *** stack_ptr = &stack[0]; /* = &stack[stackcount] */ *to_the_left = *to_the_right = NULL; for (;;) { struct vm_area_struct * node = *nodeplace; if (node == avl_empty) break; *stack_ptr++ = nodeplace; stack_count++; if (key < node->vm_avl_key) { *to_the_right = node; nodeplace = &node->vm_avl_left; } else { *to_the_left = node; nodeplace = &node->vm_avl_right; } } new_node->vm_avl_left = avl_empty; new_node->vm_avl_right = avl_empty; new_node->vm_avl_height = 1; *nodeplace = new_node; avl_rebalance(stack_ptr,stack_count); } /* Removes a node out of a tree. */ static inline void avl_remove (struct vm_area_struct * node_to_delete, struct vm_area_struct ** ptree) { vm_avl_key_t key = node_to_delete->vm_avl_key; struct vm_area_struct ** nodeplace = ptree; struct vm_area_struct ** stack[avl_maxheight]; int stack_count = 0; struct vm_area_struct *** stack_ptr = &stack[0]; /* = &stack[stackcount] */ struct vm_area_struct ** nodeplace_to_delete; for (;;) { struct vm_area_struct * node = *nodeplace; if (node == avl_empty) { /* what? node_to_delete not found in tree? */ printk("avl_remove: node to delete not found in tree\n"); return; } *stack_ptr++ = nodeplace; stack_count++; if (key == node->vm_avl_key) break; if (key < node->vm_avl_key) nodeplace = &node->vm_avl_left; else nodeplace = &node->vm_avl_right; } nodeplace_to_delete = nodeplace; /* Have to remove node_to_delete = *nodeplace_to_delete. */ if (node_to_delete->vm_avl_left == avl_empty) { *nodeplace_to_delete = node_to_delete->vm_avl_right; stack_ptr--; stack_count--; } else { struct vm_area_struct *** stack_ptr_to_delete = stack_ptr; struct vm_area_struct ** nodeplace = &node_to_delete->vm_avl_left; struct vm_area_struct * node; for (;;) { node = *nodeplace; if (node->vm_avl_right == avl_empty) break; *stack_ptr++ = nodeplace; stack_count++; nodeplace = &node->vm_avl_right; } *nodeplace = node->vm_avl_left; /* node replaces node_to_delete */ node->vm_avl_left = node_to_delete->vm_avl_left; node->vm_avl_right = node_to_delete->vm_avl_right; node->vm_avl_height = node_to_delete->vm_avl_height; *nodeplace_to_delete = node; /* replace node_to_delete */ *stack_ptr_to_delete = &node->vm_avl_left; /* replace &node_to_delete->vm_avl_left */ } avl_rebalance(stack_ptr,stack_count); } #ifdef DEBUG_AVL /* print a list */ static void printk_list (struct vm_area_struct * vma) { printk("["); while (vma) { printk("%08lX-%08lX", vma->vm_start, vma->vm_end); vma = vma->vm_next; if (!vma) break; printk(" "); } printk("]"); } /* print a tree */ static void printk_avl (struct vm_area_struct * tree) { if (tree != avl_empty) { printk("("); if (tree->vm_avl_left != avl_empty) { printk_avl(tree->vm_avl_left); printk("<"); } printk("%08lX-%08lX", tree->vm_start, tree->vm_end); if (tree->vm_avl_right != avl_empty) { printk(">"); printk_avl(tree->vm_avl_right); } printk(")"); } } static char *avl_check_point = "somewhere"; /* check a tree's con