InnoDB buffer pool struct
2010-08-04 12:37
477 查看
/* The buffer pool control structure: bookkeeping for the buffer frames,
   their control blocks, I/O statistics, and the flush, free and LRU lists.
   Unless a field says otherwise, the struct is protected by its mutex. */
struct buf_pool_struct{
/* 1. General fields */
mutex_t mutex; /* mutex protecting the buffer pool
                  struct and control blocks, except the
                  read-write lock in them */
byte* frame_mem; /* pointer to the memory area which
                    was allocated for the frames; in AWE
                    this is the virtual address space
                    window where we map pages stored
                    in physical memory */
byte* frame_zero; /* pointer to the first buffer frame:
                     this may differ from frame_mem, because
                     this is aligned by the frame size */
byte* high_end; /* pointer to the end of the buffer
                   frames */
ulint n_frames; /* number of frames */
buf_block_t* blocks; /* array of buffer control blocks */
buf_block_t** blocks_of_frames;/* inverse mapping which can be used
                                  to retrieve the buffer control block
                                  of a frame; this is an array which
                                  lists the blocks of frames in the
                                  order frame_zero,
                                  frame_zero + UNIV_PAGE_SIZE, ...;
                                  a control block is always assigned
                                  for each frame, even if the frame does
                                  not contain any data; note that in AWE
                                  there are more control blocks than
                                  buffer frames */
os_awe_t* awe_info; /* if AWE is used, AWE info for the
                       physical 4 kB memory pages associated
                       with buffer frames */
ulint max_size; /* number of control blocks ==
                   maximum pool size in pages */
ulint curr_size; /* current pool size in pages;
                    currently always the same as
                    max_size */
hash_table_t* page_hash; /* hash table of the file pages */
ulint n_pend_reads; /* number of pending read operations */
time_t last_printout_time; /* time when buf_print was last
                              called */
ulint n_pages_read; /* number of read operations */
ulint n_pages_written;/* number of write operations */
ulint n_pages_created;/* number of pages created in the pool
                         with no read */
ulint n_page_gets; /* number of page gets performed;
                      successful searches through
                      the adaptive hash index are also
                      counted as page gets; this field
                      is NOT protected by the buffer
                      pool mutex */
ulint n_pages_awe_remapped; /* if AWE is enabled, the
                               number of remaps of blocks to
                               buffer frames */
ulint n_page_gets_old;/* n_page_gets when buf_print was
                         last called: used to calculate
                         the hit rate */
ulint n_pages_read_old;/* n_pages_read when buf_print was
                          last called */
ulint n_pages_written_old;/* NOTE(review): the original comment only
                             said "number write operations";
                             presumably n_pages_written when
                             buf_print was last called, by analogy
                             with n_pages_read_old -- confirm */
ulint n_pages_created_old;/* NOTE(review): the original comment only
                             repeated the n_pages_created text;
                             presumably n_pages_created when
                             buf_print was last called -- confirm */
ulint n_pages_awe_remapped_old;
/* NOTE(review): undocumented in the original;
   presumably n_pages_awe_remapped when buf_print
   was last called, by analogy with the other
   _old counters -- confirm */
/* 2. Page flushing algorithm fields */
UT_LIST_BASE_NODE_T(buf_block_t) flush_list;
/* base node of the modified block
   list */
ibool init_flush[BUF_FLUSH_LIST + 1];
/* an entry is TRUE when a flush of the
   given type is being initialized */
ulint n_flush[BUF_FLUSH_LIST + 1];
/* the number of pending writes in the
   given flush type */
os_event_t no_flush[BUF_FLUSH_LIST + 1];
/* an entry is in the set state when there
   is no flush batch of the given type
   running */
ulint ulint_clock; /* a sequence number used to count
                      time. NOTE! This counter wraps
                      around at 4 billion (if ulint ==
                      32 bits)! */
ulint freed_page_clock;/* a sequence number used to count the
                          number of buffer blocks removed from
                          the end of the LRU list; NOTE that
                          this counter may wrap around at 4
                          billion! A thread is allowed to
                          read this for heuristic purposes
                          without holding any mutex or latch */
ulint LRU_flush_ended;/* when an LRU flush ends for a page,
                         this is incremented by one; this is
                         set to zero when a buffer block is
                         allocated */
/* 3. LRU replacement algorithm fields */
UT_LIST_BASE_NODE_T(buf_block_t) free;
/* base node of the free block list;
   in the case of AWE, at the start are
   always free blocks for which the
   physical memory is mapped to a frame */
UT_LIST_BASE_NODE_T(buf_block_t) LRU;
/* base node of the LRU list */
buf_block_t* LRU_old; /* pointer to the about 3/8 oldest
                         blocks in the LRU list; NULL if the LRU
                         length is less than BUF_LRU_OLD_MIN_LEN */
ulint LRU_old_len; /* length of the LRU list from
                      the block to which LRU_old points
                      onward, including that block;
                      see buf0lru.c for the restrictions
                      on this value; not defined if
                      LRU_old == NULL */
UT_LIST_BASE_NODE_T(buf_block_t) awe_LRU_free_mapped;
/* list of those blocks which are
   in the LRU list or the free list, and
   where the page is mapped to a frame;
   thus, frames allocated, e.g., to the
   lock table, are not in this list */
};
/* The control block of one buffer frame: identifies the page held in the
   frame and carries its latching, flushing, LRU and adaptive hash index
   state. Each field's comment states which mutex or latch protects it. */
struct buf_block_struct{
/* 1. General fields */
ulint magic_n; /* magic number to check */
ulint state; /* state of the control block:
                BUF_BLOCK_NOT_USED, ...; changing
                this is only allowed when a thread
                has BOTH the buffer pool mutex AND
                block->mutex locked */
byte* frame; /* pointer to the buffer frame, which
                is of size UNIV_PAGE_SIZE and
                aligned to an address divisible by
                UNIV_PAGE_SIZE; if AWE is used, this
                will be NULL for the pages which are
                currently not mapped into the virtual
                address space window of the buffer
                pool */
os_awe_t* awe_info; /* if AWE is used, then an array of
                       awe page infos for
                       UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE
                       (normally = 4) physical memory
                       pages; otherwise NULL */
ulint space; /* space id of the page */
ulint offset; /* page number within the space */
ulint lock_hash_val; /* hashed value of the page address
                        in the record lock hash table */
mutex_t mutex; /* mutex protecting this block:
                  state (also protected by the buffer
                  pool mutex), io_fix, buf_fix_count,
                  and accessed; this mutex was introduced
                  in InnoDB-5.1 to relieve
                  contention on the buffer pool mutex */
rw_lock_t lock; /* read-write lock of the buffer
                   frame */
buf_block_t* hash; /* node used in chaining to the page
                      hash table */
ibool check_index_page_at_flush;
/* TRUE if we know that this is
   an index page, and want the database
   to check its consistency before flush;
   note that there may be pages in the
   buffer pool which are index pages,
   but this flag is not set because
   we do not keep track of all pages */
/* 2. Page flushing fields */
UT_LIST_NODE_T(buf_block_t) flush_list;
/* node of the modified, not yet
   flushed blocks list */
dulint newest_modification;
/* log sequence number of the youngest
   modification to this block, zero if
   not modified */
dulint oldest_modification;
/* log sequence number of the START of
   the log entry written of the oldest
   modification to this block which has
   not yet been flushed on disk; zero if
   all modifications are on disk */
ulint flush_type; /* if this block is currently being
                     flushed to disk, this tells the
                     flush_type: BUF_FLUSH_LRU or
                     BUF_FLUSH_LIST */
/* 3. LRU replacement algorithm fields */
UT_LIST_NODE_T(buf_block_t) free;
/* node of the free block list */
ibool in_free_list; /* TRUE if in the free list; used in
                       debugging */
UT_LIST_NODE_T(buf_block_t) LRU;
/* node of the LRU list */
UT_LIST_NODE_T(buf_block_t) awe_LRU_free_mapped;
/* in the AWE version, node in the
   list of free and LRU blocks which are
   mapped to a frame */
ibool in_LRU_list; /* TRUE if the page is in the LRU list;
                      used in debugging */
ulint LRU_position; /* value which monotonically
                       decreases (or may stay constant if
                       the block is in the old blocks) toward
                       the end of the LRU list, if the pool
                       ulint_clock has not wrapped around:
                       NOTE that this value can only be used
                       in heuristic algorithms, because of
                       the possibility of a wrap-around! */
ulint freed_page_clock;/* the value of freed_page_clock
                          of the buffer pool when this block was
                          last put to the head of the
                          LRU list; a thread is allowed to
                          read this for heuristic purposes
                          without holding any mutex or latch */
ibool old; /* TRUE if the block is in the old
              blocks in the LRU list */
ibool accessed; /* TRUE if the page has been accessed
                   while in the buffer pool: read-ahead
                   may read in pages which have not been
                   accessed yet; this is protected by
                   block->mutex; a thread is allowed to
                   read this for heuristic purposes
                   without holding any mutex or latch */
ulint buf_fix_count; /* count of how manyfold this block
                        is currently bufferfixed; this is
                        protected by block->mutex */
ulint io_fix; /* if a read is pending to the frame,
                 io_fix is BUF_IO_READ, in the case
                 of a write BUF_IO_WRITE, otherwise 0;
                 this is protected by block->mutex */
/* 4. Optimistic search field */
dulint modify_clock; /* this clock is incremented every
                        time a pointer to a record on the
                        page may become obsolete; this is
                        used in the optimistic cursor
                        positioning: if the modify clock has
                        not changed, we know that the pointer
                        is still valid; this field may be
                        changed if the thread (1) owns the
                        pool mutex and the page is not
                        bufferfixed, or (2) the thread has an
                        x-latch on the block */
/* 5. Hash search fields: NOTE that the first 4 fields are NOT
protected by any semaphore! */
ulint n_hash_helps; /* counter which controls building
                       of a new hash index for the page */
ulint n_fields; /* recommended prefix length for hash
                   search: number of full fields */
ulint n_bytes; /* recommended prefix: number of bytes
                  in an incomplete field */
ibool left_side; /* TRUE or FALSE, depending on
                    whether the leftmost record of several
                    records with the same prefix should be
                    indexed in the hash index */
/* These 6 fields may only be modified when we have
an x-latch on btr_search_latch AND
a) we are holding an s-latch or x-latch on block->lock or
b) we know that block->buf_fix_count == 0.
An exception to this is when we init or create a page
in the buffer pool in buf0buf.c. */
ibool is_hashed; /* TRUE if a hash index has already been
                    built on this page; note that it does
                    not guarantee that the index is
                    complete, though: there may have been
                    hash collisions, record deletions,
                    etc. */
ulint n_pointers; /* used in debugging: the number of
                     pointers in the adaptive hash index
                     pointing to this frame */
ulint curr_n_fields; /* prefix length for hash indexing:
                        number of full fields */
ulint curr_n_bytes; /* number of bytes in hash indexing */
ibool curr_left_side; /* TRUE or FALSE in hash indexing */
dict_index_t* index; /* index for which the adaptive
                        hash index has been created */
/* 6. Debug fields */
#ifdef UNIV_SYNC_DEBUG
rw_lock_t debug_latch; /* in the debug version, each thread
                          which bufferfixes the block acquires
                          an s-latch here; so we can use the
                          debug utilities in sync0rw */
#endif
ibool file_page_was_freed;
/* this is set to TRUE when fsp
   frees a page in the buffer pool */
};
Some useful URLs to help understand how the InnoDB buffer pool is initialized and how it works:
Chinaunix:
http://bbs.chinaunix.net/thread-1766852-1-1.html
Yangwanfu's CSDN blog:
http://blog.csdn.net/yzyangwanfu/archive/2010/07/13/5730260.aspx
/* 1. General fields */
mutex_t mutex; /* mutex protecting the buffer pool
struct and control blocks, except the
read-write lock in them */
byte* frame_mem; /* pointer to the memory area which
was allocated for the frames; in AWE
this is the virtual address space
window where we map pages stored
in physical memory */
byte* frame_zero; /* pointer to the first buffer frame:
this may differ from frame_mem, because
this is aligned by the frame size */
byte* high_end; /* pointer to the end of the buffer
frames */
ulint n_frames; /* number of frames */
buf_block_t* blocks; /* array of buffer control blocks */
buf_block_t** blocks_of_frames;/* inverse mapping which can be used
to retrieve the buffer control block
of a frame; this is an array which
lists the blocks of frames in the
order frame_zero,
frame_zero + UNIV_PAGE_SIZE, ...
a control block is always assigned
for each frame, even if the frame does
not contain any data; note that in AWE
there are more control blocks than
buffer frames */
os_awe_t* awe_info; /* if AWE is used, AWE info for the
physical 4 kB memory pages associated
with buffer frames */
ulint max_size; /* number of control blocks ==
maximum pool size in pages */
ulint curr_size; /* current pool size in pages;
currently always the same as
max_size */
hash_table_t* page_hash; /* hash table of the file pages */
ulint n_pend_reads; /* number of pending read operations */
time_t last_printout_time; /* when buf_print was last time
called */
ulint n_pages_read; /* number read operations */
ulint n_pages_written;/* number write operations */
ulint n_pages_created;/* number of pages created in the pool
with no read */
ulint n_page_gets; /* number of page gets performed;
also successful searches through
the adaptive hash index are
counted as page gets; this field
is NOT protected by the buffer
pool mutex */
ulint n_pages_awe_remapped; /* if AWE is enabled, the
number of remaps of blocks to
buffer frames */
ulint n_page_gets_old;/* n_page_gets when buf_print was
last time called: used to calculate
hit rate */
ulint n_pages_read_old;/* n_pages_read when buf_print was
last time called */
ulint n_pages_written_old;/* number write operations */
ulint n_pages_created_old;/* number of pages created in
the pool with no read */
ulint n_pages_awe_remapped_old;
/* 2. Page flushing algorithm fields */
UT_LIST_BASE_NODE_T(buf_block_t) flush_list;
/* base node of the modified block
list */
ibool init_flush[BUF_FLUSH_LIST + 1];
/* this is TRUE when a flush of the
given type is being initialized */
ulint n_flush[BUF_FLUSH_LIST + 1];
/* this is the number of pending
writes in the given flush type */
os_event_t no_flush[BUF_FLUSH_LIST + 1];
/* this is in the set state when there
is no flush batch of the given type
running */
ulint ulint_clock; /* a sequence number used to count
time. NOTE! This counter wraps
around at 4 billion (if ulint ==
32 bits)! */
ulint freed_page_clock;/* a sequence number used to count the
number of buffer blocks removed from
the end of the LRU list; NOTE that
this counter may wrap around at 4
billion! A thread is allowed to
read this for heuristic purposes
without holding any mutex or latch */
ulint LRU_flush_ended;/* when an LRU flush ends for a page,
this is incremented by one; this is
set to zero when a buffer block is
allocated */
/* 3. LRU replacement algorithm fields */
UT_LIST_BASE_NODE_T(buf_block_t) free;
/* base node of the free block list;
in the case of AWE, at the start are
always free blocks for which the
physical memory is mapped to a frame */
UT_LIST_BASE_NODE_T(buf_block_t) LRU;
/* base node of the LRU list */
buf_block_t* LRU_old; /* pointer to the about 3/8 oldest
blocks in the LRU list; NULL if LRU
length less than BUF_LRU_OLD_MIN_LEN */
ulint LRU_old_len; /* length of the LRU list from
the block to which LRU_old points
onward, including that block;
see buf0lru.c for the restrictions
on this value; not defined if
LRU_old == NULL */
UT_LIST_BASE_NODE_T(buf_block_t) awe_LRU_free_mapped;
/* list of those blocks which are
in the LRU list or the free list, and
where the page is mapped to a frame;
thus, frames allocated, e.g., to the
locki table, are not in this list */
};
/* The control block of one buffer frame: identifies the page held in the
   frame and carries its latching, flushing, LRU and adaptive hash index
   state. Each field's comment states which mutex or latch protects it. */
struct buf_block_struct{
/* 1. General fields */
ulint magic_n; /* magic number to check */
ulint state; /* state of the control block:
                BUF_BLOCK_NOT_USED, ...; changing
                this is only allowed when a thread
                has BOTH the buffer pool mutex AND
                block->mutex locked */
byte* frame; /* pointer to the buffer frame, which
                is of size UNIV_PAGE_SIZE and
                aligned to an address divisible by
                UNIV_PAGE_SIZE; if AWE is used, this
                will be NULL for the pages which are
                currently not mapped into the virtual
                address space window of the buffer
                pool */
os_awe_t* awe_info; /* if AWE is used, then an array of
                       awe page infos for
                       UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE
                       (normally = 4) physical memory
                       pages; otherwise NULL */
ulint space; /* space id of the page */
ulint offset; /* page number within the space */
ulint lock_hash_val; /* hashed value of the page address
                        in the record lock hash table */
mutex_t mutex; /* mutex protecting this block:
                  state (also protected by the buffer
                  pool mutex), io_fix, buf_fix_count,
                  and accessed; this mutex was introduced
                  in InnoDB-5.1 to relieve
                  contention on the buffer pool mutex */
rw_lock_t lock; /* read-write lock of the buffer
                   frame */
buf_block_t* hash; /* node used in chaining to the page
                      hash table */
ibool check_index_page_at_flush;
/* TRUE if we know that this is
   an index page, and want the database
   to check its consistency before flush;
   note that there may be pages in the
   buffer pool which are index pages,
   but this flag is not set because
   we do not keep track of all pages */
/* 2. Page flushing fields */
UT_LIST_NODE_T(buf_block_t) flush_list;
/* node of the modified, not yet
   flushed blocks list */
dulint newest_modification;
/* log sequence number of the youngest
   modification to this block, zero if
   not modified */
dulint oldest_modification;
/* log sequence number of the START of
   the log entry written of the oldest
   modification to this block which has
   not yet been flushed on disk; zero if
   all modifications are on disk */
ulint flush_type; /* if this block is currently being
                     flushed to disk, this tells the
                     flush_type: BUF_FLUSH_LRU or
                     BUF_FLUSH_LIST */
/* 3. LRU replacement algorithm fields */
UT_LIST_NODE_T(buf_block_t) free;
/* node of the free block list */
ibool in_free_list; /* TRUE if in the free list; used in
                       debugging */
UT_LIST_NODE_T(buf_block_t) LRU;
/* node of the LRU list */
UT_LIST_NODE_T(buf_block_t) awe_LRU_free_mapped;
/* in the AWE version, node in the
   list of free and LRU blocks which are
   mapped to a frame */
ibool in_LRU_list; /* TRUE if the page is in the LRU list;
                      used in debugging */
ulint LRU_position; /* value which monotonically
                       decreases (or may stay constant if
                       the block is in the old blocks) toward
                       the end of the LRU list, if the pool
                       ulint_clock has not wrapped around:
                       NOTE that this value can only be used
                       in heuristic algorithms, because of
                       the possibility of a wrap-around! */
ulint freed_page_clock;/* the value of freed_page_clock
                          of the buffer pool when this block was
                          last put to the head of the
                          LRU list; a thread is allowed to
                          read this for heuristic purposes
                          without holding any mutex or latch */
ibool old; /* TRUE if the block is in the old
              blocks in the LRU list */
ibool accessed; /* TRUE if the page has been accessed
                   while in the buffer pool: read-ahead
                   may read in pages which have not been
                   accessed yet; this is protected by
                   block->mutex; a thread is allowed to
                   read this for heuristic purposes
                   without holding any mutex or latch */
ulint buf_fix_count; /* count of how manyfold this block
                        is currently bufferfixed; this is
                        protected by block->mutex */
ulint io_fix; /* if a read is pending to the frame,
                 io_fix is BUF_IO_READ, in the case
                 of a write BUF_IO_WRITE, otherwise 0;
                 this is protected by block->mutex */
/* 4. Optimistic search field */
dulint modify_clock; /* this clock is incremented every
                        time a pointer to a record on the
                        page may become obsolete; this is
                        used in the optimistic cursor
                        positioning: if the modify clock has
                        not changed, we know that the pointer
                        is still valid; this field may be
                        changed if the thread (1) owns the
                        pool mutex and the page is not
                        bufferfixed, or (2) the thread has an
                        x-latch on the block */
/* 5. Hash search fields: NOTE that the first 4 fields are NOT
protected by any semaphore! */
ulint n_hash_helps; /* counter which controls building
                       of a new hash index for the page */
ulint n_fields; /* recommended prefix length for hash
                   search: number of full fields */
ulint n_bytes; /* recommended prefix: number of bytes
                  in an incomplete field */
ibool left_side; /* TRUE or FALSE, depending on
                    whether the leftmost record of several
                    records with the same prefix should be
                    indexed in the hash index */
/* These 6 fields may only be modified when we have
an x-latch on btr_search_latch AND
a) we are holding an s-latch or x-latch on block->lock or
b) we know that block->buf_fix_count == 0.
An exception to this is when we init or create a page
in the buffer pool in buf0buf.c. */
ibool is_hashed; /* TRUE if a hash index has already been
                    built on this page; note that it does
                    not guarantee that the index is
                    complete, though: there may have been
                    hash collisions, record deletions,
                    etc. */
ulint n_pointers; /* used in debugging: the number of
                     pointers in the adaptive hash index
                     pointing to this frame */
ulint curr_n_fields; /* prefix length for hash indexing:
                        number of full fields */
ulint curr_n_bytes; /* number of bytes in hash indexing */
ibool curr_left_side; /* TRUE or FALSE in hash indexing */
dict_index_t* index; /* index for which the adaptive
                        hash index has been created */
/* 6. Debug fields */
#ifdef UNIV_SYNC_DEBUG
rw_lock_t debug_latch; /* in the debug version, each thread
                          which bufferfixes the block acquires
                          an s-latch here; so we can use the
                          debug utilities in sync0rw */
#endif
ibool file_page_was_freed;
/* this is set to TRUE when fsp
   frees a page in the buffer pool */
};
Some useful URLs to help understand how the InnoDB buffer pool is initialized and how it works:
Chinaunix:
http://bbs.chinaunix.net/thread-1766852-1-1.html
Yangwanfu's CSDN blog:
http://blog.csdn.net/yzyangwanfu/archive/2010/07/13/5730260.aspx
相关文章推荐
- mysql之innodb_buffer_pool
- Mysql Innodb_buffer_pool状态参数详解
- linux下查找字符串&mysql-为magento性能测试修改innodb的innodb_buffer_pool_size而引发的问题
- innodb double write buffer--struct
- 14.4.3.6 Fine-tuning InnoDB Buffer Pool Flushing 微调 InnoDB Buffer Pool 刷新:
- 快速预热innodb buffer pool
- InnoDB buffer pool 刷新快慢取决因素
- 14.3.3.2 Configuring the Rate of InnoDB Buffer Pool Flushing 配置 InnoDB Buffer Pool 刷新频率
- innodb_buffer_pool_size=30G
- mysql数据库参数innodb_buffer_pool_size和max_connections
- 如何在MySQL中分配innodb_buffer_pool_size
- [置顶]Innodb Buffer Pool内部结构
- Innodb_buffer_pool_pages_dirty [一个故事@MySQL DBA]MYSQL
- 14.4.3.6 Fine-tuning InnoDB Buffer Pool Flushing 微调 InnoDB Buffer Pool 刷新:
- innodb_buffer_pool_size的安全上限
- mysql - 为magento性能测试修改innodb的innodb_buffer_pool_size而引发的问题
- innobackupex-1.5.1: fatal error: no 'innodb_buffer_pool_filename'解决方法
- 推荐在线设置 innodb_buffer_pool_size
- MySQL · 引擎特性 · InnoDB Buffer Pool
- 14.4.3.6 Fine-tuning InnoDB Buffer Pool Flushing 微调 InnoDB Buffer Pool 刷新: