blob: 3156f8ef5a09496c2e3ac5c4bcab1dc99266a1d8 [file] [log] [blame]
drhed7c8552001-04-11 14:29:21 +00001/*
2** Copyright (c) 2001 D. Richard Hipp
3**
4** This program is free software; you can redistribute it and/or
5** modify it under the terms of the GNU General Public
6** License as published by the Free Software Foundation; either
7** version 2 of the License, or (at your option) any later version.
8**
9** This program is distributed in the hope that it will be useful,
10** but WITHOUT ANY WARRANTY; without even the implied warranty of
11** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12** General Public License for more details.
13**
14** You should have received a copy of the GNU General Public
15** License along with this library; if not, write to the
16** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17** Boston, MA 02111-1307, USA.
18**
19** Author contact information:
20** drh@hwaci.com
21** http://www.hwaci.com/drh/
22**
23*************************************************************************
24** This is the implementation of the page cache subsystem.
25**
26** The page cache is used to access a database file. The pager journals
27** all writes in order to support rollback. Locking is used to limit
** access to one or more readers or to one writer.
drhed7c8552001-04-11 14:29:21 +000029**
drhbe0072d2001-09-13 14:46:09 +000030** @(#) $Id: pager.c,v 1.15 2001/09/13 14:46:10 drh Exp $
drhed7c8552001-04-11 14:29:21 +000031*/
drhd9b02572001-04-15 00:37:09 +000032#include "sqliteInt.h"
drhed7c8552001-04-11 14:29:21 +000033#include "pager.h"
34#include <fcntl.h>
35#include <sys/stat.h>
36#include <unistd.h>
37#include <assert.h>
drhd9b02572001-04-15 00:37:09 +000038#include <string.h>
drhed7c8552001-04-11 14:29:21 +000039
/*
** Lock states.  The page cache as a whole is always in exactly one
** of the following states:
**
**   SQLITE_UNLOCK       The page cache is not currently reading or
**                       writing the database file.  There is no
**                       data held in memory.  This is the initial
**                       state.
**
**   SQLITE_READLOCK     The page cache is reading the database.
**                       Writing is not permitted.  There can be
**                       multiple readers accessing the same database
**                       file at the same time.
**
**   SQLITE_WRITELOCK    The page cache is writing the database.
**                       Access is exclusive.  No other processes or
**                       threads can be reading or writing while one
**                       process is writing.
**
** The page cache comes up in SQLITE_UNLOCK.  The first time a
** sqlitepager_get() occurs, the state transitions to SQLITE_READLOCK.
** After all pages have been released using sqlitepager_unref(),
** the state transitions back to SQLITE_UNLOCK.  The first time
** that sqlitepager_write() is called, the state transitions to
** SQLITE_WRITELOCK.  (Note that sqlitepager_write() can only be
** called on an outstanding page which means that the pager must
** be in SQLITE_READLOCK before it transitions to SQLITE_WRITELOCK.)
** The sqlitepager_rollback() and sqlitepager_commit() functions
** transition the state from SQLITE_WRITELOCK back to SQLITE_READLOCK.
*/
#define SQLITE_UNLOCK      0
#define SQLITE_READLOCK    1
#define SQLITE_WRITELOCK   2
73
drhd9b02572001-04-15 00:37:09 +000074
drhed7c8552001-04-11 14:29:21 +000075/*
76** Each in-memory image of a page begins with the following header.
drhbd03cae2001-06-02 02:40:57 +000077** This header is only visible to this pager module. The client
78** code that calls pager sees only the data that follows the header.
drhed7c8552001-04-11 14:29:21 +000079*/
drhd9b02572001-04-15 00:37:09 +000080typedef struct PgHdr PgHdr;
drhed7c8552001-04-11 14:29:21 +000081struct PgHdr {
82 Pager *pPager; /* The pager to which this page belongs */
83 Pgno pgno; /* The page number for this page */
drh69688d52001-04-14 16:38:23 +000084 PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
drhed7c8552001-04-11 14:29:21 +000085 int nRef; /* Number of users of this page */
drhd9b02572001-04-15 00:37:09 +000086 PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
87 PgHdr *pNextAll, *pPrevAll; /* A list of all pages */
drhed7c8552001-04-11 14:29:21 +000088 char inJournal; /* TRUE if has been written to journal */
89 char dirty; /* TRUE if we need to write back changes */
drh69688d52001-04-14 16:38:23 +000090 /* SQLITE_PAGE_SIZE bytes of page data follow this header */
drh7e3b0a02001-04-28 16:52:40 +000091 /* Pager.nExtra bytes of local data follow the page data */
drhed7c8552001-04-11 14:29:21 +000092};
93
/*
** Pointer conversions between a page header, the page data that
** immediately follows the header, and the extra space that follows
** the SQLITE_PAGE_SIZE bytes of page data.
*/
#define PGHDR_TO_DATA(P)   ((void*)((P)+1))
#define DATA_TO_PGHDR(D)   (((PgHdr*)(D))-1)
#define PGHDR_TO_EXTRA(P)  ((void*)(((char*)((P)+1))+SQLITE_PAGE_SIZE))
drhed7c8552001-04-11 14:29:21 +0000101
/*
** Number of buckets in the hash table that maps a page number to
** its in-memory page.  Knuth says this should be a prime number.
*/
#define N_PG_HASH 101
drhed7c8552001-04-11 14:29:21 +0000107
108/*
109** A open page cache is an instance of the following structure.
110*/
111struct Pager {
112 char *zFilename; /* Name of the database file */
113 char *zJournal; /* Name of the journal file */
114 int fd, jfd; /* File descriptors for database and journal */
drhed7c8552001-04-11 14:29:21 +0000115 int dbSize; /* Number of pages in the file */
drh69688d52001-04-14 16:38:23 +0000116 int origDbSize; /* dbSize before the current change */
drh7e3b0a02001-04-28 16:52:40 +0000117 int nExtra; /* Add this many bytes to each in-memory page */
drh72f82862001-05-24 21:06:34 +0000118 void (*xDestructor)(void*); /* Call this routine when freeing pages */
drhed7c8552001-04-11 14:29:21 +0000119 int nPage; /* Total number of in-memory pages */
drhd9b02572001-04-15 00:37:09 +0000120 int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
drhed7c8552001-04-11 14:29:21 +0000121 int mxPage; /* Maximum number of pages to hold in cache */
drhd9b02572001-04-15 00:37:09 +0000122 int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
123 unsigned char state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
124 unsigned char errMask; /* One of several kinds of errors */
drh5e00f6c2001-09-13 13:46:56 +0000125 unsigned char tempFile; /* zFilename is a temporary file */
126 unsigned char readOnly; /* True for a read-only database */
drh6019e162001-07-02 17:51:45 +0000127 unsigned char *aInJournal; /* One bit for each page in the database file */
drhed7c8552001-04-11 14:29:21 +0000128 PgHdr *pFirst, *pLast; /* List of free pages */
drhd9b02572001-04-15 00:37:09 +0000129 PgHdr *pAll; /* List of all pages */
drhed7c8552001-04-11 14:29:21 +0000130 PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */
drhd9b02572001-04-15 00:37:09 +0000131};
132
/*
** Bit values that may be OR-ed together in Pager.errMask.
*/
#define PAGER_ERR_FULL     0x01  /* a write() failed */
#define PAGER_ERR_MEM      0x02  /* malloc() failed */
#define PAGER_ERR_LOCK     0x04  /* error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08  /* database or journal corruption */
140
141/*
142** The journal file contains page records in the following
143** format.
144*/
145typedef struct PageRecord PageRecord;
146struct PageRecord {
147 Pgno pgno; /* The page number */
148 char aData[SQLITE_PAGE_SIZE]; /* Original data for page pgno */
149};
150
/*
** Magic string found at the start of every journal file.  The bytes
** were obtained from /dev/random and serve only as a sanity check.
*/
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9,
  0x20, 0xa1, 0x63, 0xd4,
};
158
/*
** Map a page number onto a bucket index of Pager.aHash[].
*/
#define pager_hash(PN)  ((PN)%N_PG_HASH)
drhed7c8552001-04-11 14:29:21 +0000163
/*
** Reference count tracking for testing.  When compiled with SQLITE_TEST
** and pager_refinfo_enable is non-zero, REFINFO(p) prints the page
** number, the address of the page data and the current reference count
** of page p.  Otherwise REFINFO() expands to nothing.
*/
#if SQLITE_TEST
  int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager_refinfo_enable ) return;
    /* Print the address with %p.  Casting the pointer to int and
    ** printing it with %x loses the upper bits where pointers are
    ** wider than int. */
    printf(
       "REFCNT: %4d addr=%p nRef=%d\n",
       (int)p->pgno, PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
182
/*
** Try to lock the whole of file fd using fcntl(F_SETLK).  A read lock
** is requested when wrlock==0 and a write lock otherwise.
**
** Return 0 if the lock was obtained and 1 if it was not.
*/
static int pager_lock(int fd, int wrlock){
  struct flock lk;
  if( wrlock ){
    lk.l_type = F_WRLCK;
  }else{
    lk.l_type = F_RDLCK;
  }
  lk.l_whence = SEEK_SET;
  lk.l_start = 0L;      /* start==0 and len==0 covers the entire file */
  lk.l_len = 0L;
  return fcntl(fd, F_SETLK, &lk)!=0;
}
197
/*
** Release any fcntl() lock held on the whole of file fd.
**
** Return 0 on success and 1 if the fcntl() call fails.
*/
static int pager_unlock(int fd){
  int rc;
  struct flock lock;
  lock.l_type = F_UNLCK;
  lock.l_whence = SEEK_SET;
  lock.l_start = lock.l_len = 0L;   /* 0,0 covers the entire file */
  rc = fcntl(fd, F_SETLK, &lock);
  return rc!=0;
}
210
211/*
212** Move the cursor for file descriptor fd to the point whereto from
213** the beginning of the file.
214*/
215static int pager_seek(int fd, off_t whereto){
drh6019e162001-07-02 17:51:45 +0000216 /*printf("SEEK to page %d\n", whereto/SQLITE_PAGE_SIZE + 1);*/
drhd9b02572001-04-15 00:37:09 +0000217 lseek(fd, whereto, SEEK_SET);
218 return SQLITE_OK;
219}
220
221/*
222** Truncate the given file so that it contains exactly mxPg pages
223** of data.
224*/
225static int pager_truncate(int fd, Pgno mxPg){
226 int rc;
227 rc = ftruncate(fd, mxPg*SQLITE_PAGE_SIZE);
228 return rc!=0 ? SQLITE_IOERR : SQLITE_OK;
229}
230
231/*
232** Read nBytes of data from fd into pBuf. If the data cannot be
233** read or only a partial read occurs, then the unread parts of
234** pBuf are filled with zeros and this routine returns SQLITE_IOERR.
235** If the read is completely successful, return SQLITE_OK.
236*/
237static int pager_read(int fd, void *pBuf, int nByte){
238 int rc;
drh6019e162001-07-02 17:51:45 +0000239 /* printf("READ\n");*/
drhd9b02572001-04-15 00:37:09 +0000240 rc = read(fd, pBuf, nByte);
241 if( rc<0 ){
242 memset(pBuf, 0, nByte);
243 return SQLITE_IOERR;
244 }
245 if( rc<nByte ){
246 memset(&((char*)pBuf)[rc], 0, nByte - rc);
247 rc = SQLITE_IOERR;
248 }else{
249 rc = SQLITE_OK;
250 }
251 return rc;
252}
253
254/*
255** Write nBytes of data into fd. If any problem occurs or if the
256** write is incomplete, SQLITE_IOERR is returned. SQLITE_OK is
257** returned upon complete success.
258*/
259static int pager_write(int fd, const void *pBuf, int nByte){
260 int rc;
drh6019e162001-07-02 17:51:45 +0000261 /*printf("WRITE\n");*/
drhd9b02572001-04-15 00:37:09 +0000262 rc = write(fd, pBuf, nByte);
263 if( rc<nByte ){
264 return SQLITE_FULL;
265 }else{
266 return SQLITE_OK;
267 }
268}
269
270/*
271** Convert the bits in the pPager->errMask into an approprate
272** return code.
273*/
274static int pager_errcode(Pager *pPager){
275 int rc = SQLITE_OK;
276 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
277 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
278 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
279 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
280 return rc;
drhed7c8552001-04-11 14:29:21 +0000281}
282
283/*
284** Find a page in the hash table given its page number. Return
285** a pointer to the page or NULL if not found.
286*/
drhd9b02572001-04-15 00:37:09 +0000287static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
drhed7c8552001-04-11 14:29:21 +0000288 PgHdr *p = pPager->aHash[pgno % N_PG_HASH];
289 while( p && p->pgno!=pgno ){
290 p = p->pNextHash;
291 }
292 return p;
293}
294
295/*
296** Unlock the database and clear the in-memory cache. This routine
297** sets the state of the pager back to what it was when it was first
298** opened. Any outstanding pages are invalidated and subsequent attempts
299** to access those pages will likely result in a coredump.
300*/
drhd9b02572001-04-15 00:37:09 +0000301static void pager_reset(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000302 PgHdr *pPg, *pNext;
drhd9b02572001-04-15 00:37:09 +0000303 for(pPg=pPager->pAll; pPg; pPg=pNext){
304 pNext = pPg->pNextAll;
305 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000306 }
307 pPager->pFirst = 0;
drhd9b02572001-04-15 00:37:09 +0000308 pPager->pLast = 0;
309 pPager->pAll = 0;
drhed7c8552001-04-11 14:29:21 +0000310 memset(pPager->aHash, 0, sizeof(pPager->aHash));
311 pPager->nPage = 0;
312 if( pPager->state==SQLITE_WRITELOCK ){
drhd9b02572001-04-15 00:37:09 +0000313 sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000314 }
drhd9b02572001-04-15 00:37:09 +0000315 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000316 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000317 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000318 pPager->nRef = 0;
319}
320
321/*
322** When this routine is called, the pager has the journal file open and
323** a write lock on the database. This routine releases the database
324** write lock and acquires a read lock in its place. The journal file
325** is deleted and closed.
326**
327** We have to release the write lock before acquiring the read lock,
328** so there is a race condition where another process can get the lock
329** while we are not holding it. But, no other process should do this
330** because we are also holding a lock on the journal, and no process
331** should get a write lock on the database without first getting a lock
332** on the journal. So this routine should never fail. But it can fail
333** if another process is not playing by the rules. If it does fail,
drhd9b02572001-04-15 00:37:09 +0000334** all in-memory cache pages are invalidated, the PAGER_ERR_LOCK bit
335** is set in pPager->errMask, and this routine returns SQLITE_PROTOCOL.
336** SQLITE_OK is returned on success.
drhed7c8552001-04-11 14:29:21 +0000337*/
drhd9b02572001-04-15 00:37:09 +0000338static int pager_unwritelock(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000339 int rc;
drhd9b02572001-04-15 00:37:09 +0000340 PgHdr *pPg;
341 if( pPager->state!=SQLITE_WRITELOCK ) return SQLITE_OK;
342 pager_unlock(pPager->fd);
343 rc = pager_lock(pPager->fd, 0);
drhed7c8552001-04-11 14:29:21 +0000344 unlink(pPager->zJournal);
345 close(pPager->jfd);
346 pPager->jfd = -1;
drh6019e162001-07-02 17:51:45 +0000347 sqliteFree( pPager->aInJournal );
348 pPager->aInJournal = 0;
drhd9b02572001-04-15 00:37:09 +0000349 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
350 pPg->inJournal = 0;
351 pPg->dirty = 0;
352 }
drhed7c8552001-04-11 14:29:21 +0000353 if( rc!=SQLITE_OK ){
354 pPager->state = SQLITE_UNLOCK;
drhed7c8552001-04-11 14:29:21 +0000355 rc = SQLITE_PROTOCOL;
drhd9b02572001-04-15 00:37:09 +0000356 pPager->errMask |= PAGER_ERR_LOCK;
drhed7c8552001-04-11 14:29:21 +0000357 }else{
drhd9b02572001-04-15 00:37:09 +0000358 rc = SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +0000359 pPager->state = SQLITE_READLOCK;
360 }
361 return rc;
362}
363
drhed7c8552001-04-11 14:29:21 +0000364/*
365** Playback the journal and thus restore the database file to
366** the state it was in before we started making changes.
367**
drhd9b02572001-04-15 00:37:09 +0000368** The journal file format is as follows: There is an initial
369** file-type string for sanity checking. Then there is a single
370** Pgno number which is the number of pages in the database before
371** changes were made. The database is truncated to this size.
drh306dc212001-05-21 13:45:10 +0000372** Next come zero or more page records where each page record
373** consists of a Pgno and SQLITE_PAGE_SIZE bytes of data. See
374** the PageRecord structure for details.
drhed7c8552001-04-11 14:29:21 +0000375**
drhd9b02572001-04-15 00:37:09 +0000376** For playback, the pages have to be read from the journal in
377** reverse order and put back into the original database file.
drhed7c8552001-04-11 14:29:21 +0000378**
drhd9b02572001-04-15 00:37:09 +0000379** If the file opened as the journal file is not a well-formed
380** journal file (as determined by looking at the magic number
381** at the beginning) then this routine returns SQLITE_PROTOCOL.
382** If any other errors occur during playback, the database will
383** likely be corrupted, so the PAGER_ERR_CORRUPT bit is set in
384** pPager->errMask and SQLITE_CORRUPT is returned. If it all
385** works, then this routine returns SQLITE_OK.
drhed7c8552001-04-11 14:29:21 +0000386*/
drhd9b02572001-04-15 00:37:09 +0000387static int pager_playback(Pager *pPager){
388 int nRec; /* Number of Records */
389 int i; /* Loop counter */
390 Pgno mxPg = 0; /* Size of the original file in pages */
391 struct stat statbuf; /* Used to size the journal */
392 PgHdr *pPg; /* An existing page in the cache */
393 PageRecord pgRec;
394 unsigned char aMagic[sizeof(aJournalMagic)];
drhed7c8552001-04-11 14:29:21 +0000395 int rc;
396
drhd9b02572001-04-15 00:37:09 +0000397 /* Read the beginning of the journal and truncate the
398 ** database file back to its original size.
drhed7c8552001-04-11 14:29:21 +0000399 */
drhd9b02572001-04-15 00:37:09 +0000400 assert( pPager->jfd>=0 );
401 pager_seek(pPager->jfd, 0);
402 rc = pager_read(pPager->jfd, aMagic, sizeof(aMagic));
403 if( rc!=SQLITE_OK || memcmp(aMagic,aJournalMagic,sizeof(aMagic))!=0 ){
404 return SQLITE_PROTOCOL;
405 }
406 rc = pager_read(pPager->jfd, &mxPg, sizeof(mxPg));
407 if( rc!=SQLITE_OK ){
408 return SQLITE_PROTOCOL;
409 }
410 pager_truncate(pPager->fd, mxPg);
411 pPager->dbSize = mxPg;
412
413 /* Begin reading the journal beginning at the end and moving
414 ** toward the beginning.
415 */
416 if( fstat(pPager->jfd, &statbuf)!=0 ){
drhed7c8552001-04-11 14:29:21 +0000417 return SQLITE_OK;
418 }
drhd9b02572001-04-15 00:37:09 +0000419 nRec = (statbuf.st_size - (sizeof(aMagic)+sizeof(Pgno))) / sizeof(PageRecord);
drhed7c8552001-04-11 14:29:21 +0000420
421 /* Process segments beginning with the last and working backwards
422 ** to the first.
423 */
drhd9b02572001-04-15 00:37:09 +0000424 for(i=nRec-1; i>=0; i--){
drhed7c8552001-04-11 14:29:21 +0000425 /* Seek to the beginning of the segment */
drhd9b02572001-04-15 00:37:09 +0000426 off_t ofst;
427 ofst = i*sizeof(PageRecord) + sizeof(aMagic) + sizeof(Pgno);
428 rc = pager_seek(pPager->jfd, ofst);
429 if( rc!=SQLITE_OK ) break;
430 rc = pager_read(pPager->jfd, &pgRec, sizeof(pgRec));
431 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000432
drhd9b02572001-04-15 00:37:09 +0000433 /* Sanity checking on the page */
434 if( pgRec.pgno>mxPg || pgRec.pgno==0 ){
435 rc = SQLITE_CORRUPT;
436 break;
drhed7c8552001-04-11 14:29:21 +0000437 }
438
drhd9b02572001-04-15 00:37:09 +0000439 /* Playback the page. Update the in-memory copy of the page
440 ** at the same time, if there is one.
drhed7c8552001-04-11 14:29:21 +0000441 */
drhd9b02572001-04-15 00:37:09 +0000442 pPg = pager_lookup(pPager, pgRec.pgno);
443 if( pPg ){
444 memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
drh6019e162001-07-02 17:51:45 +0000445 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
drhed7c8552001-04-11 14:29:21 +0000446 }
drhd9b02572001-04-15 00:37:09 +0000447 rc = pager_seek(pPager->fd, (pgRec.pgno-1)*SQLITE_PAGE_SIZE);
448 if( rc!=SQLITE_OK ) break;
449 rc = pager_write(pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
450 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000451 }
drhd9b02572001-04-15 00:37:09 +0000452 if( rc!=SQLITE_OK ){
453 pager_unwritelock(pPager);
454 pPager->errMask |= PAGER_ERR_CORRUPT;
455 rc = SQLITE_CORRUPT;
456 }else{
457 rc = pager_unwritelock(pPager);
drhed7c8552001-04-11 14:29:21 +0000458 }
drhd9b02572001-04-15 00:37:09 +0000459 return rc;
drhed7c8552001-04-11 14:29:21 +0000460}
461
/*
** Locate a directory where we can potentially create a temporary
** file.  A fixed list of candidates is tried in order and the first
** one that exists, is a directory, and is writable by this process
** is returned.  NULL is returned if none qualifies.
**
** The returned string is a static constant and must not be freed.
*/
static const char *findTempDir(void){
  static const char *azDirs[] = {
     ".",
     "/var/tmp",
     "/usr/tmp",
     "/tmp",
     "/temp",
     "./temp",
  };
  int i;
  struct stat buf;
  for(i=0; i<(int)(sizeof(azDirs)/sizeof(azDirs[0])); i++){
    /* access() returns 0 when the requested access is permitted */
    if( stat(azDirs[i], &buf)==0 && S_ISDIR(buf.st_mode)
         && access(azDirs[i], W_OK)==0 ){
      return azDirs[i];
    }
  }
  return 0;
}
485
486/*
drhed7c8552001-04-11 14:29:21 +0000487** Create a new page cache and put a pointer to the page cache in *ppPager.
drh5e00f6c2001-09-13 13:46:56 +0000488** The file to be cached need not exist. The file is not locked until
drhd9b02572001-04-15 00:37:09 +0000489** the first call to sqlitepager_get() and is only held open until the
490** last page is released using sqlitepager_unref().
drhed7c8552001-04-11 14:29:21 +0000491*/
drh7e3b0a02001-04-28 16:52:40 +0000492int sqlitepager_open(
493 Pager **ppPager, /* Return the Pager structure here */
494 const char *zFilename, /* Name of the database file to open */
495 int mxPage, /* Max number of in-memory cache pages */
496 int nExtra /* Extra bytes append to each in-memory page */
497){
drhed7c8552001-04-11 14:29:21 +0000498 Pager *pPager;
499 int nameLen;
500 int fd;
drh5e00f6c2001-09-13 13:46:56 +0000501 int tempFile;
502 int readOnly = 0;
503 char zTemp[300];
drhed7c8552001-04-11 14:29:21 +0000504
drhd9b02572001-04-15 00:37:09 +0000505 *ppPager = 0;
506 if( sqlite_malloc_failed ){
507 return SQLITE_NOMEM;
508 }
drh5e00f6c2001-09-13 13:46:56 +0000509 if( zFilename ){
510 fd = open(zFilename, O_RDWR|O_CREAT, 0644);
511 if( fd<0 ){
512 fd = open(zFilename, O_RDONLY, 0);
513 readOnly = 1;
514 }
515 tempFile = 0;
516 }else{
517 int cnt = 8;
drhbe0072d2001-09-13 14:46:09 +0000518 const char *zDir = findTempDir();
drh5e00f6c2001-09-13 13:46:56 +0000519 if( zDir==0 ) return SQLITE_CANTOPEN;
520 do{
521 cnt--;
drhbe0072d2001-09-13 14:46:09 +0000522 sprintf(zTemp,"%s/_sqlite_%u", zDir, (unsigned)sqliteRandomInteger());
drh5e00f6c2001-09-13 13:46:56 +0000523 fd = open(zTemp, O_RDWR|O_CREAT|O_EXCL, 0600);
524 }while( cnt>0 && fd<0 );
525 zFilename = zTemp;
526 tempFile = 1;
527 }
drhed7c8552001-04-11 14:29:21 +0000528 if( fd<0 ){
529 return SQLITE_CANTOPEN;
530 }
531 nameLen = strlen(zFilename);
532 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*2 + 30 );
drhd9b02572001-04-15 00:37:09 +0000533 if( pPager==0 ){
534 close(fd);
535 return SQLITE_NOMEM;
536 }
drhed7c8552001-04-11 14:29:21 +0000537 pPager->zFilename = (char*)&pPager[1];
538 pPager->zJournal = &pPager->zFilename[nameLen+1];
539 strcpy(pPager->zFilename, zFilename);
540 strcpy(pPager->zJournal, zFilename);
541 strcpy(&pPager->zJournal[nameLen], "-journal");
542 pPager->fd = fd;
543 pPager->jfd = -1;
544 pPager->nRef = 0;
545 pPager->dbSize = -1;
546 pPager->nPage = 0;
drhd79caeb2001-04-15 02:27:24 +0000547 pPager->mxPage = mxPage>5 ? mxPage : 10;
drhed7c8552001-04-11 14:29:21 +0000548 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000549 pPager->errMask = 0;
drh5e00f6c2001-09-13 13:46:56 +0000550 pPager->tempFile = tempFile;
551 pPager->readOnly = readOnly;
drhed7c8552001-04-11 14:29:21 +0000552 pPager->pFirst = 0;
553 pPager->pLast = 0;
drh7c717f72001-06-24 20:39:41 +0000554 pPager->nExtra = nExtra;
drhed7c8552001-04-11 14:29:21 +0000555 memset(pPager->aHash, 0, sizeof(pPager->aHash));
556 *ppPager = pPager;
557 return SQLITE_OK;
558}
559
560/*
drh72f82862001-05-24 21:06:34 +0000561** Set the destructor for this pager. If not NULL, the destructor is called
drh5e00f6c2001-09-13 13:46:56 +0000562** when the reference count on each page reaches zero. The destructor can
563** be used to clean up information in the extra segment appended to each page.
drh72f82862001-05-24 21:06:34 +0000564**
565** The destructor is not called as a result sqlitepager_close().
566** Destructors are only called by sqlitepager_unref().
567*/
568void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
569 pPager->xDestructor = xDesc;
570}
571
572/*
drh5e00f6c2001-09-13 13:46:56 +0000573** Return the total number of pages in the disk file associated with
574** pPager.
drhed7c8552001-04-11 14:29:21 +0000575*/
drhd9b02572001-04-15 00:37:09 +0000576int sqlitepager_pagecount(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000577 int n;
578 struct stat statbuf;
drhd9b02572001-04-15 00:37:09 +0000579 assert( pPager!=0 );
drhed7c8552001-04-11 14:29:21 +0000580 if( pPager->dbSize>=0 ){
581 return pPager->dbSize;
582 }
583 if( fstat(pPager->fd, &statbuf)!=0 ){
584 n = 0;
585 }else{
586 n = statbuf.st_size/SQLITE_PAGE_SIZE;
587 }
drhd9b02572001-04-15 00:37:09 +0000588 if( pPager->state!=SQLITE_UNLOCK ){
drhed7c8552001-04-11 14:29:21 +0000589 pPager->dbSize = n;
590 }
591 return n;
592}
593
594/*
595** Shutdown the page cache. Free all memory and close all files.
596**
597** If a transaction was in progress when this routine is called, that
598** transaction is rolled back. All outstanding pages are invalidated
599** and their memory is freed. Any attempt to use a page associated
600** with this page cache after this function returns will likely
601** result in a coredump.
602*/
drhd9b02572001-04-15 00:37:09 +0000603int sqlitepager_close(Pager *pPager){
604 PgHdr *pPg, *pNext;
drhed7c8552001-04-11 14:29:21 +0000605 switch( pPager->state ){
606 case SQLITE_WRITELOCK: {
drhd9b02572001-04-15 00:37:09 +0000607 sqlitepager_rollback(pPager);
608 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000609 break;
610 }
611 case SQLITE_READLOCK: {
drhd9b02572001-04-15 00:37:09 +0000612 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000613 break;
614 }
615 default: {
616 /* Do nothing */
617 break;
618 }
619 }
drhd9b02572001-04-15 00:37:09 +0000620 for(pPg=pPager->pAll; pPg; pPg=pNext){
621 pNext = pPg->pNextAll;
622 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000623 }
624 if( pPager->fd>=0 ) close(pPager->fd);
625 assert( pPager->jfd<0 );
drh5e00f6c2001-09-13 13:46:56 +0000626 if( pPager->tempFile ){
627 unlink(pPager->zFilename);
628 }
drhed7c8552001-04-11 14:29:21 +0000629 sqliteFree(pPager);
630 return SQLITE_OK;
631}
632
633/*
drh5e00f6c2001-09-13 13:46:56 +0000634** Return the page number for the given page data.
drhed7c8552001-04-11 14:29:21 +0000635*/
drhd9b02572001-04-15 00:37:09 +0000636Pgno sqlitepager_pagenumber(void *pData){
drhed7c8552001-04-11 14:29:21 +0000637 PgHdr *p = DATA_TO_PGHDR(pData);
638 return p->pgno;
639}
640
641/*
drh7e3b0a02001-04-28 16:52:40 +0000642** Increment the reference count for a page. If the page is
643** currently on the freelist (the reference count is zero) then
644** remove it from the freelist.
645*/
drhdf0b3b02001-06-23 11:36:20 +0000646static void page_ref(PgHdr *pPg){
drh7e3b0a02001-04-28 16:52:40 +0000647 if( pPg->nRef==0 ){
648 /* The page is currently on the freelist. Remove it. */
649 if( pPg->pPrevFree ){
650 pPg->pPrevFree->pNextFree = pPg->pNextFree;
651 }else{
652 pPg->pPager->pFirst = pPg->pNextFree;
653 }
654 if( pPg->pNextFree ){
655 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
656 }else{
657 pPg->pPager->pLast = pPg->pPrevFree;
658 }
659 pPg->pPager->nRef++;
660 }
661 pPg->nRef++;
drhdd793422001-06-28 01:54:48 +0000662 REFINFO(pPg);
drhdf0b3b02001-06-23 11:36:20 +0000663}
664
665/*
666** Increment the reference count for a page. The input pointer is
667** a reference to the page data.
668*/
669int sqlitepager_ref(void *pData){
670 PgHdr *pPg = DATA_TO_PGHDR(pData);
671 page_ref(pPg);
drh8c42ca92001-06-22 19:15:00 +0000672 return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +0000673}
674
675/*
drhd9b02572001-04-15 00:37:09 +0000676** Acquire a page.
677**
** A read lock on the disk file is obtained when the first page is acquired.
679** This read lock is dropped when the last page is released.
drhd9b02572001-04-15 00:37:09 +0000680**
drh306dc212001-05-21 13:45:10 +0000681** A _get works for any page number greater than 0. If the database
682** file is smaller than the requested page, then no actual disk
683** read occurs and the memory image of the page is initialized to
684** all zeros. The extra data appended to a page is always initialized
685** to zeros the first time a page is loaded into memory.
686**
drhd9b02572001-04-15 00:37:09 +0000687** The acquisition might fail for several reasons. In all cases,
688** an appropriate error code is returned and *ppPage is set to NULL.
drh7e3b0a02001-04-28 16:52:40 +0000689**
690** See also sqlitepager_lookup(). Both this routine and _lookup() attempt
691** to find a page in the in-memory cache first. If the page is not already
drh5e00f6c2001-09-13 13:46:56 +0000692** in memory, this routine goes to disk to read it in whereas _lookup()
drh7e3b0a02001-04-28 16:52:40 +0000693** just returns 0. This routine acquires a read-lock the first time it
694** has to go to disk, and could also playback an old journal if necessary.
695** Since _lookup() never goes to disk, it never has to deal with locks
696** or journal files.
drhed7c8552001-04-11 14:29:21 +0000697*/
drhd9b02572001-04-15 00:37:09 +0000698int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
drhed7c8552001-04-11 14:29:21 +0000699 PgHdr *pPg;
700
drhd9b02572001-04-15 00:37:09 +0000701 /* Make sure we have not hit any critical errors.
702 */
703 if( pPager==0 || pgno==0 ){
704 return SQLITE_ERROR;
705 }
706 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
707 return pager_errcode(pPager);
708 }
709
drhed7c8552001-04-11 14:29:21 +0000710 /* If this is the first page accessed, then get a read lock
711 ** on the database file.
712 */
713 if( pPager->nRef==0 ){
drhd9b02572001-04-15 00:37:09 +0000714 if( pager_lock(pPager->fd, 0)!=0 ){
drhed7c8552001-04-11 14:29:21 +0000715 *ppPage = 0;
716 return SQLITE_BUSY;
717 }
drhd9b02572001-04-15 00:37:09 +0000718 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +0000719
720 /* If a journal file exists, try to play it back.
721 */
722 if( access(pPager->zJournal,0)==0 ){
723 int rc;
724
725 /* Open the journal for exclusive access. Return SQLITE_BUSY if
726 ** we cannot get exclusive access to the journal file
727 */
728 pPager->jfd = open(pPager->zJournal, O_RDONLY, 0);
drhd9b02572001-04-15 00:37:09 +0000729 if( pPager->jfd<0 || pager_lock(pPager->jfd, 1)!=0 ){
drhed7c8552001-04-11 14:29:21 +0000730 if( pPager->jfd>=0 ){ close(pPager->jfd); pPager->jfd = -1; }
drhd9b02572001-04-15 00:37:09 +0000731 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000732 *ppPage = 0;
733 return SQLITE_BUSY;
734 }
735
736 /* Get a write lock on the database */
drhd9b02572001-04-15 00:37:09 +0000737 pager_unlock(pPager->fd);
738 if( pager_lock(pPager->fd, 1)!=0 ){
739 close(pPager->jfd);
740 pPager->jfd = -1;
drhed7c8552001-04-11 14:29:21 +0000741 *ppPage = 0;
742 return SQLITE_PROTOCOL;
743 }
744
745 /* Playback and delete the journal. Drop the database write
746 ** lock and reacquire the read lock.
747 */
drhd9b02572001-04-15 00:37:09 +0000748 rc = pager_playback(pPager);
749 if( rc!=SQLITE_OK ){
750 return rc;
751 }
drhed7c8552001-04-11 14:29:21 +0000752 }
753 pPg = 0;
754 }else{
755 /* Search for page in cache */
drhd9b02572001-04-15 00:37:09 +0000756 pPg = pager_lookup(pPager, pgno);
drhed7c8552001-04-11 14:29:21 +0000757 }
758 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +0000759 /* The requested page is not in the page cache. */
drhed7c8552001-04-11 14:29:21 +0000760 int h;
drh7e3b0a02001-04-28 16:52:40 +0000761 pPager->nMiss++;
drhed7c8552001-04-11 14:29:21 +0000762 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
763 /* Create a new page */
drh7e3b0a02001-04-28 16:52:40 +0000764 pPg = sqliteMalloc( sizeof(*pPg) + SQLITE_PAGE_SIZE + pPager->nExtra );
drhd9b02572001-04-15 00:37:09 +0000765 if( pPg==0 ){
766 *ppPage = 0;
767 pager_unwritelock(pPager);
768 pPager->errMask |= PAGER_ERR_MEM;
769 return SQLITE_NOMEM;
770 }
drhed7c8552001-04-11 14:29:21 +0000771 pPg->pPager = pPager;
drhd9b02572001-04-15 00:37:09 +0000772 pPg->pNextAll = pPager->pAll;
773 if( pPager->pAll ){
774 pPager->pAll->pPrevAll = pPg;
775 }
776 pPg->pPrevAll = 0;
drhd79caeb2001-04-15 02:27:24 +0000777 pPager->pAll = pPg;
drhd9b02572001-04-15 00:37:09 +0000778 pPager->nPage++;
drhed7c8552001-04-11 14:29:21 +0000779 }else{
drhd9b02572001-04-15 00:37:09 +0000780 /* Recycle an older page. First locate the page to be recycled.
781 ** Try to find one that is not dirty and is near the head of
782 ** of the free list */
drh6019e162001-07-02 17:51:45 +0000783 int cnt = pPager->mxPage/2;
drhed7c8552001-04-11 14:29:21 +0000784 pPg = pPager->pFirst;
drh6019e162001-07-02 17:51:45 +0000785 while( pPg->dirty && 0<cnt-- && pPg->pNextFree ){
drhd9b02572001-04-15 00:37:09 +0000786 pPg = pPg->pNextFree;
787 }
788 if( pPg==0 || pPg->dirty ) pPg = pPager->pFirst;
789 assert( pPg->nRef==0 );
790
791 /* If the page to be recycled is dirty, sync the journal and write
792 ** the old page into the database. */
drhed7c8552001-04-11 14:29:21 +0000793 if( pPg->dirty ){
794 int rc;
drhd9b02572001-04-15 00:37:09 +0000795 assert( pPg->inJournal==1 );
796 assert( pPager->state==SQLITE_WRITELOCK );
797 rc = fsync(pPager->jfd);
798 if( rc!=0 ){
799 rc = sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000800 *ppPage = 0;
drhd9b02572001-04-15 00:37:09 +0000801 if( rc==SQLITE_OK ) rc = SQLITE_IOERR;
drhed7c8552001-04-11 14:29:21 +0000802 return rc;
803 }
drhd9b02572001-04-15 00:37:09 +0000804 pager_seek(pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
805 rc = pager_write(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
806 if( rc!=SQLITE_OK ){
807 rc = sqlitepager_rollback(pPager);
808 *ppPage = 0;
809 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
810 return rc;
811 }
812 }
813
814 /* Unlink the old page from the free list and the hash table
815 */
drh6019e162001-07-02 17:51:45 +0000816 if( pPg->pPrevFree ){
817 pPg->pPrevFree->pNextFree = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000818 }else{
drh6019e162001-07-02 17:51:45 +0000819 assert( pPager->pFirst==pPg );
820 pPager->pFirst = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000821 }
drh6019e162001-07-02 17:51:45 +0000822 if( pPg->pNextFree ){
823 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
824 }else{
825 assert( pPager->pLast==pPg );
826 pPager->pLast = pPg->pPrevFree;
827 }
828 pPg->pNextFree = pPg->pPrevFree = 0;
drhed7c8552001-04-11 14:29:21 +0000829 if( pPg->pNextHash ){
830 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
831 }
832 if( pPg->pPrevHash ){
833 pPg->pPrevHash->pNextHash = pPg->pNextHash;
834 }else{
drhd9b02572001-04-15 00:37:09 +0000835 h = pager_hash(pPg->pgno);
drhed7c8552001-04-11 14:29:21 +0000836 assert( pPager->aHash[h]==pPg );
837 pPager->aHash[h] = pPg->pNextHash;
838 }
drh6019e162001-07-02 17:51:45 +0000839 pPg->pNextHash = pPg->pPrevHash = 0;
drhd9b02572001-04-15 00:37:09 +0000840 pPager->nOvfl++;
drhed7c8552001-04-11 14:29:21 +0000841 }
842 pPg->pgno = pgno;
drh6019e162001-07-02 17:51:45 +0000843 if( pPager->aInJournal && pgno<=pPager->origDbSize ){
844 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
845 }else{
846 pPg->inJournal = 0;
847 }
drhed7c8552001-04-11 14:29:21 +0000848 pPg->dirty = 0;
849 pPg->nRef = 1;
drhdd793422001-06-28 01:54:48 +0000850 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000851 pPager->nRef++;
852 h = pager_hash(pgno);
drhed7c8552001-04-11 14:29:21 +0000853 pPg->pNextHash = pPager->aHash[h];
854 pPager->aHash[h] = pPg;
855 if( pPg->pNextHash ){
856 assert( pPg->pNextHash->pPrevHash==0 );
857 pPg->pNextHash->pPrevHash = pPg;
858 }
drh306dc212001-05-21 13:45:10 +0000859 if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
860 if( pPager->dbSize<pgno ){
861 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
862 }else{
863 pager_seek(pPager->fd, (pgno-1)*SQLITE_PAGE_SIZE);
864 pager_read(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
865 }
drh7e3b0a02001-04-28 16:52:40 +0000866 if( pPager->nExtra>0 ){
867 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
868 }
drhed7c8552001-04-11 14:29:21 +0000869 }else{
drhd9b02572001-04-15 00:37:09 +0000870 /* The requested page is in the page cache. */
drh7e3b0a02001-04-28 16:52:40 +0000871 pPager->nHit++;
drhdf0b3b02001-06-23 11:36:20 +0000872 page_ref(pPg);
drhed7c8552001-04-11 14:29:21 +0000873 }
874 *ppPage = PGHDR_TO_DATA(pPg);
875 return SQLITE_OK;
876}
877
878/*
drh7e3b0a02001-04-28 16:52:40 +0000879** Acquire a page if it is already in the in-memory cache. Do
880** not read the page from disk. Return a pointer to the page,
881** or 0 if the page is not in cache.
882**
883** See also sqlitepager_get(). The difference between this routine
884** and sqlitepager_get() is that _get() will go to the disk and read
885** in the page if the page is not already in cache. This routine
drh5e00f6c2001-09-13 13:46:56 +0000886** returns NULL if the page is not in cache or if a disk I/O error
887** has ever happened.
drh7e3b0a02001-04-28 16:52:40 +0000888*/
889void *sqlitepager_lookup(Pager *pPager, Pgno pgno){
890 PgHdr *pPg;
891
892 /* Make sure we have not hit any critical errors.
893 */
894 if( pPager==0 || pgno==0 ){
895 return 0;
896 }
897 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
898 return 0;
899 }
900 if( pPager->nRef==0 ){
901 return 0;
902 }
903 pPg = pager_lookup(pPager, pgno);
904 if( pPg==0 ) return 0;
drhdf0b3b02001-06-23 11:36:20 +0000905 page_ref(pPg);
drh7e3b0a02001-04-28 16:52:40 +0000906 return PGHDR_TO_DATA(pPg);
907}
908
909/*
drhed7c8552001-04-11 14:29:21 +0000910** Release a page.
911**
912** If the number of references to the page drop to zero, then the
913** page is added to the LRU list. When all references to all pages
drhd9b02572001-04-15 00:37:09 +0000914** are released, a rollback occurs and the lock on the database is
drhed7c8552001-04-11 14:29:21 +0000915** removed.
916*/
drhd9b02572001-04-15 00:37:09 +0000917int sqlitepager_unref(void *pData){
drhed7c8552001-04-11 14:29:21 +0000918 Pager *pPager;
919 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +0000920
921 /* Decrement the reference count for this page
922 */
drhed7c8552001-04-11 14:29:21 +0000923 pPg = DATA_TO_PGHDR(pData);
924 assert( pPg->nRef>0 );
925 pPager = pPg->pPager;
926 pPg->nRef--;
drhdd793422001-06-28 01:54:48 +0000927 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000928
drh72f82862001-05-24 21:06:34 +0000929 /* When the number of references to a page reach 0, call the
930 ** destructor and add the page to the freelist.
drhd9b02572001-04-15 00:37:09 +0000931 */
drhed7c8552001-04-11 14:29:21 +0000932 if( pPg->nRef==0 ){
drhd9b02572001-04-15 00:37:09 +0000933 pPg->pNextFree = 0;
934 pPg->pPrevFree = pPager->pLast;
drhed7c8552001-04-11 14:29:21 +0000935 pPager->pLast = pPg;
drhd9b02572001-04-15 00:37:09 +0000936 if( pPg->pPrevFree ){
937 pPg->pPrevFree->pNextFree = pPg;
drhed7c8552001-04-11 14:29:21 +0000938 }else{
939 pPager->pFirst = pPg;
940 }
drh72f82862001-05-24 21:06:34 +0000941 if( pPager->xDestructor ){
942 pPager->xDestructor(pData);
943 }
drhd9b02572001-04-15 00:37:09 +0000944
945 /* When all pages reach the freelist, drop the read lock from
946 ** the database file.
947 */
948 pPager->nRef--;
949 assert( pPager->nRef>=0 );
950 if( pPager->nRef==0 ){
951 pager_reset(pPager);
952 }
drhed7c8552001-04-11 14:29:21 +0000953 }
drhd9b02572001-04-15 00:37:09 +0000954 return SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +0000955}
956
957/*
958** Mark a data page as writeable. The page is written into the journal
959** if it is not there already. This routine must be called before making
960** changes to a page.
961**
962** The first time this routine is called, the pager creates a new
963** journal and acquires a write lock on the database. If the write
964** lock could not be acquired, this routine returns SQLITE_BUSY. The
drh306dc212001-05-21 13:45:10 +0000965** calling routine must check for that return value and be careful not to
drhed7c8552001-04-11 14:29:21 +0000966** change any page data until this routine returns SQLITE_OK.
drhd9b02572001-04-15 00:37:09 +0000967**
968** If the journal file could not be written because the disk is full,
969** then this routine returns SQLITE_FULL and does an immediate rollback.
970** All subsequent write attempts also return SQLITE_FULL until there
971** is a call to sqlitepager_commit() or sqlitepager_rollback() to
972** reset.
drhed7c8552001-04-11 14:29:21 +0000973*/
drhd9b02572001-04-15 00:37:09 +0000974int sqlitepager_write(void *pData){
drh69688d52001-04-14 16:38:23 +0000975 PgHdr *pPg = DATA_TO_PGHDR(pData);
976 Pager *pPager = pPg->pPager;
drhd79caeb2001-04-15 02:27:24 +0000977 int rc = SQLITE_OK;
drh69688d52001-04-14 16:38:23 +0000978
drhd9b02572001-04-15 00:37:09 +0000979 if( pPager->errMask ){
980 return pager_errcode(pPager);
981 }
drh5e00f6c2001-09-13 13:46:56 +0000982 if( pPager->readOnly ){
983 return SQLITE_PERM;
984 }
drhd9b02572001-04-15 00:37:09 +0000985 pPg->dirty = 1;
drh69688d52001-04-14 16:38:23 +0000986 if( pPg->inJournal ){ return SQLITE_OK; }
drhd9b02572001-04-15 00:37:09 +0000987 assert( pPager->state!=SQLITE_UNLOCK );
drhed7c8552001-04-11 14:29:21 +0000988 if( pPager->state==SQLITE_READLOCK ){
drh6019e162001-07-02 17:51:45 +0000989 assert( pPager->aInJournal==0 );
990 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
991 if( pPager->aInJournal==0 ){
992 return SQLITE_NOMEM;
993 }
drhed7c8552001-04-11 14:29:21 +0000994 pPager->jfd = open(pPager->zJournal, O_RDWR|O_CREAT, 0644);
995 if( pPager->jfd<0 ){
996 return SQLITE_CANTOPEN;
997 }
drhd9b02572001-04-15 00:37:09 +0000998 if( pager_lock(pPager->jfd, 1) ){
drhed7c8552001-04-11 14:29:21 +0000999 close(pPager->jfd);
1000 pPager->jfd = -1;
1001 return SQLITE_BUSY;
1002 }
drhd9b02572001-04-15 00:37:09 +00001003 pager_unlock(pPager->fd);
1004 if( pager_lock(pPager->fd, 1) ){
drhed7c8552001-04-11 14:29:21 +00001005 close(pPager->jfd);
1006 pPager->jfd = -1;
1007 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +00001008 pPager->errMask |= PAGER_ERR_LOCK;
drhed7c8552001-04-11 14:29:21 +00001009 return SQLITE_PROTOCOL;
1010 }
1011 pPager->state = SQLITE_WRITELOCK;
drhd9b02572001-04-15 00:37:09 +00001012 sqlitepager_pagecount(pPager);
drh69688d52001-04-14 16:38:23 +00001013 pPager->origDbSize = pPager->dbSize;
drhd9b02572001-04-15 00:37:09 +00001014 rc = pager_write(pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
1015 if( rc==SQLITE_OK ){
1016 rc = pager_write(pPager->jfd, &pPager->dbSize, sizeof(Pgno));
1017 }
1018 if( rc!=SQLITE_OK ){
1019 rc = pager_unwritelock(pPager);
1020 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
1021 return rc;
1022 }
drhed7c8552001-04-11 14:29:21 +00001023 }
drhd9b02572001-04-15 00:37:09 +00001024 assert( pPager->state==SQLITE_WRITELOCK );
drh69688d52001-04-14 16:38:23 +00001025 assert( pPager->jfd>=0 );
drhd9b02572001-04-15 00:37:09 +00001026 if( pPg->pgno <= pPager->origDbSize ){
1027 rc = pager_write(pPager->jfd, &pPg->pgno, sizeof(Pgno));
1028 if( rc==SQLITE_OK ){
1029 rc = pager_write(pPager->jfd, pData, SQLITE_PAGE_SIZE);
1030 }
1031 if( rc!=SQLITE_OK ){
1032 sqlitepager_rollback(pPager);
1033 pPager->errMask |= PAGER_ERR_FULL;
1034 return rc;
1035 }
drh6019e162001-07-02 17:51:45 +00001036 assert( pPager->aInJournal!=0 );
1037 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drh69688d52001-04-14 16:38:23 +00001038 }
drh69688d52001-04-14 16:38:23 +00001039 pPg->inJournal = 1;
drh306dc212001-05-21 13:45:10 +00001040 if( pPager->dbSize<pPg->pgno ){
1041 pPager->dbSize = pPg->pgno;
1042 }
drh69688d52001-04-14 16:38:23 +00001043 return rc;
drhed7c8552001-04-11 14:29:21 +00001044}
1045
1046/*
drh6019e162001-07-02 17:51:45 +00001047** Return TRUE if the page given in the argument was previous passed
1048** to sqlitepager_write(). In other words, return TRUE if it is ok
1049** to change the content of the page.
1050*/
1051int sqlitepager_iswriteable(void *pData){
1052 PgHdr *pPg = DATA_TO_PGHDR(pData);
1053 return pPg->dirty;
1054}
1055
1056/*
drhed7c8552001-04-11 14:29:21 +00001057** Commit all changes to the database and release the write lock.
drhd9b02572001-04-15 00:37:09 +00001058**
1059** If the commit fails for any reason, a rollback attempt is made
1060** and an error code is returned. If the commit worked, SQLITE_OK
1061** is returned.
drhed7c8552001-04-11 14:29:21 +00001062*/
drhd9b02572001-04-15 00:37:09 +00001063int sqlitepager_commit(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00001064 int i, rc;
1065 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001066
1067 if( pPager->errMask==PAGER_ERR_FULL ){
1068 rc = sqlitepager_rollback(pPager);
1069 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
1070 return rc;
1071 }
1072 if( pPager->errMask!=0 ){
1073 rc = pager_errcode(pPager);
1074 return rc;
1075 }
1076 if( pPager->state!=SQLITE_WRITELOCK ){
1077 return SQLITE_ERROR;
1078 }
drhed7c8552001-04-11 14:29:21 +00001079 assert( pPager->jfd>=0 );
1080 if( fsync(pPager->jfd) ){
drhd9b02572001-04-15 00:37:09 +00001081 goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001082 }
1083 for(i=0; i<N_PG_HASH; i++){
1084 for(pPg=pPager->aHash[i]; pPg; pPg=pPg->pNextHash){
1085 if( pPg->dirty==0 ) continue;
drhd9b02572001-04-15 00:37:09 +00001086 rc = pager_seek(pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
1087 if( rc!=SQLITE_OK ) goto commit_abort;
1088 rc = pager_write(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
1089 if( rc!=SQLITE_OK ) goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001090 }
1091 }
drhd9b02572001-04-15 00:37:09 +00001092 if( fsync(pPager->fd) ) goto commit_abort;
1093 rc = pager_unwritelock(pPager);
1094 pPager->dbSize = -1;
1095 return rc;
1096
1097 /* Jump here if anything goes wrong during the commit process.
1098 */
1099commit_abort:
1100 rc = sqlitepager_rollback(pPager);
1101 if( rc==SQLITE_OK ){
1102 rc = SQLITE_FULL;
drhed7c8552001-04-11 14:29:21 +00001103 }
drhed7c8552001-04-11 14:29:21 +00001104 return rc;
1105}
1106
1107/*
1108** Rollback all changes. The database falls back to read-only mode.
1109** All in-memory cache pages revert to their original data contents.
1110** The journal is deleted.
drhd9b02572001-04-15 00:37:09 +00001111**
1112** This routine cannot fail unless some other process is not following
1113** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
1114** process is writing trash into the journal file (SQLITE_CORRUPT) or
1115** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
1116** codes are returned for all these occasions. Otherwise,
1117** SQLITE_OK is returned.
drhed7c8552001-04-11 14:29:21 +00001118*/
drhd9b02572001-04-15 00:37:09 +00001119int sqlitepager_rollback(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00001120 int rc;
drhd9b02572001-04-15 00:37:09 +00001121 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
1122 return pager_errcode(pPager);
drhed7c8552001-04-11 14:29:21 +00001123 }
drhd9b02572001-04-15 00:37:09 +00001124 if( pPager->state!=SQLITE_WRITELOCK ){
1125 return SQLITE_OK;
1126 }
1127 rc = pager_playback(pPager);
1128 if( rc!=SQLITE_OK ){
1129 rc = SQLITE_CORRUPT;
1130 pPager->errMask |= PAGER_ERR_CORRUPT;
1131 }
1132 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +00001133 return rc;
1134};
drhd9b02572001-04-15 00:37:09 +00001135
1136/*
drh5e00f6c2001-09-13 13:46:56 +00001137** Return TRUE if the database file is opened read-only. Return FALSE
1138** if the database is (in theory) writable.
1139*/
1140int sqlitepager_isreadonly(Pager *pPager){
drhbe0072d2001-09-13 14:46:09 +00001141 return pPager->readOnly;
drh5e00f6c2001-09-13 13:46:56 +00001142}
1143
1144/*
drhd9b02572001-04-15 00:37:09 +00001145** This routine is used for testing and analysis only.
1146*/
1147int *sqlitepager_stats(Pager *pPager){
1148 static int a[9];
1149 a[0] = pPager->nRef;
1150 a[1] = pPager->nPage;
1151 a[2] = pPager->mxPage;
1152 a[3] = pPager->dbSize;
1153 a[4] = pPager->state;
1154 a[5] = pPager->errMask;
1155 a[6] = pPager->nHit;
1156 a[7] = pPager->nMiss;
1157 a[8] = pPager->nOvfl;
1158 return a;
1159}
drhdd793422001-06-28 01:54:48 +00001160
#if SQLITE_TEST
/*
** Print a listing of all referenced pages and their ref count.
**
** One line is written to standard output for every page on the list
** of all pages whose reference count is greater than zero.
*/
void sqlitepager_refdump(Pager *pPager){
  PgHdr *pPg;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->nRef<=0 ) continue;
    /* The address is printed through unsigned long rather than int so
    ** that it is not truncated where pointers are wider than int.  The
    ** page number is cast so that the argument matches %d whatever the
    ** definition of Pgno is. */
    printf("PAGE %3d addr=0x%08lx nRef=%d\n",
       (int)pPg->pgno, (unsigned long)PGHDR_TO_DATA(pPg), pPg->nRef);
  }
}
#endif