blob: 8d4f9696d93ec4ae046511d6d88882a30acd6fc3 [file] [log] [blame]
drhed7c8552001-04-11 14:29:21 +00001/*
2** Copyright (c) 2001 D. Richard Hipp
3**
4** This program is free software; you can redistribute it and/or
5** modify it under the terms of the GNU General Public
6** License as published by the Free Software Foundation; either
7** version 2 of the License, or (at your option) any later version.
8**
9** This program is distributed in the hope that it will be useful,
10** but WITHOUT ANY WARRANTY; without even the implied warranty of
11** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12** General Public License for more details.
13**
14** You should have received a copy of the GNU General Public
15** License along with this library; if not, write to the
16** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17** Boston, MA 02111-1307, USA.
18**
19** Author contact information:
20** drh@hwaci.com
21** http://www.hwaci.com/drh/
22**
23*************************************************************************
24** This is the implementation of the page cache subsystem.
25**
26** The page cache is used to access a database file. The pager journals
27** all writes in order to support rollback. Locking is used to limit
** access to one or more readers or to one writer.
drhed7c8552001-04-11 14:29:21 +000029**
drhf57b14a2001-09-14 18:54:08 +000030** @(#) $Id: pager.c,v 1.18 2001/09/14 18:54:09 drh Exp $
drhed7c8552001-04-11 14:29:21 +000031*/
drhd9b02572001-04-15 00:37:09 +000032#include "sqliteInt.h"
drhed7c8552001-04-11 14:29:21 +000033#include "pager.h"
34#include <fcntl.h>
35#include <sys/stat.h>
36#include <unistd.h>
37#include <assert.h>
drhd9b02572001-04-15 00:37:09 +000038#include <string.h>
drhed7c8552001-04-11 14:29:21 +000039
40/*
41** The page cache as a whole is always in one of the following
42** states:
43**
44** SQLITE_UNLOCK The page cache is not currently reading or
45** writing the database file. There is no
46** data held in memory. This is the initial
47** state.
48**
49** SQLITE_READLOCK The page cache is reading the database.
50** Writing is not permitted. There can be
51** multiple readers accessing the same database
drh69688d52001-04-14 16:38:23 +000052** file at the same time.
drhed7c8552001-04-11 14:29:21 +000053**
54** SQLITE_WRITELOCK The page cache is writing the database.
55** Access is exclusive. No other processes or
56** threads can be reading or writing while one
57** process is writing.
58**
** The page cache comes up in SQLITE_UNLOCK.  The first time a
** sqlitepager_get() occurs, the state transitions to SQLITE_READLOCK.
** After all pages have been released using sqlitepager_unref(),
** the state transitions back to SQLITE_UNLOCK.  The first time
** that sqlitepager_write() is called, the state transitions to
** SQLITE_WRITELOCK.  (Note that sqlitepager_write() can only be
** called on an outstanding page which means that the pager must
** be in SQLITE_READLOCK before it transitions to SQLITE_WRITELOCK.)
** The sqlitepager_rollback() and sqlitepager_commit() functions
** transition the state from SQLITE_WRITELOCK back to SQLITE_READLOCK.
drhed7c8552001-04-11 14:29:21 +000069*/
#define SQLITE_UNLOCK     0  /* Not reading or writing; no data in memory */
#define SQLITE_READLOCK   1  /* Reading the database; writing not permitted */
#define SQLITE_WRITELOCK  2  /* Writing the database; access is exclusive */
73
drhd9b02572001-04-15 00:37:09 +000074
drhed7c8552001-04-11 14:29:21 +000075/*
76** Each in-memory image of a page begins with the following header.
drhbd03cae2001-06-02 02:40:57 +000077** This header is only visible to this pager module. The client
78** code that calls pager sees only the data that follows the header.
drhed7c8552001-04-11 14:29:21 +000079*/
drhd9b02572001-04-15 00:37:09 +000080typedef struct PgHdr PgHdr;
drhed7c8552001-04-11 14:29:21 +000081struct PgHdr {
82 Pager *pPager; /* The pager to which this page belongs */
83 Pgno pgno; /* The page number for this page */
drh69688d52001-04-14 16:38:23 +000084 PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */
drhed7c8552001-04-11 14:29:21 +000085 int nRef; /* Number of users of this page */
drhd9b02572001-04-15 00:37:09 +000086 PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */
87 PgHdr *pNextAll, *pPrevAll; /* A list of all pages */
drhed7c8552001-04-11 14:29:21 +000088 char inJournal; /* TRUE if has been written to journal */
89 char dirty; /* TRUE if we need to write back changes */
drh69688d52001-04-14 16:38:23 +000090 /* SQLITE_PAGE_SIZE bytes of page data follow this header */
drh7e3b0a02001-04-28 16:52:40 +000091 /* Pager.nExtra bytes of local data follow the page data */
drhed7c8552001-04-11 14:29:21 +000092};
93
/*
** Pointer conversions between a page header, the page data that
** immediately follows the header, and the extra bytes that follow
** the page data.
*/
#define PGHDR_TO_DATA(P)   ((void*)((P)+1))
#define DATA_TO_PGHDR(D)   (((PgHdr*)(D))-1)
#define PGHDR_TO_EXTRA(P)  ((void*)(((char*)((P)+1))+SQLITE_PAGE_SIZE))
drhed7c8552001-04-11 14:29:21 +0000101
/*
** Number of slots in the hash table that locates in-memory pages by
** page number.  Knuth says this should be a prime number.
*/
#define N_PG_HASH   907
drhed7c8552001-04-11 14:29:21 +0000107
108/*
109** A open page cache is an instance of the following structure.
110*/
111struct Pager {
112 char *zFilename; /* Name of the database file */
113 char *zJournal; /* Name of the journal file */
114 int fd, jfd; /* File descriptors for database and journal */
drhed7c8552001-04-11 14:29:21 +0000115 int dbSize; /* Number of pages in the file */
drh69688d52001-04-14 16:38:23 +0000116 int origDbSize; /* dbSize before the current change */
drh7e3b0a02001-04-28 16:52:40 +0000117 int nExtra; /* Add this many bytes to each in-memory page */
drh72f82862001-05-24 21:06:34 +0000118 void (*xDestructor)(void*); /* Call this routine when freeing pages */
drhed7c8552001-04-11 14:29:21 +0000119 int nPage; /* Total number of in-memory pages */
drhd9b02572001-04-15 00:37:09 +0000120 int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */
drhed7c8552001-04-11 14:29:21 +0000121 int mxPage; /* Maximum number of pages to hold in cache */
drhd9b02572001-04-15 00:37:09 +0000122 int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */
123 unsigned char state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
124 unsigned char errMask; /* One of several kinds of errors */
drh5e00f6c2001-09-13 13:46:56 +0000125 unsigned char tempFile; /* zFilename is a temporary file */
126 unsigned char readOnly; /* True for a read-only database */
drhf57b14a2001-09-14 18:54:08 +0000127 unsigned char needSync; /* True if an fsync() is needed on the journal */
drh6019e162001-07-02 17:51:45 +0000128 unsigned char *aInJournal; /* One bit for each page in the database file */
drhed7c8552001-04-11 14:29:21 +0000129 PgHdr *pFirst, *pLast; /* List of free pages */
drhd9b02572001-04-15 00:37:09 +0000130 PgHdr *pAll; /* List of all pages */
drhed7c8552001-04-11 14:29:21 +0000131 PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */
drhd9b02572001-04-15 00:37:09 +0000132};
133
/*
** Bit flags that may be set in Pager.errMask.  Each flag records one
** kind of failure that the pager has encountered.
*/
#define PAGER_ERR_FULL     0x01   /* a write() failed */
#define PAGER_ERR_MEM      0x02   /* malloc() failed */
#define PAGER_ERR_LOCK     0x04   /* error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08   /* database or journal corruption */
141
142/*
143** The journal file contains page records in the following
144** format.
145*/
146typedef struct PageRecord PageRecord;
147struct PageRecord {
148 Pgno pgno; /* The page number */
149 char aData[SQLITE_PAGE_SIZE]; /* Original data for page pgno */
150};
151
/*
** Magic string found at the start of every journal file.  The bytes
** were obtained from /dev/random and serve only as a sanity check.
*/
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9,
  0x20, 0xa1, 0x63, 0xd4,
};
159
/*
** Map a page number onto a slot of the Pager.aHash[] table.
*/
#define pager_hash(PN)  ((PN)%N_PG_HASH)
drhed7c8552001-04-11 14:29:21 +0000164
/*
** Reference count tracking, available only when SQLITE_TEST is set.
**
** When pager_refinfo_enable is non-zero, REFINFO(p) prints the page
** number, the address of the page data, and the reference count of
** page p.  In non-test builds REFINFO() expands to nothing.
*/
#if SQLITE_TEST
  int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager_refinfo_enable ) return;
    /* Print the address through an unsigned long.  Casting a pointer
    ** to int, as was done previously, truncates the address where
    ** pointers are wider than int and mismatches the format. */
    printf(
       "REFCNT: %4d addr=0x%08lx nRef=%d\n",
       (int)p->pgno, (unsigned long)PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
183
/*
** Try to take a lock covering the whole of file fd: a read lock when
** wrlock==0, a write lock otherwise.  The request does not block.
**
** Return 0 if the lock was obtained and 1 if it was not.
*/
static int pager_lock(int fd, int wrlock){
  struct flock lk;
  lk.l_type = F_RDLCK;
  if( wrlock ){
    lk.l_type = F_WRLCK;
  }
  lk.l_whence = SEEK_SET;
  lk.l_len = 0L;        /* Zero start and length: lock the entire file */
  lk.l_start = 0L;
  if( fcntl(fd, F_SETLK, &lk)!=0 ){
    return 1;
  }
  return 0;
}
198
/*
** Release whatever lock this process holds on file fd.  The unlock
** request covers the entire file.
**
** Return 0 on success and 1 if the fcntl() call fails.
*/
static int pager_unlock(int fd){
  int rc;
  struct flock lock;
  lock.l_type = F_UNLCK;
  lock.l_whence = SEEK_SET;
  lock.l_start = lock.l_len = 0L;   /* Zero start and length: whole file */
  rc = fcntl(fd, F_SETLK, &lock);
  return rc!=0;
}
211
212/*
213** Move the cursor for file descriptor fd to the point whereto from
214** the beginning of the file.
215*/
216static int pager_seek(int fd, off_t whereto){
drh6019e162001-07-02 17:51:45 +0000217 /*printf("SEEK to page %d\n", whereto/SQLITE_PAGE_SIZE + 1);*/
drhd9b02572001-04-15 00:37:09 +0000218 lseek(fd, whereto, SEEK_SET);
219 return SQLITE_OK;
220}
221
222/*
223** Truncate the given file so that it contains exactly mxPg pages
224** of data.
225*/
226static int pager_truncate(int fd, Pgno mxPg){
227 int rc;
228 rc = ftruncate(fd, mxPg*SQLITE_PAGE_SIZE);
229 return rc!=0 ? SQLITE_IOERR : SQLITE_OK;
230}
231
232/*
233** Read nBytes of data from fd into pBuf. If the data cannot be
234** read or only a partial read occurs, then the unread parts of
235** pBuf are filled with zeros and this routine returns SQLITE_IOERR.
236** If the read is completely successful, return SQLITE_OK.
237*/
238static int pager_read(int fd, void *pBuf, int nByte){
239 int rc;
drh6019e162001-07-02 17:51:45 +0000240 /* printf("READ\n");*/
drhd9b02572001-04-15 00:37:09 +0000241 rc = read(fd, pBuf, nByte);
242 if( rc<0 ){
243 memset(pBuf, 0, nByte);
244 return SQLITE_IOERR;
245 }
246 if( rc<nByte ){
247 memset(&((char*)pBuf)[rc], 0, nByte - rc);
248 rc = SQLITE_IOERR;
249 }else{
250 rc = SQLITE_OK;
251 }
252 return rc;
253}
254
255/*
256** Write nBytes of data into fd. If any problem occurs or if the
257** write is incomplete, SQLITE_IOERR is returned. SQLITE_OK is
258** returned upon complete success.
259*/
260static int pager_write(int fd, const void *pBuf, int nByte){
261 int rc;
drh6019e162001-07-02 17:51:45 +0000262 /*printf("WRITE\n");*/
drhd9b02572001-04-15 00:37:09 +0000263 rc = write(fd, pBuf, nByte);
264 if( rc<nByte ){
265 return SQLITE_FULL;
266 }else{
267 return SQLITE_OK;
268 }
269}
270
271/*
272** Convert the bits in the pPager->errMask into an approprate
273** return code.
274*/
275static int pager_errcode(Pager *pPager){
276 int rc = SQLITE_OK;
277 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
278 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
279 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
280 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
281 return rc;
drhed7c8552001-04-11 14:29:21 +0000282}
283
284/*
285** Find a page in the hash table given its page number. Return
286** a pointer to the page or NULL if not found.
287*/
drhd9b02572001-04-15 00:37:09 +0000288static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
drhed7c8552001-04-11 14:29:21 +0000289 PgHdr *p = pPager->aHash[pgno % N_PG_HASH];
290 while( p && p->pgno!=pgno ){
291 p = p->pNextHash;
292 }
293 return p;
294}
295
296/*
297** Unlock the database and clear the in-memory cache. This routine
298** sets the state of the pager back to what it was when it was first
299** opened. Any outstanding pages are invalidated and subsequent attempts
300** to access those pages will likely result in a coredump.
301*/
drhd9b02572001-04-15 00:37:09 +0000302static void pager_reset(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000303 PgHdr *pPg, *pNext;
drhd9b02572001-04-15 00:37:09 +0000304 for(pPg=pPager->pAll; pPg; pPg=pNext){
305 pNext = pPg->pNextAll;
306 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000307 }
308 pPager->pFirst = 0;
drhd9b02572001-04-15 00:37:09 +0000309 pPager->pLast = 0;
310 pPager->pAll = 0;
drhed7c8552001-04-11 14:29:21 +0000311 memset(pPager->aHash, 0, sizeof(pPager->aHash));
312 pPager->nPage = 0;
313 if( pPager->state==SQLITE_WRITELOCK ){
drhd9b02572001-04-15 00:37:09 +0000314 sqlitepager_rollback(pPager);
drhed7c8552001-04-11 14:29:21 +0000315 }
drhd9b02572001-04-15 00:37:09 +0000316 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000317 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000318 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +0000319 pPager->nRef = 0;
320}
321
322/*
323** When this routine is called, the pager has the journal file open and
324** a write lock on the database. This routine releases the database
325** write lock and acquires a read lock in its place. The journal file
326** is deleted and closed.
327**
328** We have to release the write lock before acquiring the read lock,
329** so there is a race condition where another process can get the lock
330** while we are not holding it. But, no other process should do this
331** because we are also holding a lock on the journal, and no process
332** should get a write lock on the database without first getting a lock
333** on the journal. So this routine should never fail. But it can fail
334** if another process is not playing by the rules. If it does fail,
drhd9b02572001-04-15 00:37:09 +0000335** all in-memory cache pages are invalidated, the PAGER_ERR_LOCK bit
336** is set in pPager->errMask, and this routine returns SQLITE_PROTOCOL.
337** SQLITE_OK is returned on success.
drhed7c8552001-04-11 14:29:21 +0000338*/
drhd9b02572001-04-15 00:37:09 +0000339static int pager_unwritelock(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000340 int rc;
drhd9b02572001-04-15 00:37:09 +0000341 PgHdr *pPg;
342 if( pPager->state!=SQLITE_WRITELOCK ) return SQLITE_OK;
343 pager_unlock(pPager->fd);
344 rc = pager_lock(pPager->fd, 0);
drhed7c8552001-04-11 14:29:21 +0000345 unlink(pPager->zJournal);
346 close(pPager->jfd);
347 pPager->jfd = -1;
drh6019e162001-07-02 17:51:45 +0000348 sqliteFree( pPager->aInJournal );
349 pPager->aInJournal = 0;
drhd9b02572001-04-15 00:37:09 +0000350 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
351 pPg->inJournal = 0;
352 pPg->dirty = 0;
353 }
drhed7c8552001-04-11 14:29:21 +0000354 if( rc!=SQLITE_OK ){
355 pPager->state = SQLITE_UNLOCK;
drhed7c8552001-04-11 14:29:21 +0000356 rc = SQLITE_PROTOCOL;
drhd9b02572001-04-15 00:37:09 +0000357 pPager->errMask |= PAGER_ERR_LOCK;
drhed7c8552001-04-11 14:29:21 +0000358 }else{
drhd9b02572001-04-15 00:37:09 +0000359 rc = SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +0000360 pPager->state = SQLITE_READLOCK;
361 }
362 return rc;
363}
364
drhed7c8552001-04-11 14:29:21 +0000365/*
366** Playback the journal and thus restore the database file to
367** the state it was in before we started making changes.
368**
drhd9b02572001-04-15 00:37:09 +0000369** The journal file format is as follows: There is an initial
370** file-type string for sanity checking. Then there is a single
371** Pgno number which is the number of pages in the database before
372** changes were made. The database is truncated to this size.
drh306dc212001-05-21 13:45:10 +0000373** Next come zero or more page records where each page record
374** consists of a Pgno and SQLITE_PAGE_SIZE bytes of data. See
375** the PageRecord structure for details.
drhed7c8552001-04-11 14:29:21 +0000376**
drhd9b02572001-04-15 00:37:09 +0000377** For playback, the pages have to be read from the journal in
378** reverse order and put back into the original database file.
drhed7c8552001-04-11 14:29:21 +0000379**
drhd9b02572001-04-15 00:37:09 +0000380** If the file opened as the journal file is not a well-formed
381** journal file (as determined by looking at the magic number
382** at the beginning) then this routine returns SQLITE_PROTOCOL.
383** If any other errors occur during playback, the database will
384** likely be corrupted, so the PAGER_ERR_CORRUPT bit is set in
385** pPager->errMask and SQLITE_CORRUPT is returned. If it all
386** works, then this routine returns SQLITE_OK.
drhed7c8552001-04-11 14:29:21 +0000387*/
drhd9b02572001-04-15 00:37:09 +0000388static int pager_playback(Pager *pPager){
389 int nRec; /* Number of Records */
390 int i; /* Loop counter */
391 Pgno mxPg = 0; /* Size of the original file in pages */
392 struct stat statbuf; /* Used to size the journal */
393 PgHdr *pPg; /* An existing page in the cache */
394 PageRecord pgRec;
395 unsigned char aMagic[sizeof(aJournalMagic)];
drhed7c8552001-04-11 14:29:21 +0000396 int rc;
397
drhd9b02572001-04-15 00:37:09 +0000398 /* Read the beginning of the journal and truncate the
399 ** database file back to its original size.
drhed7c8552001-04-11 14:29:21 +0000400 */
drhd9b02572001-04-15 00:37:09 +0000401 assert( pPager->jfd>=0 );
402 pager_seek(pPager->jfd, 0);
403 rc = pager_read(pPager->jfd, aMagic, sizeof(aMagic));
404 if( rc!=SQLITE_OK || memcmp(aMagic,aJournalMagic,sizeof(aMagic))!=0 ){
405 return SQLITE_PROTOCOL;
406 }
407 rc = pager_read(pPager->jfd, &mxPg, sizeof(mxPg));
408 if( rc!=SQLITE_OK ){
409 return SQLITE_PROTOCOL;
410 }
411 pager_truncate(pPager->fd, mxPg);
412 pPager->dbSize = mxPg;
413
414 /* Begin reading the journal beginning at the end and moving
415 ** toward the beginning.
416 */
417 if( fstat(pPager->jfd, &statbuf)!=0 ){
drhed7c8552001-04-11 14:29:21 +0000418 return SQLITE_OK;
419 }
drhd9b02572001-04-15 00:37:09 +0000420 nRec = (statbuf.st_size - (sizeof(aMagic)+sizeof(Pgno))) / sizeof(PageRecord);
drhed7c8552001-04-11 14:29:21 +0000421
422 /* Process segments beginning with the last and working backwards
423 ** to the first.
424 */
drhd9b02572001-04-15 00:37:09 +0000425 for(i=nRec-1; i>=0; i--){
drhed7c8552001-04-11 14:29:21 +0000426 /* Seek to the beginning of the segment */
drhd9b02572001-04-15 00:37:09 +0000427 off_t ofst;
428 ofst = i*sizeof(PageRecord) + sizeof(aMagic) + sizeof(Pgno);
429 rc = pager_seek(pPager->jfd, ofst);
430 if( rc!=SQLITE_OK ) break;
431 rc = pager_read(pPager->jfd, &pgRec, sizeof(pgRec));
432 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000433
drhd9b02572001-04-15 00:37:09 +0000434 /* Sanity checking on the page */
435 if( pgRec.pgno>mxPg || pgRec.pgno==0 ){
436 rc = SQLITE_CORRUPT;
437 break;
drhed7c8552001-04-11 14:29:21 +0000438 }
439
drhd9b02572001-04-15 00:37:09 +0000440 /* Playback the page. Update the in-memory copy of the page
441 ** at the same time, if there is one.
drhed7c8552001-04-11 14:29:21 +0000442 */
drhd9b02572001-04-15 00:37:09 +0000443 pPg = pager_lookup(pPager, pgRec.pgno);
444 if( pPg ){
445 memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
drh6019e162001-07-02 17:51:45 +0000446 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
drhed7c8552001-04-11 14:29:21 +0000447 }
drhd9b02572001-04-15 00:37:09 +0000448 rc = pager_seek(pPager->fd, (pgRec.pgno-1)*SQLITE_PAGE_SIZE);
449 if( rc!=SQLITE_OK ) break;
450 rc = pager_write(pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
451 if( rc!=SQLITE_OK ) break;
drhed7c8552001-04-11 14:29:21 +0000452 }
drhd9b02572001-04-15 00:37:09 +0000453 if( rc!=SQLITE_OK ){
454 pager_unwritelock(pPager);
455 pPager->errMask |= PAGER_ERR_CORRUPT;
456 rc = SQLITE_CORRUPT;
457 }else{
458 rc = pager_unwritelock(pPager);
drhed7c8552001-04-11 14:29:21 +0000459 }
drhd9b02572001-04-15 00:37:09 +0000460 return rc;
drhed7c8552001-04-11 14:29:21 +0000461}
462
/*
** Locate a directory where we can potentially create a temporary
** file.  The candidates are tried in a fixed order and the first one
** that exists, is a directory, and passes access() with mode 07 is
** returned.  Return NULL if none of the candidates qualifies.
*/
static const char *findTempDir(void){
  static const char *azDirs[] = {
     ".",
     "/var/tmp",
     "/usr/tmp",
     "/tmp",
     "/temp",
     "./temp",
  };
  const int nDir = sizeof(azDirs)/sizeof(azDirs[0]);
  int i;
  for(i=0; i<nDir; i++){
    struct stat buf;
    const char *zDir = azDirs[i];
    if( stat(zDir, &buf)==0 && S_ISDIR(buf.st_mode) && access(zDir, 07)==0 ){
      return zDir;
    }
  }
  return 0;
}
486
487/*
drhf57b14a2001-09-14 18:54:08 +0000488** Change the maximum number of in-memory pages that are allowed.
489*/
490void sqlitepager_set_cachesize(Pager *pPager, int mxPage){
491 if( mxPage>10 ){
492 pPager->mxPage = mxPage;
493 }
494}
495
496/*
drhed7c8552001-04-11 14:29:21 +0000497** Create a new page cache and put a pointer to the page cache in *ppPager.
drh5e00f6c2001-09-13 13:46:56 +0000498** The file to be cached need not exist. The file is not locked until
drhd9b02572001-04-15 00:37:09 +0000499** the first call to sqlitepager_get() and is only held open until the
500** last page is released using sqlitepager_unref().
drhed7c8552001-04-11 14:29:21 +0000501*/
drh7e3b0a02001-04-28 16:52:40 +0000502int sqlitepager_open(
503 Pager **ppPager, /* Return the Pager structure here */
504 const char *zFilename, /* Name of the database file to open */
505 int mxPage, /* Max number of in-memory cache pages */
506 int nExtra /* Extra bytes append to each in-memory page */
507){
drhed7c8552001-04-11 14:29:21 +0000508 Pager *pPager;
509 int nameLen;
510 int fd;
drh5e00f6c2001-09-13 13:46:56 +0000511 int tempFile;
512 int readOnly = 0;
513 char zTemp[300];
drhed7c8552001-04-11 14:29:21 +0000514
drhd9b02572001-04-15 00:37:09 +0000515 *ppPager = 0;
516 if( sqlite_malloc_failed ){
517 return SQLITE_NOMEM;
518 }
drh5e00f6c2001-09-13 13:46:56 +0000519 if( zFilename ){
520 fd = open(zFilename, O_RDWR|O_CREAT, 0644);
521 if( fd<0 ){
522 fd = open(zFilename, O_RDONLY, 0);
523 readOnly = 1;
524 }
525 tempFile = 0;
526 }else{
527 int cnt = 8;
drhbe0072d2001-09-13 14:46:09 +0000528 const char *zDir = findTempDir();
drh5e00f6c2001-09-13 13:46:56 +0000529 if( zDir==0 ) return SQLITE_CANTOPEN;
530 do{
531 cnt--;
drhbe0072d2001-09-13 14:46:09 +0000532 sprintf(zTemp,"%s/_sqlite_%u", zDir, (unsigned)sqliteRandomInteger());
drh5e00f6c2001-09-13 13:46:56 +0000533 fd = open(zTemp, O_RDWR|O_CREAT|O_EXCL, 0600);
534 }while( cnt>0 && fd<0 );
535 zFilename = zTemp;
536 tempFile = 1;
537 }
drhed7c8552001-04-11 14:29:21 +0000538 if( fd<0 ){
539 return SQLITE_CANTOPEN;
540 }
541 nameLen = strlen(zFilename);
542 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*2 + 30 );
drhd9b02572001-04-15 00:37:09 +0000543 if( pPager==0 ){
544 close(fd);
545 return SQLITE_NOMEM;
546 }
drhed7c8552001-04-11 14:29:21 +0000547 pPager->zFilename = (char*)&pPager[1];
548 pPager->zJournal = &pPager->zFilename[nameLen+1];
549 strcpy(pPager->zFilename, zFilename);
550 strcpy(pPager->zJournal, zFilename);
551 strcpy(&pPager->zJournal[nameLen], "-journal");
552 pPager->fd = fd;
553 pPager->jfd = -1;
554 pPager->nRef = 0;
555 pPager->dbSize = -1;
556 pPager->nPage = 0;
drhd79caeb2001-04-15 02:27:24 +0000557 pPager->mxPage = mxPage>5 ? mxPage : 10;
drhed7c8552001-04-11 14:29:21 +0000558 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +0000559 pPager->errMask = 0;
drh5e00f6c2001-09-13 13:46:56 +0000560 pPager->tempFile = tempFile;
561 pPager->readOnly = readOnly;
drhf57b14a2001-09-14 18:54:08 +0000562 pPager->needSync = 0;
drhed7c8552001-04-11 14:29:21 +0000563 pPager->pFirst = 0;
564 pPager->pLast = 0;
drh7c717f72001-06-24 20:39:41 +0000565 pPager->nExtra = nExtra;
drhed7c8552001-04-11 14:29:21 +0000566 memset(pPager->aHash, 0, sizeof(pPager->aHash));
567 *ppPager = pPager;
568 return SQLITE_OK;
569}
570
571/*
drh72f82862001-05-24 21:06:34 +0000572** Set the destructor for this pager. If not NULL, the destructor is called
drh5e00f6c2001-09-13 13:46:56 +0000573** when the reference count on each page reaches zero. The destructor can
574** be used to clean up information in the extra segment appended to each page.
drh72f82862001-05-24 21:06:34 +0000575**
576** The destructor is not called as a result sqlitepager_close().
577** Destructors are only called by sqlitepager_unref().
578*/
579void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
580 pPager->xDestructor = xDesc;
581}
582
583/*
drh5e00f6c2001-09-13 13:46:56 +0000584** Return the total number of pages in the disk file associated with
585** pPager.
drhed7c8552001-04-11 14:29:21 +0000586*/
drhd9b02572001-04-15 00:37:09 +0000587int sqlitepager_pagecount(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +0000588 int n;
589 struct stat statbuf;
drhd9b02572001-04-15 00:37:09 +0000590 assert( pPager!=0 );
drhed7c8552001-04-11 14:29:21 +0000591 if( pPager->dbSize>=0 ){
592 return pPager->dbSize;
593 }
594 if( fstat(pPager->fd, &statbuf)!=0 ){
595 n = 0;
596 }else{
597 n = statbuf.st_size/SQLITE_PAGE_SIZE;
598 }
drhd9b02572001-04-15 00:37:09 +0000599 if( pPager->state!=SQLITE_UNLOCK ){
drhed7c8552001-04-11 14:29:21 +0000600 pPager->dbSize = n;
601 }
602 return n;
603}
604
605/*
606** Shutdown the page cache. Free all memory and close all files.
607**
608** If a transaction was in progress when this routine is called, that
609** transaction is rolled back. All outstanding pages are invalidated
610** and their memory is freed. Any attempt to use a page associated
611** with this page cache after this function returns will likely
612** result in a coredump.
613*/
drhd9b02572001-04-15 00:37:09 +0000614int sqlitepager_close(Pager *pPager){
615 PgHdr *pPg, *pNext;
drhed7c8552001-04-11 14:29:21 +0000616 switch( pPager->state ){
617 case SQLITE_WRITELOCK: {
drhd9b02572001-04-15 00:37:09 +0000618 sqlitepager_rollback(pPager);
619 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000620 break;
621 }
622 case SQLITE_READLOCK: {
drhd9b02572001-04-15 00:37:09 +0000623 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000624 break;
625 }
626 default: {
627 /* Do nothing */
628 break;
629 }
630 }
drhd9b02572001-04-15 00:37:09 +0000631 for(pPg=pPager->pAll; pPg; pPg=pNext){
632 pNext = pPg->pNextAll;
633 sqliteFree(pPg);
drhed7c8552001-04-11 14:29:21 +0000634 }
635 if( pPager->fd>=0 ) close(pPager->fd);
636 assert( pPager->jfd<0 );
drh5e00f6c2001-09-13 13:46:56 +0000637 if( pPager->tempFile ){
638 unlink(pPager->zFilename);
639 }
drhed7c8552001-04-11 14:29:21 +0000640 sqliteFree(pPager);
641 return SQLITE_OK;
642}
643
644/*
drh5e00f6c2001-09-13 13:46:56 +0000645** Return the page number for the given page data.
drhed7c8552001-04-11 14:29:21 +0000646*/
drhd9b02572001-04-15 00:37:09 +0000647Pgno sqlitepager_pagenumber(void *pData){
drhed7c8552001-04-11 14:29:21 +0000648 PgHdr *p = DATA_TO_PGHDR(pData);
649 return p->pgno;
650}
651
652/*
drh7e3b0a02001-04-28 16:52:40 +0000653** Increment the reference count for a page. If the page is
654** currently on the freelist (the reference count is zero) then
655** remove it from the freelist.
656*/
drhdf0b3b02001-06-23 11:36:20 +0000657static void page_ref(PgHdr *pPg){
drh7e3b0a02001-04-28 16:52:40 +0000658 if( pPg->nRef==0 ){
659 /* The page is currently on the freelist. Remove it. */
660 if( pPg->pPrevFree ){
661 pPg->pPrevFree->pNextFree = pPg->pNextFree;
662 }else{
663 pPg->pPager->pFirst = pPg->pNextFree;
664 }
665 if( pPg->pNextFree ){
666 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
667 }else{
668 pPg->pPager->pLast = pPg->pPrevFree;
669 }
670 pPg->pPager->nRef++;
671 }
672 pPg->nRef++;
drhdd793422001-06-28 01:54:48 +0000673 REFINFO(pPg);
drhdf0b3b02001-06-23 11:36:20 +0000674}
675
676/*
677** Increment the reference count for a page. The input pointer is
678** a reference to the page data.
679*/
680int sqlitepager_ref(void *pData){
681 PgHdr *pPg = DATA_TO_PGHDR(pData);
682 page_ref(pPg);
drh8c42ca92001-06-22 19:15:00 +0000683 return SQLITE_OK;
drh7e3b0a02001-04-28 16:52:40 +0000684}
685
686/*
drhd9b02572001-04-15 00:37:09 +0000687** Acquire a page.
688**
** A read lock on the disk file is obtained when the first page is acquired.
690** This read lock is dropped when the last page is released.
drhd9b02572001-04-15 00:37:09 +0000691**
drh306dc212001-05-21 13:45:10 +0000692** A _get works for any page number greater than 0. If the database
693** file is smaller than the requested page, then no actual disk
694** read occurs and the memory image of the page is initialized to
695** all zeros. The extra data appended to a page is always initialized
696** to zeros the first time a page is loaded into memory.
697**
drhd9b02572001-04-15 00:37:09 +0000698** The acquisition might fail for several reasons. In all cases,
699** an appropriate error code is returned and *ppPage is set to NULL.
drh7e3b0a02001-04-28 16:52:40 +0000700**
701** See also sqlitepager_lookup(). Both this routine and _lookup() attempt
702** to find a page in the in-memory cache first. If the page is not already
drh5e00f6c2001-09-13 13:46:56 +0000703** in memory, this routine goes to disk to read it in whereas _lookup()
drh7e3b0a02001-04-28 16:52:40 +0000704** just returns 0. This routine acquires a read-lock the first time it
705** has to go to disk, and could also playback an old journal if necessary.
706** Since _lookup() never goes to disk, it never has to deal with locks
707** or journal files.
drhed7c8552001-04-11 14:29:21 +0000708*/
drhd9b02572001-04-15 00:37:09 +0000709int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
drhed7c8552001-04-11 14:29:21 +0000710 PgHdr *pPg;
711
drhd9b02572001-04-15 00:37:09 +0000712 /* Make sure we have not hit any critical errors.
713 */
714 if( pPager==0 || pgno==0 ){
715 return SQLITE_ERROR;
716 }
717 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
718 return pager_errcode(pPager);
719 }
720
drhed7c8552001-04-11 14:29:21 +0000721 /* If this is the first page accessed, then get a read lock
722 ** on the database file.
723 */
724 if( pPager->nRef==0 ){
drhd9b02572001-04-15 00:37:09 +0000725 if( pager_lock(pPager->fd, 0)!=0 ){
drhed7c8552001-04-11 14:29:21 +0000726 *ppPage = 0;
727 return SQLITE_BUSY;
728 }
drhd9b02572001-04-15 00:37:09 +0000729 pPager->state = SQLITE_READLOCK;
drhed7c8552001-04-11 14:29:21 +0000730
731 /* If a journal file exists, try to play it back.
732 */
733 if( access(pPager->zJournal,0)==0 ){
734 int rc;
735
736 /* Open the journal for exclusive access. Return SQLITE_BUSY if
737 ** we cannot get exclusive access to the journal file
738 */
739 pPager->jfd = open(pPager->zJournal, O_RDONLY, 0);
drhd9b02572001-04-15 00:37:09 +0000740 if( pPager->jfd<0 || pager_lock(pPager->jfd, 1)!=0 ){
drhed7c8552001-04-11 14:29:21 +0000741 if( pPager->jfd>=0 ){ close(pPager->jfd); pPager->jfd = -1; }
drhd9b02572001-04-15 00:37:09 +0000742 pager_unlock(pPager->fd);
drhed7c8552001-04-11 14:29:21 +0000743 *ppPage = 0;
744 return SQLITE_BUSY;
745 }
746
747 /* Get a write lock on the database */
drhd9b02572001-04-15 00:37:09 +0000748 pager_unlock(pPager->fd);
749 if( pager_lock(pPager->fd, 1)!=0 ){
750 close(pPager->jfd);
751 pPager->jfd = -1;
drhed7c8552001-04-11 14:29:21 +0000752 *ppPage = 0;
753 return SQLITE_PROTOCOL;
754 }
755
756 /* Playback and delete the journal. Drop the database write
757 ** lock and reacquire the read lock.
758 */
drhd9b02572001-04-15 00:37:09 +0000759 rc = pager_playback(pPager);
760 if( rc!=SQLITE_OK ){
761 return rc;
762 }
drhed7c8552001-04-11 14:29:21 +0000763 }
764 pPg = 0;
765 }else{
766 /* Search for page in cache */
drhd9b02572001-04-15 00:37:09 +0000767 pPg = pager_lookup(pPager, pgno);
drhed7c8552001-04-11 14:29:21 +0000768 }
769 if( pPg==0 ){
drhd9b02572001-04-15 00:37:09 +0000770 /* The requested page is not in the page cache. */
drhed7c8552001-04-11 14:29:21 +0000771 int h;
drh7e3b0a02001-04-28 16:52:40 +0000772 pPager->nMiss++;
drhed7c8552001-04-11 14:29:21 +0000773 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
774 /* Create a new page */
drh7e3b0a02001-04-28 16:52:40 +0000775 pPg = sqliteMalloc( sizeof(*pPg) + SQLITE_PAGE_SIZE + pPager->nExtra );
drhd9b02572001-04-15 00:37:09 +0000776 if( pPg==0 ){
777 *ppPage = 0;
778 pager_unwritelock(pPager);
779 pPager->errMask |= PAGER_ERR_MEM;
780 return SQLITE_NOMEM;
781 }
drhed7c8552001-04-11 14:29:21 +0000782 pPg->pPager = pPager;
drhd9b02572001-04-15 00:37:09 +0000783 pPg->pNextAll = pPager->pAll;
784 if( pPager->pAll ){
785 pPager->pAll->pPrevAll = pPg;
786 }
787 pPg->pPrevAll = 0;
drhd79caeb2001-04-15 02:27:24 +0000788 pPager->pAll = pPg;
drhd9b02572001-04-15 00:37:09 +0000789 pPager->nPage++;
drhed7c8552001-04-11 14:29:21 +0000790 }else{
drhd9b02572001-04-15 00:37:09 +0000791 /* Recycle an older page. First locate the page to be recycled.
792 ** Try to find one that is not dirty and is near the head of
793 ** of the free list */
drhf57b14a2001-09-14 18:54:08 +0000794 /* int cnt = pPager->mxPage/2; */
795 int cnt = 10;
drhed7c8552001-04-11 14:29:21 +0000796 pPg = pPager->pFirst;
drh6019e162001-07-02 17:51:45 +0000797 while( pPg->dirty && 0<cnt-- && pPg->pNextFree ){
drhd9b02572001-04-15 00:37:09 +0000798 pPg = pPg->pNextFree;
799 }
800 if( pPg==0 || pPg->dirty ) pPg = pPager->pFirst;
801 assert( pPg->nRef==0 );
802
803 /* If the page to be recycled is dirty, sync the journal and write
804 ** the old page into the database. */
drhed7c8552001-04-11 14:29:21 +0000805 if( pPg->dirty ){
806 int rc;
drhd9b02572001-04-15 00:37:09 +0000807 assert( pPg->inJournal==1 );
808 assert( pPager->state==SQLITE_WRITELOCK );
drhf57b14a2001-09-14 18:54:08 +0000809 if( pPager->needSync ){
810 rc = fsync(pPager->jfd);
811 if( rc!=0 ){
812 rc = sqlitepager_rollback(pPager);
813 *ppPage = 0;
814 if( rc==SQLITE_OK ) rc = SQLITE_IOERR;
815 return rc;
816 }
817 pPager->needSync = 0;
drhed7c8552001-04-11 14:29:21 +0000818 }
drhd9b02572001-04-15 00:37:09 +0000819 pager_seek(pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
820 rc = pager_write(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
821 if( rc!=SQLITE_OK ){
822 rc = sqlitepager_rollback(pPager);
823 *ppPage = 0;
824 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
825 return rc;
826 }
827 }
828
829 /* Unlink the old page from the free list and the hash table
830 */
drh6019e162001-07-02 17:51:45 +0000831 if( pPg->pPrevFree ){
832 pPg->pPrevFree->pNextFree = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000833 }else{
drh6019e162001-07-02 17:51:45 +0000834 assert( pPager->pFirst==pPg );
835 pPager->pFirst = pPg->pNextFree;
drhed7c8552001-04-11 14:29:21 +0000836 }
drh6019e162001-07-02 17:51:45 +0000837 if( pPg->pNextFree ){
838 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
839 }else{
840 assert( pPager->pLast==pPg );
841 pPager->pLast = pPg->pPrevFree;
842 }
843 pPg->pNextFree = pPg->pPrevFree = 0;
drhed7c8552001-04-11 14:29:21 +0000844 if( pPg->pNextHash ){
845 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
846 }
847 if( pPg->pPrevHash ){
848 pPg->pPrevHash->pNextHash = pPg->pNextHash;
849 }else{
drhd9b02572001-04-15 00:37:09 +0000850 h = pager_hash(pPg->pgno);
drhed7c8552001-04-11 14:29:21 +0000851 assert( pPager->aHash[h]==pPg );
852 pPager->aHash[h] = pPg->pNextHash;
853 }
drh6019e162001-07-02 17:51:45 +0000854 pPg->pNextHash = pPg->pPrevHash = 0;
drhd9b02572001-04-15 00:37:09 +0000855 pPager->nOvfl++;
drhed7c8552001-04-11 14:29:21 +0000856 }
857 pPg->pgno = pgno;
drh6019e162001-07-02 17:51:45 +0000858 if( pPager->aInJournal && pgno<=pPager->origDbSize ){
859 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
860 }else{
861 pPg->inJournal = 0;
862 }
drhed7c8552001-04-11 14:29:21 +0000863 pPg->dirty = 0;
864 pPg->nRef = 1;
drhdd793422001-06-28 01:54:48 +0000865 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000866 pPager->nRef++;
867 h = pager_hash(pgno);
drhed7c8552001-04-11 14:29:21 +0000868 pPg->pNextHash = pPager->aHash[h];
869 pPager->aHash[h] = pPg;
870 if( pPg->pNextHash ){
871 assert( pPg->pNextHash->pPrevHash==0 );
872 pPg->pNextHash->pPrevHash = pPg;
873 }
drh306dc212001-05-21 13:45:10 +0000874 if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
875 if( pPager->dbSize<pgno ){
876 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
877 }else{
878 pager_seek(pPager->fd, (pgno-1)*SQLITE_PAGE_SIZE);
879 pager_read(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
880 }
drh7e3b0a02001-04-28 16:52:40 +0000881 if( pPager->nExtra>0 ){
882 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
883 }
drhed7c8552001-04-11 14:29:21 +0000884 }else{
drhd9b02572001-04-15 00:37:09 +0000885 /* The requested page is in the page cache. */
drh7e3b0a02001-04-28 16:52:40 +0000886 pPager->nHit++;
drhdf0b3b02001-06-23 11:36:20 +0000887 page_ref(pPg);
drhed7c8552001-04-11 14:29:21 +0000888 }
889 *ppPage = PGHDR_TO_DATA(pPg);
890 return SQLITE_OK;
891}
892
893/*
drh7e3b0a02001-04-28 16:52:40 +0000894** Acquire a page if it is already in the in-memory cache. Do
895** not read the page from disk. Return a pointer to the page,
896** or 0 if the page is not in cache.
897**
898** See also sqlitepager_get(). The difference between this routine
899** and sqlitepager_get() is that _get() will go to the disk and read
900** in the page if the page is not already in cache. This routine
drh5e00f6c2001-09-13 13:46:56 +0000901** returns NULL if the page is not in cache or if a disk I/O error
902** has ever happened.
drh7e3b0a02001-04-28 16:52:40 +0000903*/
904void *sqlitepager_lookup(Pager *pPager, Pgno pgno){
905 PgHdr *pPg;
906
907 /* Make sure we have not hit any critical errors.
908 */
909 if( pPager==0 || pgno==0 ){
910 return 0;
911 }
912 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
913 return 0;
914 }
915 if( pPager->nRef==0 ){
916 return 0;
917 }
918 pPg = pager_lookup(pPager, pgno);
919 if( pPg==0 ) return 0;
drhdf0b3b02001-06-23 11:36:20 +0000920 page_ref(pPg);
drh7e3b0a02001-04-28 16:52:40 +0000921 return PGHDR_TO_DATA(pPg);
922}
923
924/*
drhed7c8552001-04-11 14:29:21 +0000925** Release a page.
926**
927** If the number of references to the page drop to zero, then the
928** page is added to the LRU list. When all references to all pages
drhd9b02572001-04-15 00:37:09 +0000929** are released, a rollback occurs and the lock on the database is
drhed7c8552001-04-11 14:29:21 +0000930** removed.
931*/
drhd9b02572001-04-15 00:37:09 +0000932int sqlitepager_unref(void *pData){
drhed7c8552001-04-11 14:29:21 +0000933 Pager *pPager;
934 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +0000935
936 /* Decrement the reference count for this page
937 */
drhed7c8552001-04-11 14:29:21 +0000938 pPg = DATA_TO_PGHDR(pData);
939 assert( pPg->nRef>0 );
940 pPager = pPg->pPager;
941 pPg->nRef--;
drhdd793422001-06-28 01:54:48 +0000942 REFINFO(pPg);
drhd9b02572001-04-15 00:37:09 +0000943
drh72f82862001-05-24 21:06:34 +0000944 /* When the number of references to a page reach 0, call the
945 ** destructor and add the page to the freelist.
drhd9b02572001-04-15 00:37:09 +0000946 */
drhed7c8552001-04-11 14:29:21 +0000947 if( pPg->nRef==0 ){
drhd9b02572001-04-15 00:37:09 +0000948 pPg->pNextFree = 0;
949 pPg->pPrevFree = pPager->pLast;
drhed7c8552001-04-11 14:29:21 +0000950 pPager->pLast = pPg;
drhd9b02572001-04-15 00:37:09 +0000951 if( pPg->pPrevFree ){
952 pPg->pPrevFree->pNextFree = pPg;
drhed7c8552001-04-11 14:29:21 +0000953 }else{
954 pPager->pFirst = pPg;
955 }
drh72f82862001-05-24 21:06:34 +0000956 if( pPager->xDestructor ){
957 pPager->xDestructor(pData);
958 }
drhd9b02572001-04-15 00:37:09 +0000959
960 /* When all pages reach the freelist, drop the read lock from
961 ** the database file.
962 */
963 pPager->nRef--;
964 assert( pPager->nRef>=0 );
965 if( pPager->nRef==0 ){
966 pager_reset(pPager);
967 }
drhed7c8552001-04-11 14:29:21 +0000968 }
drhd9b02572001-04-15 00:37:09 +0000969 return SQLITE_OK;
drhed7c8552001-04-11 14:29:21 +0000970}
971
972/*
973** Mark a data page as writeable. The page is written into the journal
974** if it is not there already. This routine must be called before making
975** changes to a page.
976**
977** The first time this routine is called, the pager creates a new
978** journal and acquires a write lock on the database. If the write
979** lock could not be acquired, this routine returns SQLITE_BUSY. The
drh306dc212001-05-21 13:45:10 +0000980** calling routine must check for that return value and be careful not to
drhed7c8552001-04-11 14:29:21 +0000981** change any page data until this routine returns SQLITE_OK.
drhd9b02572001-04-15 00:37:09 +0000982**
983** If the journal file could not be written because the disk is full,
984** then this routine returns SQLITE_FULL and does an immediate rollback.
985** All subsequent write attempts also return SQLITE_FULL until there
986** is a call to sqlitepager_commit() or sqlitepager_rollback() to
987** reset.
drhed7c8552001-04-11 14:29:21 +0000988*/
drhd9b02572001-04-15 00:37:09 +0000989int sqlitepager_write(void *pData){
drh69688d52001-04-14 16:38:23 +0000990 PgHdr *pPg = DATA_TO_PGHDR(pData);
991 Pager *pPager = pPg->pPager;
drhd79caeb2001-04-15 02:27:24 +0000992 int rc = SQLITE_OK;
drh69688d52001-04-14 16:38:23 +0000993
drhd9b02572001-04-15 00:37:09 +0000994 if( pPager->errMask ){
995 return pager_errcode(pPager);
996 }
drh5e00f6c2001-09-13 13:46:56 +0000997 if( pPager->readOnly ){
998 return SQLITE_PERM;
999 }
drhd9b02572001-04-15 00:37:09 +00001000 pPg->dirty = 1;
drh69688d52001-04-14 16:38:23 +00001001 if( pPg->inJournal ){ return SQLITE_OK; }
drhd9b02572001-04-15 00:37:09 +00001002 assert( pPager->state!=SQLITE_UNLOCK );
drhed7c8552001-04-11 14:29:21 +00001003 if( pPager->state==SQLITE_READLOCK ){
drh6019e162001-07-02 17:51:45 +00001004 assert( pPager->aInJournal==0 );
1005 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
1006 if( pPager->aInJournal==0 ){
1007 return SQLITE_NOMEM;
1008 }
drhed7c8552001-04-11 14:29:21 +00001009 pPager->jfd = open(pPager->zJournal, O_RDWR|O_CREAT, 0644);
1010 if( pPager->jfd<0 ){
1011 return SQLITE_CANTOPEN;
1012 }
drhf57b14a2001-09-14 18:54:08 +00001013 pPager->needSync = 0;
drhd9b02572001-04-15 00:37:09 +00001014 if( pager_lock(pPager->jfd, 1) ){
drhed7c8552001-04-11 14:29:21 +00001015 close(pPager->jfd);
1016 pPager->jfd = -1;
1017 return SQLITE_BUSY;
1018 }
drhd9b02572001-04-15 00:37:09 +00001019 pager_unlock(pPager->fd);
1020 if( pager_lock(pPager->fd, 1) ){
drhed7c8552001-04-11 14:29:21 +00001021 close(pPager->jfd);
1022 pPager->jfd = -1;
1023 pPager->state = SQLITE_UNLOCK;
drhd9b02572001-04-15 00:37:09 +00001024 pPager->errMask |= PAGER_ERR_LOCK;
drhed7c8552001-04-11 14:29:21 +00001025 return SQLITE_PROTOCOL;
1026 }
1027 pPager->state = SQLITE_WRITELOCK;
drhd9b02572001-04-15 00:37:09 +00001028 sqlitepager_pagecount(pPager);
drh69688d52001-04-14 16:38:23 +00001029 pPager->origDbSize = pPager->dbSize;
drhd9b02572001-04-15 00:37:09 +00001030 rc = pager_write(pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
1031 if( rc==SQLITE_OK ){
1032 rc = pager_write(pPager->jfd, &pPager->dbSize, sizeof(Pgno));
1033 }
1034 if( rc!=SQLITE_OK ){
1035 rc = pager_unwritelock(pPager);
1036 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
1037 return rc;
1038 }
drhed7c8552001-04-11 14:29:21 +00001039 }
drhd9b02572001-04-15 00:37:09 +00001040 assert( pPager->state==SQLITE_WRITELOCK );
drh69688d52001-04-14 16:38:23 +00001041 assert( pPager->jfd>=0 );
drhd9b02572001-04-15 00:37:09 +00001042 if( pPg->pgno <= pPager->origDbSize ){
1043 rc = pager_write(pPager->jfd, &pPg->pgno, sizeof(Pgno));
1044 if( rc==SQLITE_OK ){
1045 rc = pager_write(pPager->jfd, pData, SQLITE_PAGE_SIZE);
1046 }
1047 if( rc!=SQLITE_OK ){
1048 sqlitepager_rollback(pPager);
1049 pPager->errMask |= PAGER_ERR_FULL;
1050 return rc;
1051 }
drh6019e162001-07-02 17:51:45 +00001052 assert( pPager->aInJournal!=0 );
1053 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
drhf57b14a2001-09-14 18:54:08 +00001054 pPager->needSync = 1;
drh69688d52001-04-14 16:38:23 +00001055 }
drh69688d52001-04-14 16:38:23 +00001056 pPg->inJournal = 1;
drh306dc212001-05-21 13:45:10 +00001057 if( pPager->dbSize<pPg->pgno ){
1058 pPager->dbSize = pPg->pgno;
1059 }
drh69688d52001-04-14 16:38:23 +00001060 return rc;
drhed7c8552001-04-11 14:29:21 +00001061}
1062
1063/*
drh6019e162001-07-02 17:51:45 +00001064** Return TRUE if the page given in the argument was previous passed
1065** to sqlitepager_write(). In other words, return TRUE if it is ok
1066** to change the content of the page.
1067*/
1068int sqlitepager_iswriteable(void *pData){
1069 PgHdr *pPg = DATA_TO_PGHDR(pData);
1070 return pPg->dirty;
1071}
1072
1073/*
drhed7c8552001-04-11 14:29:21 +00001074** Commit all changes to the database and release the write lock.
drhd9b02572001-04-15 00:37:09 +00001075**
1076** If the commit fails for any reason, a rollback attempt is made
1077** and an error code is returned. If the commit worked, SQLITE_OK
1078** is returned.
drhed7c8552001-04-11 14:29:21 +00001079*/
drhd9b02572001-04-15 00:37:09 +00001080int sqlitepager_commit(Pager *pPager){
drha1b351a2001-09-14 16:42:12 +00001081 int rc;
drhed7c8552001-04-11 14:29:21 +00001082 PgHdr *pPg;
drhd9b02572001-04-15 00:37:09 +00001083
1084 if( pPager->errMask==PAGER_ERR_FULL ){
1085 rc = sqlitepager_rollback(pPager);
1086 if( rc==SQLITE_OK ) rc = SQLITE_FULL;
1087 return rc;
1088 }
1089 if( pPager->errMask!=0 ){
1090 rc = pager_errcode(pPager);
1091 return rc;
1092 }
1093 if( pPager->state!=SQLITE_WRITELOCK ){
1094 return SQLITE_ERROR;
1095 }
drhed7c8552001-04-11 14:29:21 +00001096 assert( pPager->jfd>=0 );
drhf57b14a2001-09-14 18:54:08 +00001097 if( pPager->needSync && fsync(pPager->jfd) ){
drhd9b02572001-04-15 00:37:09 +00001098 goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001099 }
drha1b351a2001-09-14 16:42:12 +00001100 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1101 if( pPg->dirty==0 ) continue;
1102 rc = pager_seek(pPager->fd, (pPg->pgno-1)*SQLITE_PAGE_SIZE);
1103 if( rc!=SQLITE_OK ) goto commit_abort;
1104 rc = pager_write(pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
1105 if( rc!=SQLITE_OK ) goto commit_abort;
drhed7c8552001-04-11 14:29:21 +00001106 }
drhd9b02572001-04-15 00:37:09 +00001107 if( fsync(pPager->fd) ) goto commit_abort;
1108 rc = pager_unwritelock(pPager);
1109 pPager->dbSize = -1;
1110 return rc;
1111
1112 /* Jump here if anything goes wrong during the commit process.
1113 */
1114commit_abort:
1115 rc = sqlitepager_rollback(pPager);
1116 if( rc==SQLITE_OK ){
1117 rc = SQLITE_FULL;
drhed7c8552001-04-11 14:29:21 +00001118 }
drhed7c8552001-04-11 14:29:21 +00001119 return rc;
1120}
1121
1122/*
1123** Rollback all changes. The database falls back to read-only mode.
1124** All in-memory cache pages revert to their original data contents.
1125** The journal is deleted.
drhd9b02572001-04-15 00:37:09 +00001126**
1127** This routine cannot fail unless some other process is not following
1128** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
1129** process is writing trash into the journal file (SQLITE_CORRUPT) or
1130** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
1131** codes are returned for all these occasions. Otherwise,
1132** SQLITE_OK is returned.
drhed7c8552001-04-11 14:29:21 +00001133*/
drhd9b02572001-04-15 00:37:09 +00001134int sqlitepager_rollback(Pager *pPager){
drhed7c8552001-04-11 14:29:21 +00001135 int rc;
drhd9b02572001-04-15 00:37:09 +00001136 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
1137 return pager_errcode(pPager);
drhed7c8552001-04-11 14:29:21 +00001138 }
drhd9b02572001-04-15 00:37:09 +00001139 if( pPager->state!=SQLITE_WRITELOCK ){
1140 return SQLITE_OK;
1141 }
1142 rc = pager_playback(pPager);
1143 if( rc!=SQLITE_OK ){
1144 rc = SQLITE_CORRUPT;
1145 pPager->errMask |= PAGER_ERR_CORRUPT;
1146 }
1147 pPager->dbSize = -1;
drhed7c8552001-04-11 14:29:21 +00001148 return rc;
1149};
drhd9b02572001-04-15 00:37:09 +00001150
1151/*
drh5e00f6c2001-09-13 13:46:56 +00001152** Return TRUE if the database file is opened read-only. Return FALSE
1153** if the database is (in theory) writable.
1154*/
1155int sqlitepager_isreadonly(Pager *pPager){
drhbe0072d2001-09-13 14:46:09 +00001156 return pPager->readOnly;
drh5e00f6c2001-09-13 13:46:56 +00001157}
1158
1159/*
drhd9b02572001-04-15 00:37:09 +00001160** This routine is used for testing and analysis only.
1161*/
1162int *sqlitepager_stats(Pager *pPager){
1163 static int a[9];
1164 a[0] = pPager->nRef;
1165 a[1] = pPager->nPage;
1166 a[2] = pPager->mxPage;
1167 a[3] = pPager->dbSize;
1168 a[4] = pPager->state;
1169 a[5] = pPager->errMask;
1170 a[6] = pPager->nHit;
1171 a[7] = pPager->nMiss;
1172 a[8] = pPager->nOvfl;
1173 return a;
1174}
drhdd793422001-06-28 01:54:48 +00001175
1176#if SQLITE_TEST
1177/*
1178** Print a listing of all referenced pages and their ref count.
1179*/
1180void sqlitepager_refdump(Pager *pPager){
1181 PgHdr *pPg;
1182 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1183 if( pPg->nRef<=0 ) continue;
1184 printf("PAGE %3d addr=0x%08x nRef=%d\n",
1185 pPg->pgno, (int)PGHDR_TO_DATA(pPg), pPg->nRef);
1186 }
1187}
1188#endif