blob: 586a749c8b2c0234f8fd0e93c5a1e37366ab7d01 [file] [log] [blame]
danielk1977a3f06592009-04-23 14:58:39 +00001/*
2** 2005 December 14
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11*************************************************************************
12**
danielk19774598b8e2009-04-24 10:13:05 +000013** $Id: sqlite3async.c,v 1.3 2009/04/24 10:13:06 danielk1977 Exp $
danielk1977a3f06592009-04-23 14:58:39 +000014**
danielk1977debcfd22009-04-24 09:27:16 +000015** This file contains the implementation of an asynchronous IO backend
16** for SQLite.
danielk1977a3f06592009-04-23 14:58:39 +000017*/
18
19#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO)
20
21#include "sqlite3async.h"
danielk19774598b8e2009-04-24 10:13:05 +000022#include "sqlite3.h"
23#include <stdarg.h>
24#include <string.h>
25#include <assert.h>
danielk1977a3f06592009-04-23 14:58:39 +000026
/* Two-argument minimum and maximum helpers used in several places.
** These are macros, so each argument may be evaluated more than once;
** never pass an expression that has side effects. */
#define MIN(x,y) (((y)>(x)) ? (x) : (y))
#define MAX(x,y) (((y)<(x)) ? (x) : (y))
30
31/* Forward references */
32typedef struct AsyncWrite AsyncWrite;
33typedef struct AsyncFile AsyncFile;
34typedef struct AsyncFileData AsyncFileData;
35typedef struct AsyncFileLock AsyncFileLock;
36typedef struct AsyncLock AsyncLock;
37
/*
** Debug tracing.
**
** In builds without NDEBUG, ASYNC_TRACE((fmt, ...)) formats its arguments
** with sqlite3_vmprintf() and writes them to stderr, but only while the
** file-local flag sqlite3async_trace is non-zero (it starts at zero, so
** flip it from a debugger or by editing this file).  Note the double
** parentheses at the call site: the macro takes one argument, which is the
** complete parenthesised argument list of asyncTrace().
**
** In NDEBUG builds ASYNC_TRACE() expands to nothing, so call sites still
** compile and the format arguments are never evaluated.
*/
#ifndef NDEBUG
#include <stdio.h>
static int sqlite3async_trace = 0;
/* Wrapped in do{}while(0) so the macro behaves as a single statement and
** cannot capture a following "else". */
# define ASYNC_TRACE(X) do{ if( sqlite3async_trace ) asyncTrace X; }while(0)
static void asyncTrace(const char *zFormat, ...){
  char *z;
  va_list ap;
  va_start(ap, zFormat);
  z = sqlite3_vmprintf(zFormat, ap);
  va_end(ap);
  /* sqlite3_vmprintf() returns NULL if it cannot allocate memory.  Drop
  ** the message in that case rather than passing NULL to "%s". */
  if( z ){
    fprintf(stderr, "[%d] %s", 0 /* (int)pthread_self() */, z);
    sqlite3_free(z);
  }
}
#else
# define ASYNC_TRACE(X)
#endif
danielk1977a3f06592009-04-23 14:58:39 +000053
54/*
55** THREAD SAFETY NOTES
56**
57** Basic rules:
58**
59** * Both read and write access to the global write-op queue must be
60** protected by the async.queueMutex. As are the async.ioError and
61** async.nFile variables.
62**
63** * The async.pLock list and all AsyncLock and AsyncFileLock
64** structures must be protected by the async.lockMutex mutex.
65**
66** * The file handles from the underlying system are not assumed to
67** be thread safe.
68**
69** * See the last two paragraphs under "The Writer Thread" for
70** an assumption to do with file-handle synchronization by the Os.
71**
72** Deadlock prevention:
73**
** There are three mutexes used by the system: the "writer" mutex,
** the "queue" mutex and the "lock" mutex. Rules are:
76**
77** * It is illegal to block on the writer mutex when any other mutex
78** are held, and
79**
80** * It is illegal to block on the queue mutex when the lock mutex
81** is held.
82**
** i.e. mutexes must be grabbed in the order "writer", "queue", "lock".
84**
85** File system operations (invoked by SQLite thread):
86**
87** xOpen
88** xDelete
89** xFileExists
90**
91** File handle operations (invoked by SQLite thread):
92**
93** asyncWrite, asyncClose, asyncTruncate, asyncSync
94**
95** The operations above add an entry to the global write-op list. They
96** prepare the entry, acquire the async.queueMutex momentarily while
97** list pointers are manipulated to insert the new entry, then release
98** the mutex and signal the writer thread to wake up in case it happens
99** to be asleep.
100**
101**
102** asyncRead, asyncFileSize.
103**
** Read operations. Both of these read from the underlying file
** first, then adjust their result based on pending writes in the
** write-op queue. So async.queueMutex is held for the duration
** of these operations to prevent other threads from changing the
** queue in mid operation.
109**
110**
111** asyncLock, asyncUnlock, asyncCheckReservedLock
112**
113** These primitives implement in-process locking using a hash table
114** on the file name. Files are locked correctly for connections coming
115** from the same process. But other processes cannot see these locks
116** and will therefore not honor them.
117**
118**
119** The writer thread:
120**
** The async.writerMutex is used to make sure there is only
** a single writer thread running at a time.
123**
124** Inside the writer thread is a loop that works like this:
125**
126** WHILE (write-op list is not empty)
127** Do IO operation at head of write-op list
128** Remove entry from head of write-op list
129** END WHILE
130**
131** The async.queueMutex is always held during the <write-op list is
132** not empty> test, and when the entry is removed from the head
133** of the write-op list. Sometimes it is held for the interim
134** period (while the IO is performed), and sometimes it is
135** relinquished. It is relinquished if (a) the IO op is an
136** ASYNC_CLOSE or (b) when the file handle was opened, two of
137** the underlying systems handles were opened on the same
138** file-system entry.
139**
140** If condition (b) above is true, then one file-handle
141** (AsyncFile.pBaseRead) is used exclusively by sqlite threads to read the
142** file, the other (AsyncFile.pBaseWrite) by sqlite3_async_flush()
143** threads to perform write() operations. This means that read
144** operations are not blocked by asynchronous writes (although
145** asynchronous writes may still be blocked by reads).
146**
147** This assumes that the OS keeps two handles open on the same file
148** properly in sync. That is, any read operation that starts after a
149** write operation on the same file system entry has completed returns
150** data consistent with the write. We also assume that if one thread
151** reads a file while another is writing it all bytes other than the
152** ones actually being written contain valid data.
153**
154** If the above assumptions are not true, set the preprocessor symbol
155** SQLITE_ASYNC_TWO_FILEHANDLES to 0.
156*/
157
158
159#ifndef NDEBUG
160# define TESTONLY( X ) X
161#else
162# define TESTONLY( X )
163#endif
164
165/*
danielk1977debcfd22009-04-24 09:27:16 +0000166** PORTING FUNCTIONS
167**
danielk1977a3f06592009-04-23 14:58:39 +0000168** There are two definitions of the following functions. One for pthreads
169** compatible systems and one for Win32. These functions isolate the OS
170** specific code required by each platform.
171**
172** The system uses three mutexes and a single condition variable. To
173** block on a mutex, async_mutex_enter() is called. The parameter passed
174** to async_mutex_enter(), which must be one of ASYNC_MUTEX_LOCK,
175** ASYNC_MUTEX_QUEUE or ASYNC_MUTEX_WRITER, identifies which of the three
176** mutexes to lock. Similarly, to unlock a mutex, async_mutex_leave() is
177** called with a parameter identifying the mutex being unlocked. Mutexes
178** are not recursive - it is an error to call async_mutex_enter() to
179** lock a mutex that is already locked, or to call async_mutex_leave()
180** to unlock a mutex that is not currently locked.
181**
182** The async_cond_wait() and async_cond_signal() functions are modelled
183** on the pthreads functions with similar names. The first parameter to
184** both functions is always ASYNC_COND_QUEUE. When async_cond_wait()
185** is called the mutex identified by the second parameter must be held.
186** The mutex is unlocked, and the calling thread simultaneously begins
187** waiting for the condition variable to be signalled by another thread.
188** After another thread signals the condition variable, the calling
189** thread stops waiting, locks mutex eMutex and returns. The
190** async_cond_signal() function is used to signal the condition variable.
191** It is assumed that the mutex used by the thread calling async_cond_wait()
192** is held by the caller of async_cond_signal() (otherwise there would be
193** a race condition).
194**
195** It is guaranteed that no other thread will call async_cond_wait() when
196** there is already a thread waiting on the condition variable.
197**
198** The async_sched_yield() function is called to suggest to the operating
199** system that it would be a good time to shift the current thread off the
200** CPU. The system will still work if this function is not implemented
201** (it is not currently implemented for win32), but it might be marginally
202** more efficient if it is.
203*/
204static void async_mutex_enter(int eMutex);
205static void async_mutex_leave(int eMutex);
206static void async_cond_wait(int eCond, int eMutex);
207static void async_cond_signal(int eCond);
208static void async_sched_yield(void);
209
210/*
** There are also two definitions of the following. async_os_initialize()
** is called when the asynchronous VFS is first installed, and
** async_os_shutdown() is called when it is uninstalled (from within
** sqlite3async_shutdown()).
214**
215** For pthreads builds, both of these functions are no-ops. For win32,
216** they provide an opportunity to initialize and finalize the required
217** mutex and condition variables.
218**
219** If async_os_initialize() returns other than zero, then the initialization
220** fails and SQLITE_ERROR is returned to the user.
221*/
222static int async_os_initialize(void);
223static void async_os_shutdown(void);
224
225/* Values for use as the 'eMutex' argument of the above functions. The
226** integer values assigned to these constants are important for assert()
227** statements that verify that mutexes are locked in the correct order.
228** Specifically, it is unsafe to try to lock mutex N while holding a lock
229** on mutex M if (M<=N).
230*/
231#define ASYNC_MUTEX_LOCK 0
232#define ASYNC_MUTEX_QUEUE 1
233#define ASYNC_MUTEX_WRITER 2
234
235/* Values for use as the 'eCond' argument of the above functions. */
236#define ASYNC_COND_QUEUE 0
237
238/*************************************************************************
239** Start of OS specific code.
240*/
241#if SQLITE_OS_WIN || defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) || defined(__BORLANDC__)
242
243/* The following block contains the win32 specific code. */
244
/* True if the calling thread is recorded as the holder of mutex X.  The
** aHolder[] array is only maintained through TESTONLY(), so this macro is
** meaningful only inside assert() statements. */
#define mutex_held(X) (GetCurrentThreadId()==primitives.aHolder[X])

/* Win32 synchronisation objects.  The aMutex[] and aHolder[] arrays are
** indexed by the ASYNC_MUTEX_xxx constants, aCond[] by ASYNC_COND_xxx. */
static struct AsyncPrimitives {
  int isInit;                   /* True once async_os_initialize() succeeded */
  DWORD aHolder[3];             /* Thread id of each mutex holder (debug only) */
  CRITICAL_SECTION aMutex[3];   /* The lock, queue and writer mutexes */
  HANDLE aCond[1];              /* Event object standing in for the cond-var */
} primitives = { 0 };
253
254static int async_os_initialize(void){
255 if( !primitives.isInit ){
256 primitives.aCond[0] = CreateEvent(NULL, TRUE, FALSE, 0);
257 if( primitives.aCond[0]==NULL ){
258 return 1;
259 }
260 InitializeCriticalSection(&primitives.aMutex[0]);
261 InitializeCriticalSection(&primitives.aMutex[1]);
262 InitializeCriticalSection(&primitives.aMutex[2]);
263 primitives.isInit = 1;
264 }
265 return 0;
266}
267static void async_os_shutdown(void){
268 if( primitives.isInit ){
269 DeleteCriticalSection(&primitives.aMutex[0]);
270 DeleteCriticalSection(&primitives.aMutex[1]);
271 DeleteCriticalSection(&primitives.aMutex[2]);
272 CloseHandle(primitives.aCond[0]);
273 primitives.isInit = 0;
274 }
275}
276
277/* The following block contains the Win32 specific code. */
278static void async_mutex_enter(int eMutex){
279 assert( eMutex==0 || eMutex==1 || eMutex==2 );
280 assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) );
281 assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) );
282 assert( eMutex!=0 || (!mutex_held(0)) );
283 EnterCriticalSection(&primitives.aMutex[eMutex]);
284 TESTONLY( primitives.aHolder[eMutex] = GetCurrentThreadId(); )
285}
286static void async_mutex_leave(int eMutex){
287 assert( eMutex==0 || eMutex==1 || eMutex==2 );
288 assert( mutex_held(eMutex) );
289 TESTONLY( primitives.aHolder[eMutex] = 0; )
290 LeaveCriticalSection(&primitives.aMutex[eMutex]);
291}
292static void async_cond_wait(int eCond, int eMutex){
293 ResetEvent(primitives.aCond[eCond]);
294 async_mutex_leave(eMutex);
295 WaitForSingleObject(primitives.aCond[eCond], INFINITE);
296 async_mutex_enter(eMutex);
297}
298static void async_cond_signal(int eCond){
299 assert( mutex_held(ASYNC_MUTEX_QUEUE) );
300 SetEvent(primitives.aCond[eCond]);
301}
/*
** Win32: hint that the current thread could give up the CPU.  This is
** intentionally a no-op for now; the system works without it.
*/
static void async_sched_yield(void){
  /* Todo: Find out if win32 offers anything like sched_yield() */
  /* NOTE(review): Sleep(0) or SwitchToThread() look like candidates -
  ** confirm against the Win32 documentation before adopting either. */
}
305#else
306
307/* The following block contains the pthreads specific code. */
308#include <pthread.h>
309#include <sched.h>
310
/* True if the calling thread is recorded as the holder of mutex X.  The
** aHolder[] array is only maintained through TESTONLY(), so this macro is
** meaningful only inside assert() statements. */
#define mutex_held(X) pthread_equal(primitives.aHolder[X], pthread_self())
312
/* pthreads: nothing to set up - the primitives are statically
** initialised.  Always returns 0 (success). */
static int async_os_initialize(void){
  return 0;
}
/* pthreads: nothing to tear down. */
static void async_os_shutdown(void){
  return;
}
315
/* pthreads synchronisation objects, statically initialised.  The aMutex[]
** and aHolder[] arrays are indexed by the ASYNC_MUTEX_xxx constants,
** aCond[] by ASYNC_COND_xxx. */
static struct AsyncPrimitives {
  pthread_mutex_t aMutex[3];    /* The lock, queue and writer mutexes */
  pthread_cond_t aCond[1];      /* The queue condition variable */
  pthread_t aHolder[3];         /* Holder of each mutex (debug builds only) */
} primitives = {
  { PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER
  } , {
    PTHREAD_COND_INITIALIZER
  } , { 0, 0, 0 }
};
328
329static void async_mutex_enter(int eMutex){
330 assert( eMutex==0 || eMutex==1 || eMutex==2 );
331 assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) );
332 assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) );
333 assert( eMutex!=0 || (!mutex_held(0)) );
334 pthread_mutex_lock(&primitives.aMutex[eMutex]);
335 TESTONLY( primitives.aHolder[eMutex] = pthread_self(); )
336}
337static void async_mutex_leave(int eMutex){
338 assert( eMutex==0 || eMutex==1 || eMutex==2 );
339 assert( mutex_held(eMutex) );
340 TESTONLY( primitives.aHolder[eMutex] = 0; )
341 pthread_mutex_unlock(&primitives.aMutex[eMutex]);
342}
343static void async_cond_wait(int eCond, int eMutex){
344 assert( eMutex==0 || eMutex==1 || eMutex==2 );
345 assert( mutex_held(eMutex) );
346 TESTONLY( primitives.aHolder[eMutex] = 0; )
347 pthread_cond_wait(&primitives.aCond[eCond], &primitives.aMutex[eMutex]);
348 TESTONLY( primitives.aHolder[eMutex] = pthread_self(); )
349}
350static void async_cond_signal(int eCond){
351 assert( mutex_held(ASYNC_MUTEX_QUEUE) );
352 pthread_cond_signal(&primitives.aCond[eCond]);
353}
/* pthreads: suggest to the scheduler that this thread give up the CPU.
** The return value of sched_yield() is deliberately ignored. */
static void async_sched_yield(void){
  (void)sched_yield();
}
357#endif
358/*
359** End of OS specific code.
360*************************************************************************/
361
362#define assert_mutex_is_held(X) assert( mutex_held(X) )
363
364
365#ifndef SQLITE_ASYNC_TWO_FILEHANDLES
366/* #define SQLITE_ASYNC_TWO_FILEHANDLES 0 */
367#define SQLITE_ASYNC_TWO_FILEHANDLES 1
368#endif
369
/*
** State information is held in the static variable "async" defined
** as the following structure.
**
** Both async.ioError and async.nFile are protected by the queue mutex
** (ASYNC_MUTEX_QUEUE), as are the pQueueFirst/pQueueLast list pointers.
** The async.pLock list is protected by the lock mutex (ASYNC_MUTEX_LOCK).
*/
static struct TestAsyncStaticData {
  AsyncWrite *pQueueFirst;     /* Next write operation to be processed */
  AsyncWrite *pQueueLast;      /* Last write operation on the list */
  AsyncLock *pLock;            /* Linked list of all AsyncLock structures */
  volatile int ioDelay;        /* Extra delay between write operations */
  volatile int eHalt;          /* One of the SQLITEASYNC_HALT_XXX values */
  volatile int bLockFiles;     /* Current value of "lockfiles" parameter */
  int ioError;                 /* True if an IO error has occurred */
  int nFile;                   /* Number of open files (from sqlite pov) */
  /* The initializer below is positional: everything starts at zero except
  ** bLockFiles (the sixth field), which starts at 1. */
} async = { 0,0,0,0,0,1,0,0 };
danielk1977a3f06592009-04-23 14:58:39 +0000386
387/* Possible values of AsyncWrite.op */
388#define ASYNC_NOOP 0
389#define ASYNC_WRITE 1
390#define ASYNC_SYNC 2
391#define ASYNC_TRUNCATE 3
392#define ASYNC_CLOSE 4
393#define ASYNC_DELETE 5
394#define ASYNC_OPENEXCLUSIVE 6
395#define ASYNC_UNLOCK 7
396
/* Printable names for the ASYNC_xxx opcodes, indexed by opcode value.
** Used by debug tracing only.  The order here must match the numeric
** values of the ASYNC_xxx macros.
*/
static const char *azOpcodeName[] = {
  "NOOP",        /* 0: ASYNC_NOOP */
  "WRITE",       /* 1: ASYNC_WRITE */
  "SYNC",        /* 2: ASYNC_SYNC */
  "TRUNCATE",    /* 3: ASYNC_TRUNCATE */
  "CLOSE",       /* 4: ASYNC_CLOSE */
  "DELETE",      /* 5: ASYNC_DELETE */
  "OPENEX",      /* 6: ASYNC_OPENEXCLUSIVE */
  "UNLOCK"       /* 7: ASYNC_UNLOCK */
};
403
404/*
405** Entries on the write-op queue are instances of the AsyncWrite
406** structure, defined here.
407**
408** The interpretation of the iOffset and nByte variables varies depending
409** on the value of AsyncWrite.op:
410**
411** ASYNC_NOOP:
412** No values used.
413**
414** ASYNC_WRITE:
415** iOffset -> Offset in file to write to.
416** nByte -> Number of bytes of data to write (pointed to by zBuf).
417**
418** ASYNC_SYNC:
419** nByte -> flags to pass to sqlite3OsSync().
420**
421** ASYNC_TRUNCATE:
422** iOffset -> Size to truncate file to.
423** nByte -> Unused.
424**
425** ASYNC_CLOSE:
426** iOffset -> Unused.
427** nByte -> Unused.
428**
429** ASYNC_DELETE:
430** iOffset -> Contains the "syncDir" flag.
431** nByte -> Number of bytes of zBuf points to (file name).
432**
433** ASYNC_OPENEXCLUSIVE:
434** iOffset -> Value of "delflag".
435** nByte -> Number of bytes of zBuf points to (file name).
436**
437** ASYNC_UNLOCK:
438** nByte -> Argument to sqlite3OsUnlock().
439**
440**
441** For an ASYNC_WRITE operation, zBuf points to the data to write to the file.
442** This space is sqlite3_malloc()d along with the AsyncWrite structure in a
443** single blob, so is deleted when sqlite3_free() is called on the parent
444** structure.
445*/
/* One entry on the global write-op queue (async.pQueueFirst..pQueueLast). */
struct AsyncWrite {
  AsyncFileData *pFileData;    /* File to write data to or sync */
  int op;                      /* One of ASYNC_xxx etc. */
  sqlite_int64 iOffset;        /* See above */
  int nByte;                   /* See above */
  char *zBuf;                  /* Data to write to file (or NULL if op!=ASYNC_WRITE) */
  AsyncWrite *pNext;           /* Next write operation (to any file) */
};
454
455/*
** An instance of this structure is created for each distinct open file
** (i.e. if two handles are opened on the one file, only one of these
** structures is allocated) and linked into the async.pLock list. Entries
** on that list are identified by the full pathname of the opened file.
460**
461** AsyncLock.pList points to the head of a linked list of AsyncFileLock
462** structures, one for each handle currently open on the file.
463**
464** If the opened file is not a main-database (the SQLITE_OPEN_MAIN_DB is
danielk19774598b8e2009-04-24 10:13:05 +0000465** not passed to the sqlite3OsOpen() call), or if async.bLockFiles is
466** false, variables AsyncLock.pFile and AsyncLock.eLock are never used.
467** Otherwise, pFile is a file handle opened on the file in question and
468** used to obtain the file-system locks required by database connections
469** within this process.
danielk1977a3f06592009-04-23 14:58:39 +0000470**
471** See comments above the asyncLock() function for more details on
472** the implementation of database locking used by this backend.
473*/
/* Per-file-system-entry lock state, shared by every handle on that file. */
struct AsyncLock {
  char *zFile;           /* File name; the key compared by findLock() */
  int nFile;             /* Number of bytes in zFile */
  sqlite3_file *pFile;   /* Handle used for real file locks, or NULL */
  int eLock;             /* Lock level currently held on pFile */
  AsyncFileLock *pList;  /* List of per-handle lock records for this file */
  AsyncLock *pNext;      /* Next in linked list headed by async.pLock */
};
482
483/*
484** An instance of the following structure is allocated along with each
485** AsyncFileData structure (see AsyncFileData.lock), but is only used if the
486** file was opened with the SQLITE_OPEN_MAIN_DB.
487*/
/* Lock state of a single open handle; linked into AsyncLock.pList. */
struct AsyncFileLock {
  int eLock;             /* Internally visible lock state (sqlite pov) */
  int eAsyncLock;        /* Lock-state with write-queue unlock */
  AsyncFileLock *pNext;  /* Next record on the owning AsyncLock.pList */
};
493
494/*
495** The AsyncFile structure is a subclass of sqlite3_file used for
496** asynchronous IO.
497**
498** All of the actual data for the structure is stored in the structure
499** pointed to by AsyncFile.pData, which is allocated as part of the
500** sqlite3OsOpen() using sqlite3_malloc(). The reason for this is that the
501** lifetime of the AsyncFile structure is ended by the caller after OsClose()
502** is called, but the data in AsyncFileData may be required by the
503** writer thread after that point.
504*/
/* The sqlite3_file subclass handed back to SQLite.  Code in this file
** casts sqlite3_file pointers to AsyncFile pointers to reach pData. */
struct AsyncFile {
  sqlite3_io_methods *pMethod;  /* IO method table for this handle */
  AsyncFileData *pData;         /* The real per-file state (see above) */
};
/* State for one open file.  Referenced from AsyncFile.pData and from
** queued AsyncWrite entries (AsyncWrite.pFileData). */
struct AsyncFileData {
  char *zName;               /* Underlying OS filename - used for debugging */
  int nName;                 /* Number of characters in zName */
  sqlite3_file *pBaseRead;   /* Read handle to the underlying Os file */
  sqlite3_file *pBaseWrite;  /* Write handle to the underlying Os file */
  AsyncFileLock lock;        /* Lock state for this handle */
  AsyncLock *pLock;          /* AsyncLock object for this file system entry */
  AsyncWrite closeOp;        /* Preallocated close operation */
};
518
519/*
520** Add an entry to the end of the global write-op list. pWrite should point
521** to an AsyncWrite structure allocated using sqlite3_malloc(). The writer
522** thread will call sqlite3_free() to free the structure after the specified
523** operation has been completed.
524**
525** Once an AsyncWrite structure has been added to the list, it becomes the
526** property of the writer thread and must not be read or modified by the
527** caller.
528*/
529static void addAsyncWrite(AsyncWrite *pWrite){
530 /* We must hold the queue mutex in order to modify the queue pointers */
531 if( pWrite->op!=ASYNC_UNLOCK ){
532 async_mutex_enter(ASYNC_MUTEX_QUEUE);
533 }
534
535 /* Add the record to the end of the write-op queue */
536 assert( !pWrite->pNext );
537 if( async.pQueueLast ){
538 assert( async.pQueueFirst );
539 async.pQueueLast->pNext = pWrite;
540 }else{
541 async.pQueueFirst = pWrite;
542 }
543 async.pQueueLast = pWrite;
544 ASYNC_TRACE(("PUSH %p (%s %s %d)\n", pWrite, azOpcodeName[pWrite->op],
545 pWrite->pFileData ? pWrite->pFileData->zName : "-", pWrite->iOffset));
546
547 if( pWrite->op==ASYNC_CLOSE ){
548 async.nFile--;
549 }
550
551 /* The writer thread might have been idle because there was nothing
552 ** on the write-op queue for it to do. So wake it up. */
553 async_cond_signal(ASYNC_COND_QUEUE);
554
555 /* Drop the queue mutex */
556 if( pWrite->op!=ASYNC_UNLOCK ){
557 async_mutex_leave(ASYNC_MUTEX_QUEUE);
558 }
559}
560
561/*
562** Increment async.nFile in a thread-safe manner.
563*/
564static void incrOpenFileCount(void){
565 /* We must hold the queue mutex in order to modify async.nFile */
566 async_mutex_enter(ASYNC_MUTEX_QUEUE);
567 if( async.nFile==0 ){
568 async.ioError = SQLITE_OK;
569 }
570 async.nFile++;
571 async_mutex_leave(ASYNC_MUTEX_QUEUE);
572}
573
574/*
575** This is a utility function to allocate and populate a new AsyncWrite
576** structure and insert it (via addAsyncWrite() ) into the global list.
577*/
578static int addNewAsyncWrite(
579 AsyncFileData *pFileData,
580 int op,
581 sqlite3_int64 iOffset,
582 int nByte,
583 const char *zByte
584){
585 AsyncWrite *p;
586 if( op!=ASYNC_CLOSE && async.ioError ){
587 return async.ioError;
588 }
589 p = sqlite3_malloc(sizeof(AsyncWrite) + (zByte?nByte:0));
590 if( !p ){
591 /* The upper layer does not expect operations like OsWrite() to
592 ** return SQLITE_NOMEM. This is partly because under normal conditions
593 ** SQLite is required to do rollback without calling malloc(). So
594 ** if malloc() fails here, treat it as an I/O error. The above
595 ** layer knows how to handle that.
596 */
597 return SQLITE_IOERR;
598 }
599 p->op = op;
600 p->iOffset = iOffset;
601 p->nByte = nByte;
602 p->pFileData = pFileData;
603 p->pNext = 0;
604 if( zByte ){
605 p->zBuf = (char *)&p[1];
606 memcpy(p->zBuf, zByte, nByte);
607 }else{
608 p->zBuf = 0;
609 }
610 addAsyncWrite(p);
611 return SQLITE_OK;
612}
613
614/*
615** Close the file. This just adds an entry to the write-op list, the file is
616** not actually closed.
617*/
618static int asyncClose(sqlite3_file *pFile){
619 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
620
621 /* Unlock the file, if it is locked */
622 async_mutex_enter(ASYNC_MUTEX_LOCK);
623 p->lock.eLock = 0;
624 async_mutex_leave(ASYNC_MUTEX_LOCK);
625
626 addAsyncWrite(&p->closeOp);
627 return SQLITE_OK;
628}
629
630/*
631** Implementation of sqlite3OsWrite() for asynchronous files. Instead of
632** writing to the underlying file, this function adds an entry to the end of
633** the global AsyncWrite list. Either SQLITE_OK or SQLITE_NOMEM may be
634** returned.
635*/
636static int asyncWrite(
637 sqlite3_file *pFile,
638 const void *pBuf,
639 int amt,
640 sqlite3_int64 iOff
641){
642 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
643 return addNewAsyncWrite(p, ASYNC_WRITE, iOff, amt, pBuf);
644}
645
646/*
647** Read data from the file. First we read from the filesystem, then adjust
648** the contents of the buffer based on ASYNC_WRITE operations in the
649** write-op queue.
650**
651** This method holds the mutex from start to finish.
652*/
653static int asyncRead(
654 sqlite3_file *pFile,
655 void *zOut,
656 int iAmt,
657 sqlite3_int64 iOffset
658){
659 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
660 int rc = SQLITE_OK;
661 sqlite3_int64 filesize;
662 int nRead;
663 sqlite3_file *pBase = p->pBaseRead;
664
665 /* Grab the write queue mutex for the duration of the call */
666 async_mutex_enter(ASYNC_MUTEX_QUEUE);
667
668 /* If an I/O error has previously occurred in this virtual file
669 ** system, then all subsequent operations fail.
670 */
671 if( async.ioError!=SQLITE_OK ){
672 rc = async.ioError;
673 goto asyncread_out;
674 }
675
676 if( pBase->pMethods ){
677 rc = pBase->pMethods->xFileSize(pBase, &filesize);
678 if( rc!=SQLITE_OK ){
679 goto asyncread_out;
680 }
681 nRead = MIN(filesize - iOffset, iAmt);
682 if( nRead>0 ){
683 rc = pBase->pMethods->xRead(pBase, zOut, nRead, iOffset);
684 ASYNC_TRACE(("READ %s %d bytes at %d\n", p->zName, nRead, iOffset));
685 }
686 }
687
688 if( rc==SQLITE_OK ){
689 AsyncWrite *pWrite;
690 char *zName = p->zName;
691
692 for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
693 if( pWrite->op==ASYNC_WRITE && (
694 (pWrite->pFileData==p) ||
695 (zName && pWrite->pFileData->zName==zName)
696 )){
697 int iBeginOut = (pWrite->iOffset-iOffset);
698 int iBeginIn = -iBeginOut;
699 int nCopy;
700
701 if( iBeginIn<0 ) iBeginIn = 0;
702 if( iBeginOut<0 ) iBeginOut = 0;
703 nCopy = MIN(pWrite->nByte-iBeginIn, iAmt-iBeginOut);
704
705 if( nCopy>0 ){
706 memcpy(&((char *)zOut)[iBeginOut], &pWrite->zBuf[iBeginIn], nCopy);
707 ASYNC_TRACE(("OVERREAD %d bytes at %d\n", nCopy, iBeginOut+iOffset));
708 }
709 }
710 }
711 }
712
713asyncread_out:
714 async_mutex_leave(ASYNC_MUTEX_QUEUE);
715 return rc;
716}
717
718/*
719** Truncate the file to nByte bytes in length. This just adds an entry to
720** the write-op list, no IO actually takes place.
721*/
722static int asyncTruncate(sqlite3_file *pFile, sqlite3_int64 nByte){
723 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
724 return addNewAsyncWrite(p, ASYNC_TRUNCATE, nByte, 0, 0);
725}
726
727/*
728** Sync the file. This just adds an entry to the write-op list, the
729** sync() is done later by sqlite3_async_flush().
730*/
731static int asyncSync(sqlite3_file *pFile, int flags){
732 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
733 return addNewAsyncWrite(p, ASYNC_SYNC, 0, flags, 0);
734}
735
736/*
737** Read the size of the file. First we read the size of the file system
738** entry, then adjust for any ASYNC_WRITE or ASYNC_TRUNCATE operations
739** currently in the write-op list.
740**
741** This method holds the mutex from start to finish.
742*/
743int asyncFileSize(sqlite3_file *pFile, sqlite3_int64 *piSize){
744 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
745 int rc = SQLITE_OK;
746 sqlite3_int64 s = 0;
747 sqlite3_file *pBase;
748
749 async_mutex_enter(ASYNC_MUTEX_QUEUE);
750
751 /* Read the filesystem size from the base file. If pBaseRead is NULL, this
752 ** means the file hasn't been opened yet. In this case all relevant data
753 ** must be in the write-op queue anyway, so we can omit reading from the
754 ** file-system.
755 */
756 pBase = p->pBaseRead;
757 if( pBase->pMethods ){
758 rc = pBase->pMethods->xFileSize(pBase, &s);
759 }
760
761 if( rc==SQLITE_OK ){
762 AsyncWrite *pWrite;
763 for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
764 if( pWrite->op==ASYNC_DELETE
765 && p->zName
766 && strcmp(p->zName, pWrite->zBuf)==0
767 ){
768 s = 0;
769 }else if( pWrite->pFileData && (
770 (pWrite->pFileData==p)
771 || (p->zName && pWrite->pFileData->zName==p->zName)
772 )){
773 switch( pWrite->op ){
774 case ASYNC_WRITE:
775 s = MAX(pWrite->iOffset + (sqlite3_int64)(pWrite->nByte), s);
776 break;
777 case ASYNC_TRUNCATE:
778 s = MIN(s, pWrite->iOffset);
779 break;
780 }
781 }
782 }
783 *piSize = s;
784 }
785 async_mutex_leave(ASYNC_MUTEX_QUEUE);
786 return rc;
787}
788
789/*
790** Lock or unlock the actual file-system entry.
791*/
792static int getFileLock(AsyncLock *pLock){
793 int rc = SQLITE_OK;
794 AsyncFileLock *pIter;
795 int eRequired = 0;
796
797 if( pLock->pFile ){
798 for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
799 assert(pIter->eAsyncLock>=pIter->eLock);
800 if( pIter->eAsyncLock>eRequired ){
801 eRequired = pIter->eAsyncLock;
802 assert(eRequired>=0 && eRequired<=SQLITE_LOCK_EXCLUSIVE);
803 }
804 }
805
806 if( eRequired>pLock->eLock ){
807 rc = pLock->pFile->pMethods->xLock(pLock->pFile, eRequired);
808 if( rc==SQLITE_OK ){
809 pLock->eLock = eRequired;
810 }
811 }
812 else if( eRequired<pLock->eLock && eRequired<=SQLITE_LOCK_SHARED ){
813 rc = pLock->pFile->pMethods->xUnlock(pLock->pFile, eRequired);
814 if( rc==SQLITE_OK ){
815 pLock->eLock = eRequired;
816 }
817 }
818 }
819
820 return rc;
821}
822
823/*
824** Return the AsyncLock structure from the global async.pLock list
825** associated with the file-system entry identified by path zName
826** (a string of nName bytes). If no such structure exists, return 0.
827*/
828static AsyncLock *findLock(const char *zName, int nName){
829 AsyncLock *p = async.pLock;
830 while( p && (p->nFile!=nName || memcmp(p->zFile, zName, nName)) ){
831 p = p->pNext;
832 }
833 return p;
834}
835
836/*
837** The following two methods - asyncLock() and asyncUnlock() - are used
838** to obtain and release locks on database files opened with the
839** asynchronous backend.
840*/
841static int asyncLock(sqlite3_file *pFile, int eLock){
842 int rc = SQLITE_OK;
843 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
844
845 if( p->zName ){
846 async_mutex_enter(ASYNC_MUTEX_LOCK);
847 if( p->lock.eLock<eLock ){
848 AsyncLock *pLock = p->pLock;
849 AsyncFileLock *pIter;
850 assert(pLock && pLock->pList);
851 for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
852 if( pIter!=&p->lock && (
853 (eLock==SQLITE_LOCK_EXCLUSIVE && pIter->eLock>=SQLITE_LOCK_SHARED) ||
854 (eLock==SQLITE_LOCK_PENDING && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
855 (eLock==SQLITE_LOCK_RESERVED && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
856 (eLock==SQLITE_LOCK_SHARED && pIter->eLock>=SQLITE_LOCK_PENDING)
857 )){
858 rc = SQLITE_BUSY;
859 }
860 }
861 if( rc==SQLITE_OK ){
862 p->lock.eLock = eLock;
863 p->lock.eAsyncLock = MAX(p->lock.eAsyncLock, eLock);
864 }
865 assert(p->lock.eAsyncLock>=p->lock.eLock);
866 if( rc==SQLITE_OK ){
867 rc = getFileLock(pLock);
868 }
869 }
870 async_mutex_leave(ASYNC_MUTEX_LOCK);
871 }
872
873 ASYNC_TRACE(("LOCK %d (%s) rc=%d\n", eLock, p->zName, rc));
874 return rc;
875}
876static int asyncUnlock(sqlite3_file *pFile, int eLock){
877 int rc = SQLITE_OK;
878 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
879 if( p->zName ){
880 AsyncFileLock *pLock = &p->lock;
881 async_mutex_enter(ASYNC_MUTEX_QUEUE);
882 async_mutex_enter(ASYNC_MUTEX_LOCK);
883 pLock->eLock = MIN(pLock->eLock, eLock);
884 rc = addNewAsyncWrite(p, ASYNC_UNLOCK, 0, eLock, 0);
885 async_mutex_leave(ASYNC_MUTEX_LOCK);
886 async_mutex_leave(ASYNC_MUTEX_QUEUE);
887 }
888 return rc;
889}
890
891/*
892** This function is called when the pager layer first opens a database file
893** and is checking for a hot-journal.
894*/
895static int asyncCheckReservedLock(sqlite3_file *pFile, int *pResOut){
896 int ret = 0;
897 AsyncFileLock *pIter;
898 AsyncFileData *p = ((AsyncFile *)pFile)->pData;
899
900 async_mutex_enter(ASYNC_MUTEX_LOCK);
901 for(pIter=p->pLock->pList; pIter; pIter=pIter->pNext){
902 if( pIter->eLock>=SQLITE_LOCK_RESERVED ){
903 ret = 1;
904 }
905 }
906 async_mutex_leave(ASYNC_MUTEX_LOCK);
907
908 ASYNC_TRACE(("CHECK-LOCK %d (%s)\n", ret, p->zName));
909 *pResOut = ret;
910 return SQLITE_OK;
911}
912
913/*
914** sqlite3_file_control() implementation.
915*/
916static int asyncFileControl(sqlite3_file *id, int op, void *pArg){
917 switch( op ){
918 case SQLITE_FCNTL_LOCKSTATE: {
919 async_mutex_enter(ASYNC_MUTEX_LOCK);
920 *(int*)pArg = ((AsyncFile*)id)->pData->lock.eLock;
921 async_mutex_leave(ASYNC_MUTEX_LOCK);
922 return SQLITE_OK;
923 }
924 }
925 return SQLITE_ERROR;
926}
927
/*
** Return the device characteristics and sector-size of the device. It
** is tricky to implement these correctly, as this backend might
** not have an open file handle at this point.
*/
933static int asyncSectorSize(sqlite3_file *pFile){
934 return 512;
935}
936static int asyncDeviceCharacteristics(sqlite3_file *pFile){
937 return 0;
938}
939
940static int unlinkAsyncFile(AsyncFileData *pData){
941 AsyncFileLock **ppIter;
942 int rc = SQLITE_OK;
943
944 if( pData->zName ){
945 AsyncLock *pLock = pData->pLock;
946 for(ppIter=&pLock->pList; *ppIter; ppIter=&((*ppIter)->pNext)){
947 if( (*ppIter)==&pData->lock ){
948 *ppIter = pData->lock.pNext;
949 break;
950 }
951 }
952 if( !pLock->pList ){
953 AsyncLock **pp;
954 if( pLock->pFile ){
955 pLock->pFile->pMethods->xClose(pLock->pFile);
956 }
957 for(pp=&async.pLock; *pp!=pLock; pp=&((*pp)->pNext));
958 *pp = pLock->pNext;
959 sqlite3_free(pLock);
960 }else{
961 rc = getFileLock(pLock);
962 }
963 }
964
965 return rc;
966}
967
968/*
969** The parameter passed to this function is a copy of a 'flags' parameter
970** passed to this modules xOpen() method. This function returns true
971** if the file should be opened asynchronously, or false if it should
972** be opened immediately.
973**
974** If the file is to be opened asynchronously, then asyncOpen() will add
975** an entry to the event queue and the file will not actually be opened
976** until the event is processed. Otherwise, the file is opened directly
977** by the caller.
978*/
979static int doAsynchronousOpen(int flags){
980 return (flags&SQLITE_OPEN_CREATE) && (
981 (flags&SQLITE_OPEN_MAIN_JOURNAL) ||
982 (flags&SQLITE_OPEN_TEMP_JOURNAL) ||
983 (flags&SQLITE_OPEN_DELETEONCLOSE)
984 );
985}
986
987/*
988** Open a file.
989*/
990static int asyncOpen(
991 sqlite3_vfs *pAsyncVfs,
992 const char *zName,
993 sqlite3_file *pFile,
994 int flags,
995 int *pOutFlags
996){
997 static sqlite3_io_methods async_methods = {
998 1, /* iVersion */
999 asyncClose, /* xClose */
1000 asyncRead, /* xRead */
1001 asyncWrite, /* xWrite */
1002 asyncTruncate, /* xTruncate */
1003 asyncSync, /* xSync */
1004 asyncFileSize, /* xFileSize */
1005 asyncLock, /* xLock */
1006 asyncUnlock, /* xUnlock */
1007 asyncCheckReservedLock, /* xCheckReservedLock */
1008 asyncFileControl, /* xFileControl */
1009 asyncSectorSize, /* xSectorSize */
1010 asyncDeviceCharacteristics /* xDeviceCharacteristics */
1011 };
1012
1013 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1014 AsyncFile *p = (AsyncFile *)pFile;
1015 int nName = 0;
1016 int rc = SQLITE_OK;
1017 int nByte;
1018 AsyncFileData *pData;
1019 AsyncLock *pLock = 0;
1020 char *z;
1021 int isAsyncOpen = doAsynchronousOpen(flags);
1022
1023 /* If zName is NULL, then the upper layer is requesting an anonymous file */
1024 if( zName ){
1025 nName = strlen(zName)+1;
1026 }
1027
1028 nByte = (
1029 sizeof(AsyncFileData) + /* AsyncFileData structure */
1030 2 * pVfs->szOsFile + /* AsyncFileData.pBaseRead and pBaseWrite */
1031 nName /* AsyncFileData.zName */
1032 );
1033 z = sqlite3_malloc(nByte);
1034 if( !z ){
1035 return SQLITE_NOMEM;
1036 }
1037 memset(z, 0, nByte);
1038 pData = (AsyncFileData*)z;
1039 z += sizeof(pData[0]);
1040 pData->pBaseRead = (sqlite3_file*)z;
1041 z += pVfs->szOsFile;
1042 pData->pBaseWrite = (sqlite3_file*)z;
1043 pData->closeOp.pFileData = pData;
1044 pData->closeOp.op = ASYNC_CLOSE;
1045
1046 if( zName ){
1047 z += pVfs->szOsFile;
1048 pData->zName = z;
1049 pData->nName = nName;
1050 memcpy(pData->zName, zName, nName);
1051 }
1052
1053 if( !isAsyncOpen ){
1054 int flagsout;
1055 rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, &flagsout);
1056 if( rc==SQLITE_OK && (flagsout&SQLITE_OPEN_READWRITE) ){
1057 rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseWrite, flags, 0);
1058 }
1059 if( pOutFlags ){
1060 *pOutFlags = flagsout;
1061 }
1062 }
1063
1064 async_mutex_enter(ASYNC_MUTEX_LOCK);
1065
1066 if( zName && rc==SQLITE_OK ){
1067 pLock = findLock(pData->zName, pData->nName);
1068 if( !pLock ){
1069 int nByte = pVfs->szOsFile + sizeof(AsyncLock) + pData->nName + 1;
1070 pLock = (AsyncLock *)sqlite3_malloc(nByte);
1071 if( pLock ){
1072 memset(pLock, 0, nByte);
danielk19774598b8e2009-04-24 10:13:05 +00001073 if( async.bLockFiles && (flags&SQLITE_OPEN_MAIN_DB) ){
danielk1977a3f06592009-04-23 14:58:39 +00001074 pLock->pFile = (sqlite3_file *)&pLock[1];
1075 rc = pVfs->xOpen(pVfs, pData->zName, pLock->pFile, flags, 0);
1076 if( rc!=SQLITE_OK ){
1077 sqlite3_free(pLock);
1078 pLock = 0;
1079 }
1080 }
danielk1977a3f06592009-04-23 14:58:39 +00001081 if( pLock ){
1082 pLock->nFile = pData->nName;
1083 pLock->zFile = &((char *)(&pLock[1]))[pVfs->szOsFile];
1084 memcpy(pLock->zFile, pData->zName, pLock->nFile);
1085 pLock->pNext = async.pLock;
1086 async.pLock = pLock;
1087 }
1088 }else{
1089 rc = SQLITE_NOMEM;
1090 }
1091 }
1092 }
1093
1094 if( rc==SQLITE_OK ){
1095 p->pMethod = &async_methods;
1096 p->pData = pData;
1097
1098 /* Link AsyncFileData.lock into the linked list of
1099 ** AsyncFileLock structures for this file.
1100 */
1101 if( zName ){
1102 pData->lock.pNext = pLock->pList;
1103 pLock->pList = &pData->lock;
1104 pData->zName = pLock->zFile;
1105 }
1106 }else{
1107 if( pData->pBaseRead->pMethods ){
1108 pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
1109 }
1110 if( pData->pBaseWrite->pMethods ){
1111 pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
1112 }
1113 sqlite3_free(pData);
1114 }
1115
1116 async_mutex_leave(ASYNC_MUTEX_LOCK);
1117
1118 if( rc==SQLITE_OK ){
1119 incrOpenFileCount();
1120 pData->pLock = pLock;
1121 }
1122
1123 if( rc==SQLITE_OK && isAsyncOpen ){
1124 rc = addNewAsyncWrite(pData, ASYNC_OPENEXCLUSIVE, (sqlite3_int64)flags,0,0);
1125 if( rc==SQLITE_OK ){
1126 if( pOutFlags ) *pOutFlags = flags;
1127 }else{
1128 async_mutex_enter(ASYNC_MUTEX_LOCK);
1129 unlinkAsyncFile(pData);
1130 async_mutex_leave(ASYNC_MUTEX_LOCK);
1131 sqlite3_free(pData);
1132 }
1133 }
1134 if( rc!=SQLITE_OK ){
1135 p->pMethod = 0;
1136 }
1137 return rc;
1138}
1139
1140/*
1141** Implementation of sqlite3OsDelete. Add an entry to the end of the
1142** write-op queue to perform the delete.
1143*/
1144static int asyncDelete(sqlite3_vfs *pAsyncVfs, const char *z, int syncDir){
1145 return addNewAsyncWrite(0, ASYNC_DELETE, syncDir, strlen(z)+1, z);
1146}
1147
1148/*
1149** Implementation of sqlite3OsAccess. This method holds the mutex from
1150** start to finish.
1151*/
1152static int asyncAccess(
1153 sqlite3_vfs *pAsyncVfs,
1154 const char *zName,
1155 int flags,
1156 int *pResOut
1157){
1158 int rc;
1159 int ret;
1160 AsyncWrite *p;
1161 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1162
1163 assert(flags==SQLITE_ACCESS_READWRITE
1164 || flags==SQLITE_ACCESS_READ
1165 || flags==SQLITE_ACCESS_EXISTS
1166 );
1167
1168 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1169 rc = pVfs->xAccess(pVfs, zName, flags, &ret);
1170 if( rc==SQLITE_OK && flags==SQLITE_ACCESS_EXISTS ){
1171 for(p=async.pQueueFirst; p; p = p->pNext){
1172 if( p->op==ASYNC_DELETE && 0==strcmp(p->zBuf, zName) ){
1173 ret = 0;
1174 }else if( p->op==ASYNC_OPENEXCLUSIVE
1175 && p->pFileData->zName
1176 && 0==strcmp(p->pFileData->zName, zName)
1177 ){
1178 ret = 1;
1179 }
1180 }
1181 }
1182 ASYNC_TRACE(("ACCESS(%s): %s = %d\n",
1183 flags==SQLITE_ACCESS_READWRITE?"read-write":
1184 flags==SQLITE_ACCESS_READ?"read":"exists"
1185 , zName, ret)
1186 );
1187 async_mutex_leave(ASYNC_MUTEX_QUEUE);
1188 *pResOut = ret;
1189 return rc;
1190}
1191
1192/*
1193** Fill in zPathOut with the full path to the file identified by zPath.
1194*/
1195static int asyncFullPathname(
1196 sqlite3_vfs *pAsyncVfs,
1197 const char *zPath,
1198 int nPathOut,
1199 char *zPathOut
1200){
1201 int rc;
1202 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1203 rc = pVfs->xFullPathname(pVfs, zPath, nPathOut, zPathOut);
1204
1205 /* Because of the way intra-process file locking works, this backend
1206 ** needs to return a canonical path. The following block assumes the
1207 ** file-system uses unix style paths.
1208 */
1209 if( rc==SQLITE_OK ){
1210 int i, j;
1211 int n = nPathOut;
1212 char *z = zPathOut;
1213 while( n>1 && z[n-1]=='/' ){ n--; }
1214 for(i=j=0; i<n; i++){
1215 if( z[i]=='/' ){
1216 if( z[i+1]=='/' ) continue;
1217 if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){
1218 i += 1;
1219 continue;
1220 }
1221 if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){
1222 while( j>0 && z[j-1]!='/' ){ j--; }
1223 if( j>0 ){ j--; }
1224 i += 2;
1225 continue;
1226 }
1227 }
1228 z[j++] = z[i];
1229 }
1230 z[j] = 0;
1231 }
1232
1233 return rc;
1234}
1235static void *asyncDlOpen(sqlite3_vfs *pAsyncVfs, const char *zPath){
1236 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1237 return pVfs->xDlOpen(pVfs, zPath);
1238}
1239static void asyncDlError(sqlite3_vfs *pAsyncVfs, int nByte, char *zErrMsg){
1240 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1241 pVfs->xDlError(pVfs, nByte, zErrMsg);
1242}
1243static void (*asyncDlSym(
1244 sqlite3_vfs *pAsyncVfs,
1245 void *pHandle,
1246 const char *zSymbol
1247))(void){
1248 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1249 return pVfs->xDlSym(pVfs, pHandle, zSymbol);
1250}
1251static void asyncDlClose(sqlite3_vfs *pAsyncVfs, void *pHandle){
1252 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1253 pVfs->xDlClose(pVfs, pHandle);
1254}
1255static int asyncRandomness(sqlite3_vfs *pAsyncVfs, int nByte, char *zBufOut){
1256 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1257 return pVfs->xRandomness(pVfs, nByte, zBufOut);
1258}
1259static int asyncSleep(sqlite3_vfs *pAsyncVfs, int nMicro){
1260 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1261 return pVfs->xSleep(pVfs, nMicro);
1262}
1263static int asyncCurrentTime(sqlite3_vfs *pAsyncVfs, double *pTimeOut){
1264 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1265 return pVfs->xCurrentTime(pVfs, pTimeOut);
1266}
1267
1268static sqlite3_vfs async_vfs = {
1269 1, /* iVersion */
1270 sizeof(AsyncFile), /* szOsFile */
1271 0, /* mxPathname */
1272 0, /* pNext */
1273 SQLITEASYNC_VFSNAME, /* zName */
1274 0, /* pAppData */
1275 asyncOpen, /* xOpen */
1276 asyncDelete, /* xDelete */
1277 asyncAccess, /* xAccess */
1278 asyncFullPathname, /* xFullPathname */
1279 asyncDlOpen, /* xDlOpen */
1280 asyncDlError, /* xDlError */
1281 asyncDlSym, /* xDlSym */
1282 asyncDlClose, /* xDlClose */
1283 asyncRandomness, /* xDlError */
1284 asyncSleep, /* xDlSym */
1285 asyncCurrentTime /* xDlClose */
1286};
1287
1288/*
1289** This procedure runs in a separate thread, reading messages off of the
1290** write queue and processing them one by one.
1291**
1292** If async.writerHaltNow is true, then this procedure exits
1293** after processing a single message.
1294**
1295** If async.writerHaltWhenIdle is true, then this procedure exits when
1296** the write queue is empty.
1297**
1298** If both of the above variables are false, this procedure runs
1299** indefinately, waiting for operations to be added to the write queue
1300** and processing them in the order in which they arrive.
1301**
1302** An artifical delay of async.ioDelay milliseconds is inserted before
1303** each write operation in order to simulate the effect of a slow disk.
1304**
1305** Only one instance of this procedure may be running at a time.
1306*/
1307static void asyncWriterThread(void){
1308 sqlite3_vfs *pVfs = (sqlite3_vfs *)(async_vfs.pAppData);
1309 AsyncWrite *p = 0;
1310 int rc = SQLITE_OK;
1311 int holdingMutex = 0;
1312
1313 async_mutex_enter(ASYNC_MUTEX_WRITER);
1314
1315 while( async.eHalt!=SQLITEASYNC_HALT_NOW ){
1316 int doNotFree = 0;
1317 sqlite3_file *pBase = 0;
1318
1319 if( !holdingMutex ){
1320 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1321 }
1322 while( (p = async.pQueueFirst)==0 ){
1323 if( async.eHalt!=SQLITEASYNC_HALT_NEVER ){
1324 async_mutex_leave(ASYNC_MUTEX_QUEUE);
1325 break;
1326 }else{
1327 ASYNC_TRACE(("IDLE\n"));
1328 async_cond_wait(ASYNC_COND_QUEUE, ASYNC_MUTEX_QUEUE);
1329 ASYNC_TRACE(("WAKEUP\n"));
1330 }
1331 }
1332 if( p==0 ) break;
1333 holdingMutex = 1;
1334
1335 /* Right now this thread is holding the mutex on the write-op queue.
1336 ** Variable 'p' points to the first entry in the write-op queue. In
1337 ** the general case, we hold on to the mutex for the entire body of
1338 ** the loop.
1339 **
1340 ** However in the cases enumerated below, we relinquish the mutex,
1341 ** perform the IO, and then re-request the mutex before removing 'p' from
1342 ** the head of the write-op queue. The idea is to increase concurrency with
1343 ** sqlite threads.
1344 **
1345 ** * An ASYNC_CLOSE operation.
1346 ** * An ASYNC_OPENEXCLUSIVE operation. For this one, we relinquish
1347 ** the mutex, call the underlying xOpenExclusive() function, then
1348 ** re-aquire the mutex before seting the AsyncFile.pBaseRead
1349 ** variable.
1350 ** * ASYNC_SYNC and ASYNC_WRITE operations, if
1351 ** SQLITE_ASYNC_TWO_FILEHANDLES was set at compile time and two
1352 ** file-handles are open for the particular file being "synced".
1353 */
1354 if( async.ioError!=SQLITE_OK && p->op!=ASYNC_CLOSE ){
1355 p->op = ASYNC_NOOP;
1356 }
1357 if( p->pFileData ){
1358 pBase = p->pFileData->pBaseWrite;
1359 if(
1360 p->op==ASYNC_CLOSE ||
1361 p->op==ASYNC_OPENEXCLUSIVE ||
1362 (pBase->pMethods && (p->op==ASYNC_SYNC || p->op==ASYNC_WRITE) )
1363 ){
1364 async_mutex_leave(ASYNC_MUTEX_QUEUE);
1365 holdingMutex = 0;
1366 }
1367 if( !pBase->pMethods ){
1368 pBase = p->pFileData->pBaseRead;
1369 }
1370 }
1371
1372 switch( p->op ){
1373 case ASYNC_NOOP:
1374 break;
1375
1376 case ASYNC_WRITE:
1377 assert( pBase );
1378 ASYNC_TRACE(("WRITE %s %d bytes at %d\n",
1379 p->pFileData->zName, p->nByte, p->iOffset));
1380 rc = pBase->pMethods->xWrite(pBase, (void *)(p->zBuf), p->nByte, p->iOffset);
1381 break;
1382
1383 case ASYNC_SYNC:
1384 assert( pBase );
1385 ASYNC_TRACE(("SYNC %s\n", p->pFileData->zName));
1386 rc = pBase->pMethods->xSync(pBase, p->nByte);
1387 break;
1388
1389 case ASYNC_TRUNCATE:
1390 assert( pBase );
1391 ASYNC_TRACE(("TRUNCATE %s to %d bytes\n",
1392 p->pFileData->zName, p->iOffset));
1393 rc = pBase->pMethods->xTruncate(pBase, p->iOffset);
1394 break;
1395
1396 case ASYNC_CLOSE: {
1397 AsyncFileData *pData = p->pFileData;
1398 ASYNC_TRACE(("CLOSE %s\n", p->pFileData->zName));
1399 if( pData->pBaseWrite->pMethods ){
1400 pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
1401 }
1402 if( pData->pBaseRead->pMethods ){
1403 pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
1404 }
1405
1406 /* Unlink AsyncFileData.lock from the linked list of AsyncFileLock
1407 ** structures for this file. Obtain the async.lockMutex mutex
1408 ** before doing so.
1409 */
1410 async_mutex_enter(ASYNC_MUTEX_LOCK);
1411 rc = unlinkAsyncFile(pData);
1412 async_mutex_leave(ASYNC_MUTEX_LOCK);
1413
1414 if( !holdingMutex ){
1415 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1416 holdingMutex = 1;
1417 }
1418 assert_mutex_is_held(ASYNC_MUTEX_QUEUE);
1419 async.pQueueFirst = p->pNext;
1420 sqlite3_free(pData);
1421 doNotFree = 1;
1422 break;
1423 }
1424
1425 case ASYNC_UNLOCK: {
1426 AsyncWrite *pIter;
1427 AsyncFileData *pData = p->pFileData;
1428 int eLock = p->nByte;
1429
1430 /* When a file is locked by SQLite using the async backend, it is
1431 ** locked within the 'real' file-system synchronously. When it is
1432 ** unlocked, an ASYNC_UNLOCK event is added to the write-queue to
1433 ** unlock the file asynchronously. The design of the async backend
1434 ** requires that the 'real' file-system file be locked from the
1435 ** time that SQLite first locks it (and probably reads from it)
1436 ** until all asynchronous write events that were scheduled before
1437 ** SQLite unlocked the file have been processed.
1438 **
1439 ** This is more complex if SQLite locks and unlocks the file multiple
1440 ** times in quick succession. For example, if SQLite does:
1441 **
1442 ** lock, write, unlock, lock, write, unlock
1443 **
1444 ** Each "lock" operation locks the file immediately. Each "write"
1445 ** and "unlock" operation adds an event to the event queue. If the
1446 ** second "lock" operation is performed before the first "unlock"
1447 ** operation has been processed asynchronously, then the first
1448 ** "unlock" cannot be safely processed as is, since this would mean
1449 ** the file was unlocked when the second "write" operation is
1450 ** processed. To work around this, when processing an ASYNC_UNLOCK
1451 ** operation, SQLite:
1452 **
1453 ** 1) Unlocks the file to the minimum of the argument passed to
1454 ** the xUnlock() call and the current lock from SQLite's point
1455 ** of view, and
1456 **
1457 ** 2) Only unlocks the file at all if this event is the last
1458 ** ASYNC_UNLOCK event on this file in the write-queue.
1459 */
1460 assert( holdingMutex==1 );
1461 assert( async.pQueueFirst==p );
1462 for(pIter=async.pQueueFirst->pNext; pIter; pIter=pIter->pNext){
1463 if( pIter->pFileData==pData && pIter->op==ASYNC_UNLOCK ) break;
1464 }
1465 if( !pIter ){
1466 async_mutex_enter(ASYNC_MUTEX_LOCK);
1467 pData->lock.eAsyncLock = MIN(
1468 pData->lock.eAsyncLock, MAX(pData->lock.eLock, eLock)
1469 );
1470 assert(pData->lock.eAsyncLock>=pData->lock.eLock);
1471 rc = getFileLock(pData->pLock);
1472 async_mutex_leave(ASYNC_MUTEX_LOCK);
1473 }
1474 break;
1475 }
1476
1477 case ASYNC_DELETE:
1478 ASYNC_TRACE(("DELETE %s\n", p->zBuf));
1479 rc = pVfs->xDelete(pVfs, p->zBuf, (int)p->iOffset);
1480 break;
1481
1482 case ASYNC_OPENEXCLUSIVE: {
1483 int flags = (int)p->iOffset;
1484 AsyncFileData *pData = p->pFileData;
1485 ASYNC_TRACE(("OPEN %s flags=%d\n", p->zBuf, (int)p->iOffset));
1486 assert(pData->pBaseRead->pMethods==0 && pData->pBaseWrite->pMethods==0);
1487 rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, 0);
1488 assert( holdingMutex==0 );
1489 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1490 holdingMutex = 1;
1491 break;
1492 }
1493
1494 default: assert(!"Illegal value for AsyncWrite.op");
1495 }
1496
1497 /* If we didn't hang on to the mutex during the IO op, obtain it now
1498 ** so that the AsyncWrite structure can be safely removed from the
1499 ** global write-op queue.
1500 */
1501 if( !holdingMutex ){
1502 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1503 holdingMutex = 1;
1504 }
1505 /* ASYNC_TRACE(("UNLINK %p\n", p)); */
1506 if( p==async.pQueueLast ){
1507 async.pQueueLast = 0;
1508 }
1509 if( !doNotFree ){
1510 assert_mutex_is_held(ASYNC_MUTEX_QUEUE);
1511 async.pQueueFirst = p->pNext;
1512 sqlite3_free(p);
1513 }
1514 assert( holdingMutex );
1515
1516 /* An IO error has occurred. We cannot report the error back to the
1517 ** connection that requested the I/O since the error happened
1518 ** asynchronously. The connection has already moved on. There
1519 ** really is nobody to report the error to.
1520 **
1521 ** The file for which the error occurred may have been a database or
1522 ** journal file. Regardless, none of the currently queued operations
1523 ** associated with the same database should now be performed. Nor should
1524 ** any subsequently requested IO on either a database or journal file
1525 ** handle for the same database be accepted until the main database
1526 ** file handle has been closed and reopened.
1527 **
1528 ** Furthermore, no further IO should be queued or performed on any file
1529 ** handle associated with a database that may have been part of a
1530 ** multi-file transaction that included the database associated with
1531 ** the IO error (i.e. a database ATTACHed to the same handle at some
1532 ** point in time).
1533 */
1534 if( rc!=SQLITE_OK ){
1535 async.ioError = rc;
1536 }
1537
1538 if( async.ioError && !async.pQueueFirst ){
1539 async_mutex_enter(ASYNC_MUTEX_LOCK);
1540 if( 0==async.pLock ){
1541 async.ioError = SQLITE_OK;
1542 }
1543 async_mutex_leave(ASYNC_MUTEX_LOCK);
1544 }
1545
1546 /* Drop the queue mutex before continuing to the next write operation
1547 ** in order to give other threads a chance to work with the write queue.
1548 */
1549 if( !async.pQueueFirst || !async.ioError ){
1550 async_mutex_leave(ASYNC_MUTEX_QUEUE);
1551 holdingMutex = 0;
1552 if( async.ioDelay>0 ){
1553 pVfs->xSleep(pVfs, async.ioDelay);
1554 }else{
1555 async_sched_yield();
1556 }
1557 }
1558 }
1559
1560 async_mutex_leave(ASYNC_MUTEX_WRITER);
1561 return;
1562}
1563
1564/*
1565** Install the asynchronous VFS.
1566*/
1567int sqlite3async_initialize(const char *zParent, int isDefault){
1568 int rc = SQLITE_OK;
1569 if( async_vfs.pAppData==0 ){
1570 sqlite3_vfs *pParent = sqlite3_vfs_find(zParent);
1571 if( !pParent || async_os_initialize() ){
1572 rc = SQLITE_ERROR;
1573 }else if( SQLITE_OK!=(rc = sqlite3_vfs_register(&async_vfs, isDefault)) ){
1574 async_os_shutdown();
1575 }else{
1576 async_vfs.pAppData = (void *)pParent;
1577 async_vfs.mxPathname = ((sqlite3_vfs *)async_vfs.pAppData)->mxPathname;
1578 }
1579 }
1580 return rc;
1581}
1582
1583/*
1584** Uninstall the asynchronous VFS.
1585*/
1586void sqlite3async_shutdown(void){
1587 if( async_vfs.pAppData ){
1588 async_os_shutdown();
1589 sqlite3_vfs_unregister((sqlite3_vfs *)&async_vfs);
1590 async_vfs.pAppData = 0;
1591 }
1592}
1593
/*
** Process events on the write-queue. This simply runs asyncWriterThread()
** in the calling thread and returns when it does.
*/
void sqlite3async_run(void){
  asyncWriterThread();
  return;
}
1600
1601/*
1602** Control/configure the asynchronous IO system.
1603*/
1604int sqlite3async_control(int op, ...){
1605 va_list ap;
1606 va_start(ap, op);
1607 switch( op ){
1608 case SQLITEASYNC_HALT: {
1609 int eWhen = va_arg(ap, int);
1610 if( eWhen!=SQLITEASYNC_HALT_NEVER
1611 && eWhen!=SQLITEASYNC_HALT_NOW
1612 && eWhen!=SQLITEASYNC_HALT_IDLE
1613 ){
danielk19774598b8e2009-04-24 10:13:05 +00001614 return SQLITE_MISUSE;
danielk1977a3f06592009-04-23 14:58:39 +00001615 }
1616 async.eHalt = eWhen;
1617 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1618 async_cond_signal(ASYNC_COND_QUEUE);
1619 async_mutex_leave(ASYNC_MUTEX_QUEUE);
1620 break;
1621 }
1622
1623 case SQLITEASYNC_DELAY: {
1624 int iDelay = va_arg(ap, int);
danielk19774598b8e2009-04-24 10:13:05 +00001625 if( iDelay<0 ){
1626 return SQLITE_MISUSE;
1627 }
danielk1977a3f06592009-04-23 14:58:39 +00001628 async.ioDelay = iDelay;
1629 break;
1630 }
danielk19774598b8e2009-04-24 10:13:05 +00001631
1632 case SQLITEASYNC_LOCKFILES: {
1633 int bLock = va_arg(ap, int);
1634 async_mutex_enter(ASYNC_MUTEX_QUEUE);
1635 if( async.nFile || async.pQueueFirst ){
1636 async_mutex_leave(ASYNC_MUTEX_QUEUE);
1637 return SQLITE_MISUSE;
1638 }
1639 async.bLockFiles = bLock;
1640 async_mutex_leave(ASYNC_MUTEX_QUEUE);
1641 break;
1642 }
danielk1977a3f06592009-04-23 14:58:39 +00001643
1644 case SQLITEASYNC_GET_HALT: {
1645 int *peWhen = va_arg(ap, int *);
1646 *peWhen = async.eHalt;
1647 break;
1648 }
1649 case SQLITEASYNC_GET_DELAY: {
1650 int *piDelay = va_arg(ap, int *);
1651 *piDelay = async.ioDelay;
1652 break;
1653 }
danielk19774598b8e2009-04-24 10:13:05 +00001654 case SQLITEASYNC_GET_LOCKFILES: {
1655 int *piDelay = va_arg(ap, int *);
1656 *piDelay = async.bLockFiles;
1657 break;
1658 }
danielk1977a3f06592009-04-23 14:58:39 +00001659
1660 default:
1661 return SQLITE_ERROR;
1662 }
1663 return SQLITE_OK;
1664}
1665
1666#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO) */
1667