blob: cfdd5a0ae9bf2ec9c67fce61c16d03dd6a115334 [file] [log] [blame]
drhbbd42a62004-05-22 17:41:58 +00001/*
2** 2004 May 22
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11******************************************************************************
12**
** This file contains the VFS implementation for unix-like operating systems
** including Linux, MacOSX, *BSD, QNX, VxWorks, AIX, HPUX, and others.
danielk1977822a5162008-05-16 04:51:54 +000015**
drh734c9862008-11-28 15:37:20 +000016** There are actually several different VFS implementations in this file.
17** The differences are in the way that file locking is done. The default
18** implementation uses Posix Advisory Locks. Alternative implementations
19** use flock(), dot-files, various proprietary locking schemas, or simply
20** skip locking all together.
21**
drh9b35ea62008-11-29 02:20:26 +000022** This source file is organized into divisions where the logic for various
drh734c9862008-11-28 15:37:20 +000023** subfunctions is contained within the appropriate division. PLEASE
24** KEEP THE STRUCTURE OF THIS FILE INTACT. New code should be placed
25** in the correct division and should be clearly labeled.
26**
drh6b9d6dd2008-12-03 19:34:47 +000027** The layout of divisions is as follows:
drh734c9862008-11-28 15:37:20 +000028**
29** * General-purpose declarations and utility functions.
30** * Unique file ID logic used by VxWorks.
31** * Various locking primitive implementations:
32** + for Posix Advisory Locks
33** + for no-op locks
34** + for dot-file locks
35** + for flock() locking
36** + for named semaphore locks (VxWorks only)
37** + for AFP filesystem locks (MacOSX only)
38** + for proxy locks (MacOSX only)
drh9b35ea62008-11-29 02:20:26 +000039** * sqlite3_file methods not associated with locking.
40** * Definitions of sqlite3_io_methods objects for all locking
41** methods plus "finder" functions for each locking method.
drh6b9d6dd2008-12-03 19:34:47 +000042** * sqlite3_vfs method implementations.
drh9b35ea62008-11-29 02:20:26 +000043** * Definitions of sqlite3_vfs objects for all locking methods
44** plus implementations of sqlite3_os_init() and sqlite3_os_end().
drh734c9862008-11-28 15:37:20 +000045**
drh6b9d6dd2008-12-03 19:34:47 +000046** $Id: os_unix.c,v 1.225 2008/12/03 19:34:47 drh Exp $
drhbbd42a62004-05-22 17:41:58 +000047*/
drhbbd42a62004-05-22 17:41:58 +000048#include "sqliteInt.h"
danielk197729bafea2008-06-26 10:41:19 +000049#if SQLITE_OS_UNIX /* This file is used on unix only */
drh66560ad2006-01-06 14:32:19 +000050
danielk1977e339d652008-06-28 11:23:00 +000051/*
drh6b9d6dd2008-12-03 19:34:47 +000052** There are various methods for file locking used for concurrency
53** control:
danielk1977e339d652008-06-28 11:23:00 +000054**
drh734c9862008-11-28 15:37:20 +000055** 1. POSIX locking (the default),
56** 2. No locking,
57** 3. Dot-file locking,
58** 4. flock() locking,
59** 5. AFP locking (OSX only),
60** 6. Named POSIX semaphores (VXWorks only),
61** 7. proxy locking. (OSX only)
62**
** Styles 4, 5, and 7 are only available if SQLITE_ENABLE_LOCKING_STYLE
64** is defined to 1. The SQLITE_ENABLE_LOCKING_STYLE also enables automatic
65** selection of the appropriate locking style based on the filesystem
66** where the database is located.
danielk1977e339d652008-06-28 11:23:00 +000067*/
drh40bbb0a2008-09-23 10:23:26 +000068#if !defined(SQLITE_ENABLE_LOCKING_STYLE)
69# if defined(__DARWIN__)
70# define SQLITE_ENABLE_LOCKING_STYLE 1
71# else
72# define SQLITE_ENABLE_LOCKING_STYLE 0
73# endif
74#endif
drhbfe66312006-10-03 17:40:40 +000075
drh9cbe6352005-11-29 03:13:21 +000076/*
drh6c7d5c52008-11-21 20:32:33 +000077** Define the OS_VXWORKS pre-processor macro to 1 if building on
danielk1977397d65f2008-11-19 11:35:39 +000078** vxworks, or 0 otherwise.
79*/
drh6c7d5c52008-11-21 20:32:33 +000080#ifndef OS_VXWORKS
81# if defined(__RTP__) || defined(_WRS_KERNEL)
82# define OS_VXWORKS 1
83# else
84# define OS_VXWORKS 0
85# endif
danielk1977397d65f2008-11-19 11:35:39 +000086#endif
87
88/*
drh9cbe6352005-11-29 03:13:21 +000089** These #defines should enable >2GB file support on Posix if the
90** underlying operating system supports it. If the OS lacks
drhf1a221e2006-01-15 17:27:17 +000091** large file support, these should be no-ops.
drh9cbe6352005-11-29 03:13:21 +000092**
93** Large file support can be disabled using the -DSQLITE_DISABLE_LFS switch
94** on the compiler command line. This is necessary if you are compiling
95** on a recent machine (ex: RedHat 7.2) but you want your code to work
96** on an older machine (ex: RedHat 6.0). If you compile on RedHat 7.2
** without this option, LFS is enabled.  But LFS does not exist in the kernel
98** in RedHat 6.0, so the code won't work. Hence, for maximum binary
99** portability you should omit LFS.
drh9b35ea62008-11-29 02:20:26 +0000100**
101** The previous paragraph was written in 2005. (This paragraph is written
102** on 2008-11-28.) These days, all Linux kernels support large files, so
103** you should probably leave LFS enabled. But some embedded platforms might
104** lack LFS in which case the SQLITE_DISABLE_LFS macro might still be useful.
drh9cbe6352005-11-29 03:13:21 +0000105*/
106#ifndef SQLITE_DISABLE_LFS
107# define _LARGE_FILE 1
108# ifndef _FILE_OFFSET_BITS
109# define _FILE_OFFSET_BITS 64
110# endif
111# define _LARGEFILE_SOURCE 1
112#endif
drhbbd42a62004-05-22 17:41:58 +0000113
drh9cbe6352005-11-29 03:13:21 +0000114/*
115** standard include files.
116*/
117#include <sys/types.h>
118#include <sys/stat.h>
119#include <fcntl.h>
120#include <unistd.h>
drhbbd42a62004-05-22 17:41:58 +0000121#include <time.h>
drh19e2d372005-08-29 23:00:03 +0000122#include <sys/time.h>
drhbbd42a62004-05-22 17:41:58 +0000123#include <errno.h>
danielk1977e339d652008-06-28 11:23:00 +0000124
drh40bbb0a2008-09-23 10:23:26 +0000125#if SQLITE_ENABLE_LOCKING_STYLE
danielk1977c70dfc42008-11-19 13:52:30 +0000126# include <sys/ioctl.h>
drh6c7d5c52008-11-21 20:32:33 +0000127# if OS_VXWORKS
danielk1977c70dfc42008-11-19 13:52:30 +0000128# include <semaphore.h>
129# include <limits.h>
130# else
drh9b35ea62008-11-29 02:20:26 +0000131# include <sys/file.h>
danielk1977c70dfc42008-11-19 13:52:30 +0000132# include <sys/param.h>
133# include <sys/mount.h>
134# endif
drhbfe66312006-10-03 17:40:40 +0000135#endif /* SQLITE_ENABLE_LOCKING_STYLE */
drh9cbe6352005-11-29 03:13:21 +0000136
137/*
drhf1a221e2006-01-15 17:27:17 +0000138** If we are to be thread-safe, include the pthreads header and define
139** the SQLITE_UNIX_THREADS macro.
drh9cbe6352005-11-29 03:13:21 +0000140*/
drhd677b3d2007-08-20 22:48:41 +0000141#if SQLITE_THREADSAFE
drh9cbe6352005-11-29 03:13:21 +0000142# include <pthread.h>
143# define SQLITE_UNIX_THREADS 1
144#endif
145
146/*
147** Default permissions when creating a new file
148*/
149#ifndef SQLITE_DEFAULT_FILE_PERMISSIONS
150# define SQLITE_DEFAULT_FILE_PERMISSIONS 0644
151#endif
152
danielk1977b4b47412007-08-17 15:53:36 +0000153/*
aswiftaebf4132008-11-21 00:10:35 +0000154 ** Default permissions when creating auto proxy dir
155 */
156#ifndef SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
157# define SQLITE_DEFAULT_PROXYDIR_PERMISSIONS 0755
158#endif
159
160/*
danielk1977b4b47412007-08-17 15:53:36 +0000161** Maximum supported path-length.
162*/
163#define MAX_PATHNAME 512
drh9cbe6352005-11-29 03:13:21 +0000164
/*
** IS_LOCK_ERROR(x) is true when the result code x is a real error, that
** is, when it is neither of the normal expected return codes SQLITE_OK
** or SQLITE_BUSY.  Only set the lastErrno for such real errors.
**
** The argument is fully parenthesized so that a compound expression
** (for example "a ? b : c" or "rc & mask") is compared as a whole.
** The argument is evaluated twice, so it must not have side effects.
*/
#define IS_LOCK_ERROR(x)  (((x) != SQLITE_OK) && ((x) != SQLITE_BUSY))
170
drh9cbe6352005-11-29 03:13:21 +0000171
172/*
drh9b35ea62008-11-29 02:20:26 +0000173** The unixFile structure is subclass of sqlite3_file specific to the unix
174** VFS implementations.
drh9cbe6352005-11-29 03:13:21 +0000175*/
drh054889e2005-11-30 03:20:31 +0000176typedef struct unixFile unixFile;
177struct unixFile {
danielk197762079062007-08-15 17:08:46 +0000178 sqlite3_io_methods const *pMethod; /* Always the first entry */
drh6c7d5c52008-11-21 20:32:33 +0000179 struct unixOpenCnt *pOpen; /* Info about all open fd's on this inode */
180 struct unixLockInfo *pLock; /* Info about locks on this inode */
181 int h; /* The file descriptor */
182 int dirfd; /* File descriptor for the directory */
183 unsigned char locktype; /* The type of lock held on this fd */
184 int lastErrno; /* The unix errno from the last I/O error */
drh6c7d5c52008-11-21 20:32:33 +0000185 void *lockingContext; /* Locking style specific state */
drh734c9862008-11-28 15:37:20 +0000186 int openFlags; /* The flags specified at open */
187#if SQLITE_THREADSAFE && defined(__linux__)
drh6c7d5c52008-11-21 20:32:33 +0000188 pthread_t tid; /* The thread that "owns" this unixFile */
189#endif
190#if OS_VXWORKS
191 int isDelete; /* Delete on close if true */
drh107886a2008-11-21 22:21:50 +0000192 struct vxworksFileId *pId; /* Unique file ID */
drh6c7d5c52008-11-21 20:32:33 +0000193#endif
danielk1977967a4a12007-08-20 14:23:44 +0000194#ifdef SQLITE_TEST
195 /* In test mode, increase the size of this structure a bit so that
196 ** it is larger than the struct CrashFile defined in test6.c.
197 */
198 char aPadding[32];
199#endif
drh9cbe6352005-11-29 03:13:21 +0000200};
201
drh0ccebe72005-06-07 22:22:50 +0000202/*
drh198bf392006-01-06 21:52:49 +0000203** Include code that is common to all os_*.c files
204*/
205#include "os_common.h"
206
207/*
drh0ccebe72005-06-07 22:22:50 +0000208** Define various macros that are missing from some systems.
209*/
drhbbd42a62004-05-22 17:41:58 +0000210#ifndef O_LARGEFILE
211# define O_LARGEFILE 0
212#endif
213#ifdef SQLITE_DISABLE_LFS
214# undef O_LARGEFILE
215# define O_LARGEFILE 0
216#endif
217#ifndef O_NOFOLLOW
218# define O_NOFOLLOW 0
219#endif
220#ifndef O_BINARY
221# define O_BINARY 0
222#endif
223
224/*
225** The DJGPP compiler environment looks mostly like Unix, but it
226** lacks the fcntl() system call. So redefine fcntl() to be something
227** that always succeeds. This means that locking does not occur under
drh85b623f2007-12-13 21:54:09 +0000228** DJGPP. But it is DOS - what did you expect?
drhbbd42a62004-05-22 17:41:58 +0000229*/
230#ifdef __DJGPP__
231# define fcntl(A,B,C) 0
232#endif
233
234/*
drh2b4b5962005-06-15 17:47:55 +0000235** The threadid macro resolves to the thread-id or to 0. Used for
236** testing and debugging only.
237*/
drhd677b3d2007-08-20 22:48:41 +0000238#if SQLITE_THREADSAFE
drh2b4b5962005-06-15 17:47:55 +0000239#define threadid pthread_self()
240#else
241#define threadid 0
242#endif
243
danielk197713adf8a2004-06-03 16:08:41 +0000244
drh107886a2008-11-21 22:21:50 +0000245/*
246** Helper functions to obtain and relinquish the global mutex.
247*/
248static void unixEnterMutex(void){
249 sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
250}
251static void unixLeaveMutex(void){
252 sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
253}
254
drh734c9862008-11-28 15:37:20 +0000255
256#ifdef SQLITE_DEBUG
257/*
258** Helper function for printing out trace information from debugging
259** binaries. This returns the string represetation of the supplied
260** integer lock-type.
261*/
262static const char *locktypeName(int locktype){
263 switch( locktype ){
264 case NO_LOCK: return "NONE";
265 case SHARED_LOCK: return "SHARED";
266 case RESERVED_LOCK: return "RESERVED";
267 case PENDING_LOCK: return "PENDING";
268 case EXCLUSIVE_LOCK: return "EXCLUSIVE";
269 }
270 return "ERROR";
271}
272#endif
273
274#ifdef SQLITE_LOCK_TRACE
275/*
276** Print out information about all locking operations.
drh6c7d5c52008-11-21 20:32:33 +0000277**
drh734c9862008-11-28 15:37:20 +0000278** This routine is used for troubleshooting locks on multithreaded
279** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE
280** command-line option on the compiler. This code is normally
281** turned off.
282*/
283static int lockTrace(int fd, int op, struct flock *p){
284 char *zOpName, *zType;
285 int s;
286 int savedErrno;
287 if( op==F_GETLK ){
288 zOpName = "GETLK";
289 }else if( op==F_SETLK ){
290 zOpName = "SETLK";
291 }else{
292 s = fcntl(fd, op, p);
293 sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
294 return s;
295 }
296 if( p->l_type==F_RDLCK ){
297 zType = "RDLCK";
298 }else if( p->l_type==F_WRLCK ){
299 zType = "WRLCK";
300 }else if( p->l_type==F_UNLCK ){
301 zType = "UNLCK";
302 }else{
303 assert( 0 );
304 }
305 assert( p->l_whence==SEEK_SET );
306 s = fcntl(fd, op, p);
307 savedErrno = errno;
308 sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
309 threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
310 (int)p->l_pid, s);
311 if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
312 struct flock l2;
313 l2 = *p;
314 fcntl(fd, F_GETLK, &l2);
315 if( l2.l_type==F_RDLCK ){
316 zType = "RDLCK";
317 }else if( l2.l_type==F_WRLCK ){
318 zType = "WRLCK";
319 }else if( l2.l_type==F_UNLCK ){
320 zType = "UNLCK";
321 }else{
322 assert( 0 );
323 }
324 sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
325 zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
326 }
327 errno = savedErrno;
328 return s;
329}
330#define fcntl lockTrace
331#endif /* SQLITE_LOCK_TRACE */
332
333
334
335/*
336** This routine translates a standard POSIX errno code into something
337** useful to the clients of the sqlite3 functions. Specifically, it is
338** intended to translate a variety of "try again" errors into SQLITE_BUSY
339** and a variety of "please close the file descriptor NOW" errors into
340** SQLITE_IOERR
341**
342** Errors during initialization of locks, or file system support for locks,
343** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately.
344*/
345static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) {
346 switch (posixError) {
347 case 0:
348 return SQLITE_OK;
349
350 case EAGAIN:
351 case ETIMEDOUT:
352 case EBUSY:
353 case EINTR:
354 case ENOLCK:
355 /* random NFS retry error, unless during file system support
356 * introspection, in which it actually means what it says */
357 return SQLITE_BUSY;
358
359 case EACCES:
360 /* EACCES is like EAGAIN during locking operations, but not any other time*/
361 if( (sqliteIOErr == SQLITE_IOERR_LOCK) ||
362 (sqliteIOErr == SQLITE_IOERR_UNLOCK) ||
363 (sqliteIOErr == SQLITE_IOERR_RDLOCK) ||
364 (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ){
365 return SQLITE_BUSY;
366 }
367 /* else fall through */
368 case EPERM:
369 return SQLITE_PERM;
370
371 case EDEADLK:
372 return SQLITE_IOERR_BLOCKED;
373
374#if EOPNOTSUPP!=ENOTSUP
375 case EOPNOTSUPP:
376 /* something went terribly awry, unless during file system support
377 * introspection, in which it actually means what it says */
378#endif
379#ifdef ENOTSUP
380 case ENOTSUP:
381 /* invalid fd, unless during file system support introspection, in which
382 * it actually means what it says */
383#endif
384 case EIO:
385 case EBADF:
386 case EINVAL:
387 case ENOTCONN:
388 case ENODEV:
389 case ENXIO:
390 case ENOENT:
391 case ESTALE:
392 case ENOSYS:
393 /* these should force the client to close the file and reconnect */
394
395 default:
396 return sqliteIOErr;
397 }
398}
399
400
401
402/******************************************************************************
403****************** Begin Unique File ID Utility Used By VxWorks ***************
404**
405** On most versions of unix, we can get a unique ID for a file by concatenating
406** the device number and the inode number. But this does not work on VxWorks.
407** On VxWorks, a unique file id must be based on the canonical filename.
408**
409** A pointer to an instance of the following structure can be used as a
410** unique file ID in VxWorks. Each instance of this structure contains
411** a copy of the canonical filename. There is also a reference count.
412** The structure is reclaimed when the number of pointers to it drops to
413** zero.
414**
415** There are never very many files open at one time and lookups are not
416** a performance-critical path, so it is sufficient to put these
417** structures on a linked list.
418*/
/* One entry in the list of unique file IDs.  Holds the canonical
** filename together with a reference count. */
struct vxworksFileId {
  struct vxworksFileId *pNext;   /* Next in a list of them all */
  int nRef;                      /* Number of references to this one */
  int nName;                     /* Length of the zCanonicalName[] string */
  char *zCanonicalName;          /* Canonical filename */
};
425
426#if OS_VXWORKS
/*
** Head of the linked list that holds all unique filenames.  The list
** starts out empty (a static pointer is zero-initialized).
*/
static struct vxworksFileId *vxworksFileList;
432
/*
** Simplify a filename into its canonical form
** by making the following changes:
**
**  * removing any trailing and duplicate /
**  * convert /./ into just /
**  * convert /A/../ where A is any simple name into just /
**
** Changes are made in-place.  The original filename is in z[0..n-1].
** The simplified name is zero-terminated and its length (the number of
** characters, not counting the terminator) is returned.
**
** Every look-ahead is bounded by n, so characters at or beyond the
** trimmed end of the name never influence the result.  In particular a
** name made up only of slashes (such as "//") simplifies to "/" and not
** to an empty string.
*/
static int vxworksSimplifyName(char *z, int n){
  int i, j;     /* i reads from the original name, j writes the result */

  /* Drop trailing slashes, but always keep at least one character. */
  while( n>1 && z[n-1]=='/' ){ n--; }

  for(i=j=0; i<n; i++){
    if( z[i]=='/' ){
      /* Duplicate slash: omit this one and let the next one be copied. */
      if( i+1<n && z[i+1]=='/' ) continue;

      /* "/./": skip over "/." so that processing resumes at the next '/'. */
      if( i+2<n && z[i+1]=='.' && z[i+2]=='/' ){
        i += 1;
        continue;
      }

      /* "/../": remove the previous path element from the output, then
      ** skip over "/.." so that processing resumes at the next '/'. */
      if( i+3<n && z[i+1]=='.' && z[i+2]=='.' && z[i+3]=='/' ){
        while( j>0 && z[j-1]!='/' ){ j--; }
        if( j>0 ){ j--; }
        i += 2;
        continue;
      }
    }
    z[j++] = z[i];
  }
  z[j] = 0;
  return j;
}
468
469/*
470** Find a unique file ID for the given absolute pathname. Return
471** a pointer to the vxworksFileId object. This pointer is the unique
472** file ID.
473**
474** The nRef field of the vxworksFileId object is incremented before
475** the object is returned. A new vxworksFileId object is created
476** and added to the global list if necessary.
477**
478** If a memory allocation error occurs, return NULL.
479*/
480static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){
481 struct vxworksFileId *pNew; /* search key and new file ID */
482 struct vxworksFileId *pCandidate; /* For looping over existing file IDs */
483 int n; /* Length of zAbsoluteName string */
484
485 assert( zAbsoluteName[0]=='/' );
486 n = strlen(zAbsoluteName);
487 pNew = sqlite3_malloc( sizeof(*pNew) + (n+1) );
488 if( pNew==0 ) return 0;
489 pNew->zCanonicalName = (char*)&pNew[1];
490 memcpy(pNew->zCanonicalName, zAbsoluteName, n+1);
491 n = vxworksSimplifyName(pNew->zCanonicalName, n);
492
493 /* Search for an existing entry that matching the canonical name.
494 ** If found, increment the reference count and return a pointer to
495 ** the existing file ID.
496 */
497 unixEnterMutex();
498 for(pCandidate=vxworksFileList; pCandidate; pCandidate=pCandidate->pNext){
499 if( pCandidate->nName==n
500 && memcmp(pCandidate->zCanonicalName, pNew->zCanonicalName, n)==0
501 ){
502 sqlite3_free(pNew);
503 pCandidate->nRef++;
504 unixLeaveMutex();
505 return pCandidate;
506 }
507 }
508
509 /* No match was found. We will make a new file ID */
510 pNew->nRef = 1;
511 pNew->nName = n;
512 pNew->pNext = vxworksFileList;
513 vxworksFileList = pNew;
514 unixLeaveMutex();
515 return pNew;
516}
517
518/*
519** Decrement the reference count on a vxworksFileId object. Free
520** the object when the reference count reaches zero.
521*/
522static void vxworksReleaseFileId(struct vxworksFileId *pId){
523 unixEnterMutex();
524 assert( pId->nRef>0 );
525 pId->nRef--;
526 if( pId->nRef==0 ){
527 struct vxworksFileId **pp;
528 for(pp=&vxworksFileList; *pp && *pp!=pId; pp = &((*pp)->pNext)){}
529 assert( *pp==pId );
530 *pp = pId->pNext;
531 sqlite3_free(pId);
532 }
533 unixLeaveMutex();
534}
535#endif /* OS_VXWORKS */
536/*************** End of Unique File ID Utility Used By VxWorks ****************
537******************************************************************************/
538
539
540/******************************************************************************
541*************************** Posix Advisory Locking ****************************
542**
drh9b35ea62008-11-29 02:20:26 +0000543** POSIX advisory locks are broken by design. ANSI STD 1003.1 (1996)
drhbbd42a62004-05-22 17:41:58 +0000544** section 6.5.2.2 lines 483 through 490 specify that when a process
545** sets or clears a lock, that operation overrides any prior locks set
546** by the same process. It does not explicitly say so, but this implies
547** that it overrides locks set by the same process using a different
548** file descriptor. Consider this test case:
drh6c7d5c52008-11-21 20:32:33 +0000549**
550** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
drhbbd42a62004-05-22 17:41:58 +0000551** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
552**
553** Suppose ./file1 and ./file2 are really the same file (because
554** one is a hard or symbolic link to the other) then if you set
555** an exclusive lock on fd1, then try to get an exclusive lock
556** on fd2, it works. I would have expected the second lock to
557** fail since there was already a lock on the file due to fd1.
558** But not so. Since both locks came from the same process, the
559** second overrides the first, even though they were on different
560** file descriptors opened on different file names.
561**
drh734c9862008-11-28 15:37:20 +0000562** This means that we cannot use POSIX locks to synchronize file access
563** among competing threads of the same process. POSIX locks will work fine
drhbbd42a62004-05-22 17:41:58 +0000564** to synchronize access for threads in separate processes, but not
565** threads within the same process.
566**
567** To work around the problem, SQLite has to manage file locks internally
568** on its own. Whenever a new database is opened, we have to find the
569** specific inode of the database file (the inode is determined by the
570** st_dev and st_ino fields of the stat structure that fstat() fills in)
571** and check for locks already existing on that inode. When locks are
572** created or removed, we have to look at our own internal record of the
573** locks to see if another thread has previously set a lock on that same
574** inode.
575**
drh9b35ea62008-11-29 02:20:26 +0000576** (Aside: The use of inode numbers as unique IDs does not work on VxWorks.
577** For VxWorks, we have to use the alternative unique ID system based on
578** canonical filename and implemented in the previous division.)
579**
danielk1977ad94b582007-08-20 06:44:22 +0000580** The sqlite3_file structure for POSIX is no longer just an integer file
drhbbd42a62004-05-22 17:41:58 +0000581** descriptor. It is now a structure that holds the integer file
582** descriptor and a pointer to a structure that describes the internal
583** locks on the corresponding inode. There is one locking structure
danielk1977ad94b582007-08-20 06:44:22 +0000584** per inode, so if the same inode is opened twice, both unixFile structures
drhbbd42a62004-05-22 17:41:58 +0000585** point to the same locking structure. The locking structure keeps
586** a reference count (so we will know when to delete it) and a "cnt"
587** field that tells us its internal lock status. cnt==0 means the
588** file is unlocked. cnt==-1 means the file has an exclusive lock.
589** cnt>0 means there are cnt shared locks on the file.
590**
591** Any attempt to lock or unlock a file first checks the locking
592** structure. The fcntl() system call is only invoked to set a
593** POSIX lock if the internal lock structure transitions between
594** a locked and an unlocked state.
595**
drh734c9862008-11-28 15:37:20 +0000596** But wait: there are yet more problems with POSIX advisory locks.
drhbbd42a62004-05-22 17:41:58 +0000597**
598** If you close a file descriptor that points to a file that has locks,
599** all locks on that file that are owned by the current process are
danielk1977ad94b582007-08-20 06:44:22 +0000600** released. To work around this problem, each unixFile structure contains
drh6c7d5c52008-11-21 20:32:33 +0000601** a pointer to an unixOpenCnt structure. There is one unixOpenCnt structure
danielk1977ad94b582007-08-20 06:44:22 +0000602** per open inode, which means that multiple unixFile can point to a single
drh6c7d5c52008-11-21 20:32:33 +0000603** unixOpenCnt. When an attempt is made to close an unixFile, if there are
danielk1977ad94b582007-08-20 06:44:22 +0000604** other unixFile open on the same inode that are holding locks, the call
drhbbd42a62004-05-22 17:41:58 +0000605** to close() the file descriptor is deferred until all of the locks clear.
drh6c7d5c52008-11-21 20:32:33 +0000606** The unixOpenCnt structure keeps a list of file descriptors that need to
drhbbd42a62004-05-22 17:41:58 +0000607** be closed and that list is walked (and cleared) when the last lock
608** clears.
609**
drh9b35ea62008-11-29 02:20:26 +0000610** Yet another problem: LinuxThreads do not play well with posix locks.
drh5fdae772004-06-29 03:29:00 +0000611**
drh9b35ea62008-11-29 02:20:26 +0000612** Many older versions of linux use the LinuxThreads library which is
613** not posix compliant. Under LinuxThreads, a lock created by thread
drh734c9862008-11-28 15:37:20 +0000614** A cannot be modified or overridden by a different thread B.
615** Only thread A can modify the lock. Locking behavior is correct
** if the application uses the newer Native Posix Thread Library (NPTL)
617** on linux - with NPTL a lock created by thread A can override locks
618** in thread B. But there is no way to know at compile-time which
619** threading library is being used. So there is no way to know at
620** compile-time whether or not thread A can override locks on thread B.
621** We have to do a run-time check to discover the behavior of the
622** current process.
drh5fdae772004-06-29 03:29:00 +0000623**
drh734c9862008-11-28 15:37:20 +0000624** On systems where thread A is unable to modify locks created by
625** thread B, we have to keep track of which thread created each
drh9b35ea62008-11-29 02:20:26 +0000626** lock. Hence there is an extra field in the key to the unixLockInfo
drh734c9862008-11-28 15:37:20 +0000627** structure to record this information. And on those systems it
628** is illegal to begin a transaction in one thread and finish it
629** in another. For this latter restriction, there is no work-around.
630** It is a limitation of LinuxThreads.
drhbbd42a62004-05-22 17:41:58 +0000631*/
632
633/*
drh6c7d5c52008-11-21 20:32:33 +0000634** Set or check the unixFile.tid field. This field is set when an unixFile
635** is first opened. All subsequent uses of the unixFile verify that the
636** same thread is operating on the unixFile. Some operating systems do
637** not allow locks to be overridden by other threads and that restriction
638** means that sqlite3* database handles cannot be moved from one thread
drh734c9862008-11-28 15:37:20 +0000639** to another while locks are held.
drh6c7d5c52008-11-21 20:32:33 +0000640**
641** Version 3.3.1 (2006-01-15): unixFile can be moved from one thread to
642** another as long as we are running on a system that supports threads
drh734c9862008-11-28 15:37:20 +0000643** overriding each others locks (which is now the most common behavior)
drh6c7d5c52008-11-21 20:32:33 +0000644** or if no locks are held. But the unixFile.pLock field needs to be
645** recomputed because its key includes the thread-id. See the
646** transferOwnership() function below for additional information
647*/
#if SQLITE_THREADSAFE && defined(__linux__)
  /* SET_THREADID(X) records the calling thread in the tid field of X.
  ** The expansion is parenthesized so that it always behaves as a
  ** single expression. */
# define SET_THREADID(X)   ((X)->tid = pthread_self())
  /* CHECK_THREADID(X) is true if threads cannot override each others
  ** locks and the calling thread is not the thread recorded in X. */
# define CHECK_THREADID(X) (threadsOverrideEachOthersLocks==0 && \
                            !pthread_equal((X)->tid, pthread_self()))
#else
  /* No thread ID is tracked in this configuration: setting is a no-op
  ** and the check never reports a mismatch. */
# define SET_THREADID(X)
# define CHECK_THREADID(X) 0
#endif
656
/*
** Key used to locate a particular unixOpenCnt structure given its inode.
** It is the same as the unixLockKey except that the thread ID is omitted.
** The file is identified by its device number together with either the
** inode number or, on VxWorks, a pointer to its vxworksFileId.
*/
struct unixFileId {
  dev_t dev;                   /* Device number */
#if OS_VXWORKS
  struct vxworksFileId *pId;   /* Unique file ID for vxworks. */
#else
  ino_t ino;                   /* Inode number */
#endif
};
670
671/*
672** An instance of the following structure serves as the key used
673** to locate a particular unixLockInfo structure given its inode.
drh5fdae772004-06-29 03:29:00 +0000674**
drh734c9862008-11-28 15:37:20 +0000675** If threads cannot override each others locks (LinuxThreads), then we
676** set the unixLockKey.tid field to the thread ID. If threads can override
677** each others locks (Posix and NPTL) then tid is always set to zero.
678** tid is omitted if we compile without threading support or on an OS
679** other than linux.
drhbbd42a62004-05-22 17:41:58 +0000680*/
drh6c7d5c52008-11-21 20:32:33 +0000681struct unixLockKey {
682 struct unixFileId fid; /* Unique identifier for the file */
drh734c9862008-11-28 15:37:20 +0000683#if SQLITE_THREADSAFE && defined(__linux__)
684 pthread_t tid; /* Thread ID of lock owner. Zero if not using LinuxThreads */
drh5fdae772004-06-29 03:29:00 +0000685#endif
drhbbd42a62004-05-22 17:41:58 +0000686};
687
688/*
689** An instance of the following structure is allocated for each open
drh9b35ea62008-11-29 02:20:26 +0000690** inode. Or, on LinuxThreads, there is one of these structures for
691** each inode opened by each thread.
drhbbd42a62004-05-22 17:41:58 +0000692**
danielk1977ad94b582007-08-20 06:44:22 +0000693** A single inode can have multiple file descriptors, so each unixFile
drhbbd42a62004-05-22 17:41:58 +0000694** structure contains a pointer to an instance of this object and this
danielk1977ad94b582007-08-20 06:44:22 +0000695** object keeps a count of the number of unixFile pointing to it.
drhbbd42a62004-05-22 17:41:58 +0000696*/
drh6c7d5c52008-11-21 20:32:33 +0000697struct unixLockInfo {
drh734c9862008-11-28 15:37:20 +0000698 struct unixLockKey lockKey; /* The lookup key */
699 int cnt; /* Number of SHARED locks held */
700 int locktype; /* One of SHARED_LOCK, RESERVED_LOCK etc. */
701 int nRef; /* Number of pointers to this structure */
702 struct unixLockInfo *pNext; /* List of all unixLockInfo objects */
703 struct unixLockInfo *pPrev; /* .... doubly linked */
drhbbd42a62004-05-22 17:41:58 +0000704};
705
706/*
707** An instance of the following structure is allocated for each open
708** inode. This structure keeps track of the number of locks on that
709** inode. If a close is attempted against an inode that is holding
710** locks, the close is deferred until all locks clear by adding the
711** file descriptor to be closed to the pending list.
drh9b35ea62008-11-29 02:20:26 +0000712**
713** TODO: Consider changing this so that there is only a single file
714** descriptor for each open file, even when it is opened multiple times.
715** The close() system call would only occur when the last database
716** using the file closes.
drhbbd42a62004-05-22 17:41:58 +0000717*/
drh6c7d5c52008-11-21 20:32:33 +0000718struct unixOpenCnt {
719 struct unixFileId fileId; /* The lookup key */
720 int nRef; /* Number of pointers to this structure */
721 int nLock; /* Number of outstanding locks */
722 int nPending; /* Number of pending close() operations */
723 int *aPending; /* Malloced space holding fd's awaiting a close() */
724#if OS_VXWORKS
725 sem_t *pSem; /* Named POSIX semaphore */
chw97185482008-11-17 08:05:31 +0000726 char aSemName[MAX_PATHNAME+1]; /* Name of that semaphore */
727#endif
drh6c7d5c52008-11-21 20:32:33 +0000728 struct unixOpenCnt *pNext, *pPrev; /* List of all unixOpenCnt objects */
drhbbd42a62004-05-22 17:41:58 +0000729};
730
/*
** Heads of the lists holding every unixLockInfo and every unixOpenCnt
** object.  Hash tables were used for this in the past.  However, there
** are rarely more than a dozen of these objects and never more than a
** few thousand, and the lookup is not on a critical path, so a simple
** linked list is sufficient.
*/
static struct unixLockInfo *lockList = 0;   /* First unixLockInfo, or 0 */
static struct unixOpenCnt *openList = 0;    /* First unixOpenCnt, or 0 */
drh5fdae772004-06-29 03:29:00 +0000739
/*
** Records whether or not threads can override each other's locks.
**
**    0:  No.  Threads cannot override each other's locks.  (LinuxThreads)
**    1:  Yes. Threads can override each other's locks.  (Posix & NPTL)
**   -1:  Not known yet.
**
** On some systems the answer is known at compile-time, and the
** SQLITE_THREAD_OVERRIDE_LOCK macro is set accordingly.  On all other
** systems SQLITE_THREAD_OVERRIDE_LOCK is undefined and the answer has
** to be determined at runtime.
**
** Normally this variable has file scope only.  During testing it is
** made global so that test code can change its value and verify that
** the right thing happens in either case.
*/
#if !defined(SQLITE_THREAD_OVERRIDE_LOCK)
# define SQLITE_THREAD_OVERRIDE_LOCK -1
#endif
#if defined(SQLITE_TEST)
int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
#else
static int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
#endif
drh5fdae772004-06-29 03:29:00 +0000766
/*
** The testThreadLockingBehavior() routine hands one of these structures
** to each test thread it starts.  The thread reads its inputs from the
** structure and writes its outcome back into it.
*/
struct threadTestData {
  int fd;                /* Descriptor of the file to be locked */
  struct flock lock;     /* Description of the locking operation */
  int result;            /* Outcome of the locking operation */
};
776
#if SQLITE_THREADSAFE && defined(__linux__)
/*
** Main routine of the thread started by testThreadLockingBehavior().
** It checks whether the shared-lock that the main thread obtained in
** testThreadLockingBehavior() conflicts with a hypothetical write-lock
** requested by this thread on the same file.
**
** Only a query (F_GETLK) is issued.  The write-lock is never actually
** taken, because that is impossible when the file is open in read-only
** mode (see ticket #3472).
**
** pArg points to a struct threadTestData.  The fcntl() return value is
** stored in its "result" field and pArg is returned unchanged.
*/
static void *threadLockingTest(void *pArg){
  struct threadTestData *pTest = (struct threadTestData*)pArg;
  int rcQuery;

  rcQuery = fcntl(pTest->fd, F_GETLK, &pTest->lock);
  pTest->result = rcQuery;
  return pArg;
}
#endif  /* SQLITE_THREADSAFE && defined(__linux__) */
drh5fdae772004-06-29 03:29:00 +0000793
drh6c7d5c52008-11-21 20:32:33 +0000794
#if SQLITE_THREADSAFE && defined(__linux__)
/*
** This procedure attempts to determine whether or not threads
** can override each others locks then sets the
** threadsOverrideEachOthersLocks variable appropriately.
**
** A read-lock is placed on the first byte of the file through fd_orig.
** A helper thread then asks, via a duplicate of the descriptor, whether
** a write-lock on that byte would be blocked.  If the query reports
** F_UNLCK the helper thread is not blocked by this thread's lock and
** threadsOverrideEachOthersLocks is set to 1, otherwise it is set to 0.
**
** If the test cannot be carried out (dup(), the probe lock, thread
** creation or the query itself fails) threadsOverrideEachOthersLocks is
** left unchanged.  The duplicate descriptor is closed on every path.
*/
static void testThreadLockingBehavior(int fd_orig){
  int fd;                     /* Duplicate of fd_orig used by the helper */
  int rc;                     /* Result of fcntl() and pthread_create() */
  struct threadTestData d;    /* Data exchanged with the helper thread */
  struct flock l;             /* The probe read-lock */
  pthread_t t;                /* The helper thread */

  fd = dup(fd_orig);
  if( fd<0 ) return;
  memset(&l, 0, sizeof(l));
  l.l_type = F_RDLCK;
  l.l_len = 1;
  l.l_start = 0;
  l.l_whence = SEEK_SET;
  rc = fcntl(fd_orig, F_SETLK, &l);
  if( rc!=0 ){
    /* The probe lock could not be taken.  Do not leak the duplicate. */
    close(fd);
    return;
  }
  memset(&d, 0, sizeof(d));
  d.fd = fd;
  d.lock = l;
  d.lock.l_type = F_WRLCK;
  rc = pthread_create(&t, 0, threadLockingTest, &d);
  if( rc!=0 ){
    /* No thread was started, so "t" must not be joined and "d" holds
    ** no answer.  Give up and leave the setting undetermined. */
    close(fd);
    return;
  }
  pthread_join(t, 0);
  /* NOTE(review): no explicit unlock of the probe lock is issued.  POSIX
  ** drops a process's record locks on a file when any descriptor for that
  ** file is closed, so this close() presumably serves that purpose too -
  ** confirm. */
  close(fd);
  if( d.result!=0 ) return;
  threadsOverrideEachOthersLocks = (d.lock.l_type==F_UNLCK);
}
#elif SQLITE_THREADSAFE
/*
** On anything other than linux, assume threads override each others locks.
*/
static void testThreadLockingBehavior(int fd_orig){
  UNUSED_PARAMETER(fd_orig);
  threadsOverrideEachOthersLocks = 1;
}
#endif /* SQLITE_THREADSAFE && defined(__linux__) */
drh5fdae772004-06-29 03:29:00 +0000836
drhbbd42a62004-05-22 17:41:58 +0000837/*
drh6c7d5c52008-11-21 20:32:33 +0000838** Release a unixLockInfo structure previously allocated by findLockInfo().
839*/
840static void releaseLockInfo(struct unixLockInfo *pLock){
danielk1977e339d652008-06-28 11:23:00 +0000841 if( pLock ){
842 pLock->nRef--;
843 if( pLock->nRef==0 ){
drhda0e7682008-07-30 15:27:54 +0000844 if( pLock->pPrev ){
845 assert( pLock->pPrev->pNext==pLock );
846 pLock->pPrev->pNext = pLock->pNext;
847 }else{
848 assert( lockList==pLock );
849 lockList = pLock->pNext;
850 }
851 if( pLock->pNext ){
852 assert( pLock->pNext->pPrev==pLock );
853 pLock->pNext->pPrev = pLock->pPrev;
854 }
danielk1977e339d652008-06-28 11:23:00 +0000855 sqlite3_free(pLock);
856 }
drhbbd42a62004-05-22 17:41:58 +0000857 }
858}
859
860/*
drh6c7d5c52008-11-21 20:32:33 +0000861** Release a unixOpenCnt structure previously allocated by findLockInfo().
drhbbd42a62004-05-22 17:41:58 +0000862*/
drh6c7d5c52008-11-21 20:32:33 +0000863static void releaseOpenCnt(struct unixOpenCnt *pOpen){
danielk1977e339d652008-06-28 11:23:00 +0000864 if( pOpen ){
865 pOpen->nRef--;
866 if( pOpen->nRef==0 ){
drhda0e7682008-07-30 15:27:54 +0000867 if( pOpen->pPrev ){
868 assert( pOpen->pPrev->pNext==pOpen );
869 pOpen->pPrev->pNext = pOpen->pNext;
870 }else{
871 assert( openList==pOpen );
872 openList = pOpen->pNext;
873 }
874 if( pOpen->pNext ){
875 assert( pOpen->pNext->pPrev==pOpen );
876 pOpen->pNext->pPrev = pOpen->pPrev;
877 }
878 sqlite3_free(pOpen->aPending);
danielk1977e339d652008-06-28 11:23:00 +0000879 sqlite3_free(pOpen);
880 }
drhbbd42a62004-05-22 17:41:58 +0000881 }
882}
883
drh6c7d5c52008-11-21 20:32:33 +0000884/*
885** Given a file descriptor, locate unixLockInfo and unixOpenCnt structures that
886** describes that file descriptor. Create new ones if necessary. The
887** return values might be uninitialized if an error occurs.
888**
889** Return an appropriate error code.
890*/
891static int findLockInfo(
892 unixFile *pFile, /* Unix file with file desc used in the key */
893 struct unixLockInfo **ppLock, /* Return the unixLockInfo structure here */
894 struct unixOpenCnt **ppOpen /* Return the unixOpenCnt structure here */
895){
896 int rc; /* System call return code */
897 int fd; /* The file descriptor for pFile */
898 struct unixLockKey lockKey; /* Lookup key for the unixLockInfo structure */
899 struct unixFileId fileId; /* Lookup key for the unixOpenCnt struct */
900 struct stat statbuf; /* Low-level file information */
901 struct unixLockInfo *pLock; /* Candidate unixLockInfo object */
902 struct unixOpenCnt *pOpen; /* Candidate unixOpenCnt object */
903
904 /* Get low-level information about the file that we can used to
905 ** create a unique name for the file.
906 */
907 fd = pFile->h;
908 rc = fstat(fd, &statbuf);
909 if( rc!=0 ){
910 pFile->lastErrno = errno;
911#ifdef EOVERFLOW
912 if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS;
913#endif
914 return SQLITE_IOERR;
915 }
916
917 /* On OS X on an msdos filesystem, the inode number is reported
918 ** incorrectly for zero-size files. See ticket #3260. To work
919 ** around this problem (we consider it a bug in OS X, not SQLite)
920 ** we always increase the file size to 1 by writing a single byte
921 ** prior to accessing the inode number. The one byte written is
922 ** an ASCII 'S' character which also happens to be the first byte
923 ** in the header of every SQLite database. In this way, if there
924 ** is a race condition such that another thread has already populated
925 ** the first page of the database, no damage is done.
926 */
927 if( statbuf.st_size==0 ){
928 write(fd, "S", 1);
929 rc = fstat(fd, &statbuf);
930 if( rc!=0 ){
931 pFile->lastErrno = errno;
932 return SQLITE_IOERR;
933 }
934 }
935
936 memset(&lockKey, 0, sizeof(lockKey));
937 lockKey.fid.dev = statbuf.st_dev;
938#if OS_VXWORKS
drh107886a2008-11-21 22:21:50 +0000939 lockKey.fid.pId = pFile->pId;
drh6c7d5c52008-11-21 20:32:33 +0000940#else
941 lockKey.fid.ino = statbuf.st_ino;
942#endif
drh734c9862008-11-28 15:37:20 +0000943#if SQLITE_THREADSAFE && defined(__linux__)
drh6c7d5c52008-11-21 20:32:33 +0000944 if( threadsOverrideEachOthersLocks<0 ){
945 testThreadLockingBehavior(fd);
946 }
947 lockKey.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();
948#endif
949 fileId = lockKey.fid;
950 if( ppLock!=0 ){
951 pLock = lockList;
952 while( pLock && memcmp(&lockKey, &pLock->lockKey, sizeof(lockKey)) ){
953 pLock = pLock->pNext;
954 }
955 if( pLock==0 ){
956 pLock = sqlite3_malloc( sizeof(*pLock) );
957 if( pLock==0 ){
958 rc = SQLITE_NOMEM;
959 goto exit_findlockinfo;
960 }
961 pLock->lockKey = lockKey;
962 pLock->nRef = 1;
963 pLock->cnt = 0;
964 pLock->locktype = 0;
965 pLock->pNext = lockList;
966 pLock->pPrev = 0;
967 if( lockList ) lockList->pPrev = pLock;
968 lockList = pLock;
969 }else{
970 pLock->nRef++;
971 }
972 *ppLock = pLock;
973 }
974 if( ppOpen!=0 ){
975 pOpen = openList;
976 while( pOpen && memcmp(&fileId, &pOpen->fileId, sizeof(fileId)) ){
977 pOpen = pOpen->pNext;
978 }
979 if( pOpen==0 ){
980 pOpen = sqlite3_malloc( sizeof(*pOpen) );
981 if( pOpen==0 ){
982 releaseLockInfo(pLock);
983 rc = SQLITE_NOMEM;
984 goto exit_findlockinfo;
985 }
986 pOpen->fileId = fileId;
987 pOpen->nRef = 1;
988 pOpen->nLock = 0;
989 pOpen->nPending = 0;
990 pOpen->aPending = 0;
991 pOpen->pNext = openList;
992 pOpen->pPrev = 0;
993 if( openList ) openList->pPrev = pOpen;
994 openList = pOpen;
995#if OS_VXWORKS
996 pOpen->pSem = NULL;
997 pOpen->aSemName[0] = '\0';
998#endif
999 }else{
1000 pOpen->nRef++;
1001 }
1002 *ppOpen = pOpen;
1003 }
1004
1005exit_findlockinfo:
1006 return rc;
1007}
drh6c7d5c52008-11-21 20:32:33 +00001008
/*
** If we are currently in a different thread than the thread that the
** unixFile argument belongs to, then transfer ownership of the unixFile
** over to the current thread.
**
** A unixFile is only owned by a thread on systems that use LinuxThreads.
**
** Ownership transfer is only allowed if the unixFile is currently unlocked.
** If the unixFile is locked and the ownership is wrong, then return
** SQLITE_MISUSE.  SQLITE_OK is returned if everything works.
**
** If the unixLockInfo for the new thread cannot be obtained, the error
** code from findLockInfo() is returned and pFile is left exactly as it
** was: it keeps its previous owner and its previous pLock, so the
** transfer can be attempted again later.
*/
#if SQLITE_THREADSAFE && defined(__linux__)
static int transferOwnership(unixFile *pFile){
  int rc;                             /* Result of findLockInfo() */
  pthread_t hSelf;                    /* The calling thread */
  struct unixLockInfo *pNewLock = 0;  /* Lock info for the calling thread */

  if( threadsOverrideEachOthersLocks ){
    /* Ownership transfers not needed on this system */
    return SQLITE_OK;
  }
  hSelf = pthread_self();
  if( pthread_equal(pFile->tid, hSelf) ){
    /* We are still in the same thread */
    OSTRACE1("No-transfer, same thread\n");
    return SQLITE_OK;
  }
  if( pFile->locktype!=NO_LOCK ){
    /* We cannot change ownership while we are holding a lock! */
    return SQLITE_MISUSE;
  }
  OSTRACE4("Transfer ownership of %d from %d to %d\n",
            pFile->h, pFile->tid, hSelf);
  if( pFile->pLock==NULL ){
    /* There is no lock info to re-home, only the owner changes. */
    pFile->tid = hSelf;
    return SQLITE_OK;
  }

  /* Look up the lock info for the calling thread BEFORE letting go of the
  ** old one.  findLockInfo() builds its key from pthread_self(), not from
  ** pFile->tid, so this may be done while pFile is still unchanged.  If
  ** the lookup fails nothing has been released and pFile->pLock remains
  ** valid. */
  rc = findLockInfo(pFile, &pNewLock, 0);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  releaseLockInfo(pFile->pLock);
  pFile->pLock = pNewLock;
  pFile->tid = hSelf;
  OSTRACE5("LOCK %d is now %s(%s,%d)\n", pFile->h,
         locktypeName(pFile->locktype),
         locktypeName(pFile->pLock->locktype), pFile->pLock->cnt);
  return SQLITE_OK;
}
#else  /* not (SQLITE_THREADSAFE && defined(__linux__)) */
  /* Without thread-owned files, ownership transfer is a no-op */
# define transferOwnership(X) SQLITE_OK
#endif /* SQLITE_THREADSAFE && defined(__linux__) */
1056
aswift5b1a2562008-08-22 00:22:35 +00001057
1058/*
danielk197713adf8a2004-06-03 16:08:41 +00001059** This routine checks if there is a RESERVED lock held on the specified
aswift5b1a2562008-08-22 00:22:35 +00001060** file by this or any other process. If such a lock is held, set *pResOut
1061** to a non-zero value otherwise *pResOut is set to zero. The return value
1062** is set to SQLITE_OK unless an I/O error occurs during lock checking.
danielk197713adf8a2004-06-03 16:08:41 +00001063*/
danielk1977861f7452008-06-05 11:39:11 +00001064static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){
aswift5b1a2562008-08-22 00:22:35 +00001065 int rc = SQLITE_OK;
1066 int reserved = 0;
drh054889e2005-11-30 03:20:31 +00001067 unixFile *pFile = (unixFile*)id;
danielk197713adf8a2004-06-03 16:08:41 +00001068
danielk1977861f7452008-06-05 11:39:11 +00001069 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
1070
drh054889e2005-11-30 03:20:31 +00001071 assert( pFile );
drh6c7d5c52008-11-21 20:32:33 +00001072 unixEnterMutex(); /* Because pFile->pLock is shared across threads */
danielk197713adf8a2004-06-03 16:08:41 +00001073
1074 /* Check if a thread in this process holds such a lock */
drh054889e2005-11-30 03:20:31 +00001075 if( pFile->pLock->locktype>SHARED_LOCK ){
aswift5b1a2562008-08-22 00:22:35 +00001076 reserved = 1;
danielk197713adf8a2004-06-03 16:08:41 +00001077 }
1078
drh2ac3ee92004-06-07 16:27:46 +00001079 /* Otherwise see if some other process holds it.
danielk197713adf8a2004-06-03 16:08:41 +00001080 */
aswift5b1a2562008-08-22 00:22:35 +00001081 if( !reserved ){
danielk197713adf8a2004-06-03 16:08:41 +00001082 struct flock lock;
1083 lock.l_whence = SEEK_SET;
drh2ac3ee92004-06-07 16:27:46 +00001084 lock.l_start = RESERVED_BYTE;
1085 lock.l_len = 1;
1086 lock.l_type = F_WRLCK;
aswift5b1a2562008-08-22 00:22:35 +00001087 if (-1 == fcntl(pFile->h, F_GETLK, &lock)) {
1088 int tErrno = errno;
1089 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
1090 pFile->lastErrno = tErrno;
1091 } else if( lock.l_type!=F_UNLCK ){
1092 reserved = 1;
danielk197713adf8a2004-06-03 16:08:41 +00001093 }
1094 }
1095
drh6c7d5c52008-11-21 20:32:33 +00001096 unixLeaveMutex();
aswift5b1a2562008-08-22 00:22:35 +00001097 OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
danielk197713adf8a2004-06-03 16:08:41 +00001098
aswift5b1a2562008-08-22 00:22:35 +00001099 *pResOut = reserved;
1100 return rc;
danielk197713adf8a2004-06-03 16:08:41 +00001101}
1102
1103/*
danielk19779a1d0ab2004-06-01 14:09:28 +00001104** Lock the file with the lock specified by parameter locktype - one
1105** of the following:
1106**
drh2ac3ee92004-06-07 16:27:46 +00001107** (1) SHARED_LOCK
1108** (2) RESERVED_LOCK
1109** (3) PENDING_LOCK
1110** (4) EXCLUSIVE_LOCK
1111**
drhb3e04342004-06-08 00:47:47 +00001112** Sometimes when requesting one lock state, additional lock states
1113** are inserted in between. The locking might fail on one of the later
1114** transitions leaving the lock state different from what it started but
1115** still short of its goal. The following chart shows the allowed
1116** transitions and the inserted intermediate states:
1117**
1118** UNLOCKED -> SHARED
1119** SHARED -> RESERVED
1120** SHARED -> (PENDING) -> EXCLUSIVE
1121** RESERVED -> (PENDING) -> EXCLUSIVE
1122** PENDING -> EXCLUSIVE
drh2ac3ee92004-06-07 16:27:46 +00001123**
drha6abd042004-06-09 17:37:22 +00001124** This routine will only increase a lock. Use the sqlite3OsUnlock()
1125** routine to lower a locking level.
danielk19779a1d0ab2004-06-01 14:09:28 +00001126*/
danielk197762079062007-08-15 17:08:46 +00001127static int unixLock(sqlite3_file *id, int locktype){
danielk1977f42f25c2004-06-25 07:21:28 +00001128 /* The following describes the implementation of the various locks and
1129 ** lock transitions in terms of the POSIX advisory shared and exclusive
1130 ** lock primitives (called read-locks and write-locks below, to avoid
1131 ** confusion with SQLite lock names). The algorithms are complicated
1132 ** slightly in order to be compatible with windows systems simultaneously
1133 ** accessing the same database file, in case that is ever required.
1134 **
1135 ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved
1136 ** byte', each single bytes at well known offsets, and the 'shared byte
1137 ** range', a range of 510 bytes at a well known offset.
1138 **
1139 ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
1140 ** byte'. If this is successful, a random byte from the 'shared byte
1141 ** range' is read-locked and the lock on the 'pending byte' released.
1142 **
danielk197790ba3bd2004-06-25 08:32:25 +00001143 ** A process may only obtain a RESERVED lock after it has a SHARED lock.
1144 ** A RESERVED lock is implemented by grabbing a write-lock on the
1145 ** 'reserved byte'.
danielk1977f42f25c2004-06-25 07:21:28 +00001146 **
1147 ** A process may only obtain a PENDING lock after it has obtained a
danielk197790ba3bd2004-06-25 08:32:25 +00001148 ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
1149 ** on the 'pending byte'. This ensures that no new SHARED locks can be
1150 ** obtained, but existing SHARED locks are allowed to persist. A process
1151 ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
1152 ** This property is used by the algorithm for rolling back a journal file
1153 ** after a crash.
danielk1977f42f25c2004-06-25 07:21:28 +00001154 **
danielk197790ba3bd2004-06-25 08:32:25 +00001155 ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
1156 ** implemented by obtaining a write-lock on the entire 'shared byte
1157 ** range'. Since all other locks require a read-lock on one of the bytes
1158 ** within this range, this ensures that no other locks are held on the
1159 ** database.
danielk1977f42f25c2004-06-25 07:21:28 +00001160 **
1161 ** The reason a single byte cannot be used instead of the 'shared byte
1162 ** range' is that some versions of windows do not support read-locks. By
1163 ** locking a random byte from a range, concurrent SHARED locks may exist
1164 ** even if the locking primitive used is always a write-lock.
1165 */
danielk19779a1d0ab2004-06-01 14:09:28 +00001166 int rc = SQLITE_OK;
drh054889e2005-11-30 03:20:31 +00001167 unixFile *pFile = (unixFile*)id;
drh6c7d5c52008-11-21 20:32:33 +00001168 struct unixLockInfo *pLock = pFile->pLock;
danielk19779a1d0ab2004-06-01 14:09:28 +00001169 struct flock lock;
1170 int s;
1171
drh054889e2005-11-30 03:20:31 +00001172 assert( pFile );
drh4f0c5872007-03-26 22:05:01 +00001173 OSTRACE7("LOCK %d %s was %s(%s,%d) pid=%d\n", pFile->h,
drh054889e2005-11-30 03:20:31 +00001174 locktypeName(locktype), locktypeName(pFile->locktype),
1175 locktypeName(pLock->locktype), pLock->cnt , getpid());
danielk19779a1d0ab2004-06-01 14:09:28 +00001176
1177 /* If there is already a lock of this type or more restrictive on the
danielk1977ad94b582007-08-20 06:44:22 +00001178 ** unixFile, do nothing. Don't use the end_lock: exit path, as
drh6c7d5c52008-11-21 20:32:33 +00001179 ** unixEnterMutex() hasn't been called yet.
danielk19779a1d0ab2004-06-01 14:09:28 +00001180 */
drh054889e2005-11-30 03:20:31 +00001181 if( pFile->locktype>=locktype ){
drh4f0c5872007-03-26 22:05:01 +00001182 OSTRACE3("LOCK %d %s ok (already held)\n", pFile->h,
drh054889e2005-11-30 03:20:31 +00001183 locktypeName(locktype));
danielk19779a1d0ab2004-06-01 14:09:28 +00001184 return SQLITE_OK;
1185 }
1186
drhb3e04342004-06-08 00:47:47 +00001187 /* Make sure the locking sequence is correct
drh2ac3ee92004-06-07 16:27:46 +00001188 */
drh054889e2005-11-30 03:20:31 +00001189 assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
drhb3e04342004-06-08 00:47:47 +00001190 assert( locktype!=PENDING_LOCK );
drh054889e2005-11-30 03:20:31 +00001191 assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
drh2ac3ee92004-06-07 16:27:46 +00001192
drh054889e2005-11-30 03:20:31 +00001193 /* This mutex is needed because pFile->pLock is shared across threads
drhb3e04342004-06-08 00:47:47 +00001194 */
drh6c7d5c52008-11-21 20:32:33 +00001195 unixEnterMutex();
danielk19779a1d0ab2004-06-01 14:09:28 +00001196
drh029b44b2006-01-15 00:13:15 +00001197 /* Make sure the current thread owns the pFile.
1198 */
1199 rc = transferOwnership(pFile);
1200 if( rc!=SQLITE_OK ){
drh6c7d5c52008-11-21 20:32:33 +00001201 unixLeaveMutex();
drh029b44b2006-01-15 00:13:15 +00001202 return rc;
1203 }
drh64b1bea2006-01-15 02:30:57 +00001204 pLock = pFile->pLock;
drh029b44b2006-01-15 00:13:15 +00001205
danielk1977ad94b582007-08-20 06:44:22 +00001206 /* If some thread using this PID has a lock via a different unixFile*
danielk19779a1d0ab2004-06-01 14:09:28 +00001207 ** handle that precludes the requested lock, return BUSY.
1208 */
drh054889e2005-11-30 03:20:31 +00001209 if( (pFile->locktype!=pLock->locktype &&
drh2ac3ee92004-06-07 16:27:46 +00001210 (pLock->locktype>=PENDING_LOCK || locktype>SHARED_LOCK))
danielk19779a1d0ab2004-06-01 14:09:28 +00001211 ){
1212 rc = SQLITE_BUSY;
1213 goto end_lock;
1214 }
1215
1216 /* If a SHARED lock is requested, and some thread using this PID already
1217 ** has a SHARED or RESERVED lock, then increment reference counts and
1218 ** return SQLITE_OK.
1219 */
1220 if( locktype==SHARED_LOCK &&
1221 (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
1222 assert( locktype==SHARED_LOCK );
drh054889e2005-11-30 03:20:31 +00001223 assert( pFile->locktype==0 );
danielk1977ecb2a962004-06-02 06:30:16 +00001224 assert( pLock->cnt>0 );
drh054889e2005-11-30 03:20:31 +00001225 pFile->locktype = SHARED_LOCK;
danielk19779a1d0ab2004-06-01 14:09:28 +00001226 pLock->cnt++;
drh054889e2005-11-30 03:20:31 +00001227 pFile->pOpen->nLock++;
danielk19779a1d0ab2004-06-01 14:09:28 +00001228 goto end_lock;
1229 }
1230
danielk197713adf8a2004-06-03 16:08:41 +00001231 lock.l_len = 1L;
drh2b4b5962005-06-15 17:47:55 +00001232
danielk19779a1d0ab2004-06-01 14:09:28 +00001233 lock.l_whence = SEEK_SET;
1234
drh3cde3bb2004-06-12 02:17:14 +00001235 /* A PENDING lock is needed before acquiring a SHARED lock and before
1236 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
1237 ** be released.
danielk19779a1d0ab2004-06-01 14:09:28 +00001238 */
drh3cde3bb2004-06-12 02:17:14 +00001239 if( locktype==SHARED_LOCK
drh054889e2005-11-30 03:20:31 +00001240 || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
drh3cde3bb2004-06-12 02:17:14 +00001241 ){
danielk1977489468c2004-06-28 08:25:47 +00001242 lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
drh2ac3ee92004-06-07 16:27:46 +00001243 lock.l_start = PENDING_BYTE;
drh054889e2005-11-30 03:20:31 +00001244 s = fcntl(pFile->h, F_SETLK, &lock);
drhe2396a12007-03-29 20:19:58 +00001245 if( s==(-1) ){
aswift5b1a2562008-08-22 00:22:35 +00001246 int tErrno = errno;
1247 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
1248 if( IS_LOCK_ERROR(rc) ){
1249 pFile->lastErrno = tErrno;
1250 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001251 goto end_lock;
1252 }
drh3cde3bb2004-06-12 02:17:14 +00001253 }
1254
1255
1256 /* If control gets to this point, then actually go ahead and make
1257 ** operating system calls for the specified lock.
1258 */
1259 if( locktype==SHARED_LOCK ){
aswift5b1a2562008-08-22 00:22:35 +00001260 int tErrno = 0;
drh3cde3bb2004-06-12 02:17:14 +00001261 assert( pLock->cnt==0 );
1262 assert( pLock->locktype==0 );
danielk19779a1d0ab2004-06-01 14:09:28 +00001263
drh2ac3ee92004-06-07 16:27:46 +00001264 /* Now get the read-lock */
1265 lock.l_start = SHARED_FIRST;
1266 lock.l_len = SHARED_SIZE;
aswift5b1a2562008-08-22 00:22:35 +00001267 if( (s = fcntl(pFile->h, F_SETLK, &lock))==(-1) ){
1268 tErrno = errno;
1269 }
drh2ac3ee92004-06-07 16:27:46 +00001270 /* Drop the temporary PENDING lock */
1271 lock.l_start = PENDING_BYTE;
1272 lock.l_len = 1L;
danielk19779a1d0ab2004-06-01 14:09:28 +00001273 lock.l_type = F_UNLCK;
drh054889e2005-11-30 03:20:31 +00001274 if( fcntl(pFile->h, F_SETLK, &lock)!=0 ){
aswift5b1a2562008-08-22 00:22:35 +00001275 if( s != -1 ){
1276 /* This could happen with a network mount */
1277 tErrno = errno;
1278 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
1279 if( IS_LOCK_ERROR(rc) ){
1280 pFile->lastErrno = tErrno;
1281 }
1282 goto end_lock;
1283 }
drh2b4b5962005-06-15 17:47:55 +00001284 }
drhe2396a12007-03-29 20:19:58 +00001285 if( s==(-1) ){
aswift5b1a2562008-08-22 00:22:35 +00001286 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
1287 if( IS_LOCK_ERROR(rc) ){
1288 pFile->lastErrno = tErrno;
1289 }
drhbbd42a62004-05-22 17:41:58 +00001290 }else{
drh054889e2005-11-30 03:20:31 +00001291 pFile->locktype = SHARED_LOCK;
1292 pFile->pOpen->nLock++;
danielk19779a1d0ab2004-06-01 14:09:28 +00001293 pLock->cnt = 1;
drhbbd42a62004-05-22 17:41:58 +00001294 }
drh3cde3bb2004-06-12 02:17:14 +00001295 }else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
1296 /* We are trying for an exclusive lock but another thread in this
1297 ** same process is still holding a shared lock. */
1298 rc = SQLITE_BUSY;
drhbbd42a62004-05-22 17:41:58 +00001299 }else{
drh3cde3bb2004-06-12 02:17:14 +00001300 /* The request was for a RESERVED or EXCLUSIVE lock. It is
danielk19779a1d0ab2004-06-01 14:09:28 +00001301 ** assumed that there is a SHARED or greater lock on the file
1302 ** already.
1303 */
drh054889e2005-11-30 03:20:31 +00001304 assert( 0!=pFile->locktype );
danielk19779a1d0ab2004-06-01 14:09:28 +00001305 lock.l_type = F_WRLCK;
1306 switch( locktype ){
1307 case RESERVED_LOCK:
drh2ac3ee92004-06-07 16:27:46 +00001308 lock.l_start = RESERVED_BYTE;
danielk19779a1d0ab2004-06-01 14:09:28 +00001309 break;
danielk19779a1d0ab2004-06-01 14:09:28 +00001310 case EXCLUSIVE_LOCK:
drh2ac3ee92004-06-07 16:27:46 +00001311 lock.l_start = SHARED_FIRST;
1312 lock.l_len = SHARED_SIZE;
danielk19779a1d0ab2004-06-01 14:09:28 +00001313 break;
1314 default:
1315 assert(0);
1316 }
drh054889e2005-11-30 03:20:31 +00001317 s = fcntl(pFile->h, F_SETLK, &lock);
drhe2396a12007-03-29 20:19:58 +00001318 if( s==(-1) ){
aswift5b1a2562008-08-22 00:22:35 +00001319 int tErrno = errno;
1320 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
1321 if( IS_LOCK_ERROR(rc) ){
1322 pFile->lastErrno = tErrno;
1323 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001324 }
drhbbd42a62004-05-22 17:41:58 +00001325 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001326
danielk1977ecb2a962004-06-02 06:30:16 +00001327 if( rc==SQLITE_OK ){
drh054889e2005-11-30 03:20:31 +00001328 pFile->locktype = locktype;
danielk1977ecb2a962004-06-02 06:30:16 +00001329 pLock->locktype = locktype;
drh3cde3bb2004-06-12 02:17:14 +00001330 }else if( locktype==EXCLUSIVE_LOCK ){
drh054889e2005-11-30 03:20:31 +00001331 pFile->locktype = PENDING_LOCK;
drh3cde3bb2004-06-12 02:17:14 +00001332 pLock->locktype = PENDING_LOCK;
danielk1977ecb2a962004-06-02 06:30:16 +00001333 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001334
1335end_lock:
drh6c7d5c52008-11-21 20:32:33 +00001336 unixLeaveMutex();
drh4f0c5872007-03-26 22:05:01 +00001337 OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
danielk19772b444852004-06-29 07:45:33 +00001338 rc==SQLITE_OK ? "ok" : "failed");
drhbbd42a62004-05-22 17:41:58 +00001339 return rc;
1340}
1341
1342/*
drh054889e2005-11-30 03:20:31 +00001343** Lower the locking level on file descriptor pFile to locktype. locktype
drha6abd042004-06-09 17:37:22 +00001344** must be either NO_LOCK or SHARED_LOCK.
1345**
1346** If the locking level of the file descriptor is already at or below
1347** the requested locking level, this routine is a no-op.
drhbbd42a62004-05-22 17:41:58 +00001348*/
danielk197762079062007-08-15 17:08:46 +00001349static int unixUnlock(sqlite3_file *id, int locktype){
drh6c7d5c52008-11-21 20:32:33 +00001350 struct unixLockInfo *pLock;
drha6abd042004-06-09 17:37:22 +00001351 struct flock lock;
drh9c105bb2004-10-02 20:38:28 +00001352 int rc = SQLITE_OK;
drh054889e2005-11-30 03:20:31 +00001353 unixFile *pFile = (unixFile*)id;
drh1aa5af12008-03-07 19:51:14 +00001354 int h;
drha6abd042004-06-09 17:37:22 +00001355
drh054889e2005-11-30 03:20:31 +00001356 assert( pFile );
drh4f0c5872007-03-26 22:05:01 +00001357 OSTRACE7("UNLOCK %d %d was %d(%d,%d) pid=%d\n", pFile->h, locktype,
drh054889e2005-11-30 03:20:31 +00001358 pFile->locktype, pFile->pLock->locktype, pFile->pLock->cnt, getpid());
drha6abd042004-06-09 17:37:22 +00001359
1360 assert( locktype<=SHARED_LOCK );
drh054889e2005-11-30 03:20:31 +00001361 if( pFile->locktype<=locktype ){
drha6abd042004-06-09 17:37:22 +00001362 return SQLITE_OK;
1363 }
drhf1a221e2006-01-15 17:27:17 +00001364 if( CHECK_THREADID(pFile) ){
1365 return SQLITE_MISUSE;
1366 }
drh6c7d5c52008-11-21 20:32:33 +00001367 unixEnterMutex();
drh1aa5af12008-03-07 19:51:14 +00001368 h = pFile->h;
drh054889e2005-11-30 03:20:31 +00001369 pLock = pFile->pLock;
drha6abd042004-06-09 17:37:22 +00001370 assert( pLock->cnt!=0 );
drh054889e2005-11-30 03:20:31 +00001371 if( pFile->locktype>SHARED_LOCK ){
1372 assert( pLock->locktype==pFile->locktype );
drh1aa5af12008-03-07 19:51:14 +00001373 SimulateIOErrorBenign(1);
1374 SimulateIOError( h=(-1) )
1375 SimulateIOErrorBenign(0);
drh9c105bb2004-10-02 20:38:28 +00001376 if( locktype==SHARED_LOCK ){
1377 lock.l_type = F_RDLCK;
1378 lock.l_whence = SEEK_SET;
1379 lock.l_start = SHARED_FIRST;
1380 lock.l_len = SHARED_SIZE;
drh1aa5af12008-03-07 19:51:14 +00001381 if( fcntl(h, F_SETLK, &lock)==(-1) ){
aswift5b1a2562008-08-22 00:22:35 +00001382 int tErrno = errno;
1383 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_RDLOCK);
1384 if( IS_LOCK_ERROR(rc) ){
1385 pFile->lastErrno = tErrno;
1386 }
1387 goto end_unlock;
drh9c105bb2004-10-02 20:38:28 +00001388 }
1389 }
drhbbd42a62004-05-22 17:41:58 +00001390 lock.l_type = F_UNLCK;
1391 lock.l_whence = SEEK_SET;
drha6abd042004-06-09 17:37:22 +00001392 lock.l_start = PENDING_BYTE;
1393 lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE );
drh1aa5af12008-03-07 19:51:14 +00001394 if( fcntl(h, F_SETLK, &lock)!=(-1) ){
drh2b4b5962005-06-15 17:47:55 +00001395 pLock->locktype = SHARED_LOCK;
1396 }else{
aswift5b1a2562008-08-22 00:22:35 +00001397 int tErrno = errno;
1398 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
1399 if( IS_LOCK_ERROR(rc) ){
1400 pFile->lastErrno = tErrno;
1401 }
1402 goto end_unlock;
drh2b4b5962005-06-15 17:47:55 +00001403 }
drhbbd42a62004-05-22 17:41:58 +00001404 }
drha6abd042004-06-09 17:37:22 +00001405 if( locktype==NO_LOCK ){
drh6c7d5c52008-11-21 20:32:33 +00001406 struct unixOpenCnt *pOpen;
danielk1977ecb2a962004-06-02 06:30:16 +00001407
drha6abd042004-06-09 17:37:22 +00001408 /* Decrement the shared lock counter. Release the lock using an
1409 ** OS call only when all threads in this same process have released
1410 ** the lock.
1411 */
1412 pLock->cnt--;
1413 if( pLock->cnt==0 ){
1414 lock.l_type = F_UNLCK;
1415 lock.l_whence = SEEK_SET;
1416 lock.l_start = lock.l_len = 0L;
drh1aa5af12008-03-07 19:51:14 +00001417 SimulateIOErrorBenign(1);
1418 SimulateIOError( h=(-1) )
1419 SimulateIOErrorBenign(0);
1420 if( fcntl(h, F_SETLK, &lock)!=(-1) ){
drh2b4b5962005-06-15 17:47:55 +00001421 pLock->locktype = NO_LOCK;
1422 }else{
aswift5b1a2562008-08-22 00:22:35 +00001423 int tErrno = errno;
danielk19775ad6a882008-09-15 04:20:31 +00001424 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
aswift5b1a2562008-08-22 00:22:35 +00001425 if( IS_LOCK_ERROR(rc) ){
1426 pFile->lastErrno = tErrno;
1427 }
drh1aa5af12008-03-07 19:51:14 +00001428 pLock->cnt = 1;
aswift5b1a2562008-08-22 00:22:35 +00001429 goto end_unlock;
drh2b4b5962005-06-15 17:47:55 +00001430 }
drha6abd042004-06-09 17:37:22 +00001431 }
1432
drhbbd42a62004-05-22 17:41:58 +00001433 /* Decrement the count of locks against this same file. When the
1434 ** count reaches zero, close any other file descriptors whose close
1435 ** was deferred because of outstanding locks.
1436 */
drh1aa5af12008-03-07 19:51:14 +00001437 if( rc==SQLITE_OK ){
1438 pOpen = pFile->pOpen;
1439 pOpen->nLock--;
1440 assert( pOpen->nLock>=0 );
1441 if( pOpen->nLock==0 && pOpen->nPending>0 ){
1442 int i;
1443 for(i=0; i<pOpen->nPending; i++){
aswiftaebf4132008-11-21 00:10:35 +00001444 /* close pending fds, but if closing fails don't free the array
1445 ** assign -1 to the successfully closed descriptors and record the
1446 ** error. The next attempt to unlock will try again. */
1447 if( pOpen->aPending[i] < 0 ) continue;
1448 if( close(pOpen->aPending[i]) ){
1449 pFile->lastErrno = errno;
1450 rc = SQLITE_IOERR_CLOSE;
1451 }else{
1452 pOpen->aPending[i] = -1;
1453 }
drh1aa5af12008-03-07 19:51:14 +00001454 }
aswiftaebf4132008-11-21 00:10:35 +00001455 if( rc==SQLITE_OK ){
1456 sqlite3_free(pOpen->aPending);
1457 pOpen->nPending = 0;
1458 pOpen->aPending = 0;
1459 }
drhbbd42a62004-05-22 17:41:58 +00001460 }
drhbbd42a62004-05-22 17:41:58 +00001461 }
1462 }
aswift5b1a2562008-08-22 00:22:35 +00001463
1464end_unlock:
drh6c7d5c52008-11-21 20:32:33 +00001465 unixLeaveMutex();
drh1aa5af12008-03-07 19:51:14 +00001466 if( rc==SQLITE_OK ) pFile->locktype = locktype;
drh9c105bb2004-10-02 20:38:28 +00001467 return rc;
drhbbd42a62004-05-22 17:41:58 +00001468}
1469
1470/*
danielk1977e339d652008-06-28 11:23:00 +00001471** This function performs the parts of the "close file" operation
1472** common to all locking schemes. It closes the directory and file
1473** handles, if they are valid, and sets all fields of the unixFile
1474** structure to 0.
drh9b35ea62008-11-29 02:20:26 +00001475**
1476** It is *not* necessary to hold the mutex when this routine is called,
1477** even on VxWorks. A mutex will be acquired on VxWorks by the
1478** vxworksReleaseFileId() routine.
danielk1977e339d652008-06-28 11:23:00 +00001479*/
1480static int closeUnixFile(sqlite3_file *id){
1481 unixFile *pFile = (unixFile*)id;
1482 if( pFile ){
1483 if( pFile->dirfd>=0 ){
aswiftaebf4132008-11-21 00:10:35 +00001484 int err = close(pFile->dirfd);
1485 if( err ){
1486 pFile->lastErrno = errno;
1487 return SQLITE_IOERR_DIR_CLOSE;
1488 }else{
1489 pFile->dirfd=-1;
1490 }
danielk1977e339d652008-06-28 11:23:00 +00001491 }
1492 if( pFile->h>=0 ){
aswiftaebf4132008-11-21 00:10:35 +00001493 int err = close(pFile->h);
1494 if( err ){
1495 pFile->lastErrno = errno;
1496 return SQLITE_IOERR_CLOSE;
1497 }
danielk1977e339d652008-06-28 11:23:00 +00001498 }
drh6c7d5c52008-11-21 20:32:33 +00001499#if OS_VXWORKS
drh107886a2008-11-21 22:21:50 +00001500 if( pFile->pId ){
1501 if( pFile->isDelete ){
drh9b35ea62008-11-29 02:20:26 +00001502 unlink(pFile->pId->zCanonicalName);
chw97185482008-11-17 08:05:31 +00001503 }
drh107886a2008-11-21 22:21:50 +00001504 vxworksReleaseFileId(pFile->pId);
1505 pFile->pId = 0;
chw97185482008-11-17 08:05:31 +00001506 }
1507#endif
danielk1977e339d652008-06-28 11:23:00 +00001508 OSTRACE2("CLOSE %-3d\n", pFile->h);
1509 OpenCounter(-1);
1510 memset(pFile, 0, sizeof(unixFile));
1511 }
1512 return SQLITE_OK;
1513}
1514
1515/*
danielk1977e3026632004-06-22 11:29:02 +00001516** Close a file.
1517*/
danielk197762079062007-08-15 17:08:46 +00001518static int unixClose(sqlite3_file *id){
aswiftaebf4132008-11-21 00:10:35 +00001519 int rc = SQLITE_OK;
danielk1977e339d652008-06-28 11:23:00 +00001520 if( id ){
1521 unixFile *pFile = (unixFile *)id;
1522 unixUnlock(id, NO_LOCK);
drh6c7d5c52008-11-21 20:32:33 +00001523 unixEnterMutex();
danielk19776cb427f2008-06-30 10:16:04 +00001524 if( pFile->pOpen && pFile->pOpen->nLock ){
danielk1977e339d652008-06-28 11:23:00 +00001525 /* If there are outstanding locks, do not actually close the file just
1526 ** yet because that would clear those locks. Instead, add the file
1527 ** descriptor to pOpen->aPending. It will be automatically closed when
1528 ** the last lock is cleared.
1529 */
1530 int *aNew;
drh6c7d5c52008-11-21 20:32:33 +00001531 struct unixOpenCnt *pOpen = pFile->pOpen;
drhda0e7682008-07-30 15:27:54 +00001532 aNew = sqlite3_realloc(pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
danielk1977e339d652008-06-28 11:23:00 +00001533 if( aNew==0 ){
1534 /* If a malloc fails, just leak the file descriptor */
1535 }else{
1536 pOpen->aPending = aNew;
1537 pOpen->aPending[pOpen->nPending] = pFile->h;
1538 pOpen->nPending++;
1539 pFile->h = -1;
1540 }
danielk1977e3026632004-06-22 11:29:02 +00001541 }
danielk1977e339d652008-06-28 11:23:00 +00001542 releaseLockInfo(pFile->pLock);
1543 releaseOpenCnt(pFile->pOpen);
aswiftaebf4132008-11-21 00:10:35 +00001544 rc = closeUnixFile(id);
drh6c7d5c52008-11-21 20:32:33 +00001545 unixLeaveMutex();
danielk1977e3026632004-06-22 11:29:02 +00001546 }
aswiftaebf4132008-11-21 00:10:35 +00001547 return rc;
danielk1977e3026632004-06-22 11:29:02 +00001548}
1549
drh734c9862008-11-28 15:37:20 +00001550/************** End of the posix advisory lock implementation *****************
1551******************************************************************************/
drhbfe66312006-10-03 17:40:40 +00001552
drh734c9862008-11-28 15:37:20 +00001553/******************************************************************************
1554****************************** No-op Locking **********************************
1555**
1556** Of the various locking implementations available, this is by far the
1557** simplest: locking is ignored. No attempt is made to lock the database
1558** file for reading or writing.
1559**
** This locking mode is appropriate for use on read-only databases
** (for example, databases that are burned onto a CD-ROM).  It can
1562** also be used if the application employs some external mechanism to
1563** prevent simultaneous access of the same database by two or more
1564** database connections. But there is a serious risk of database
1565** corruption if this locking mode is used in situations where multiple
1566** database connections are accessing the same database file at the same
1567** time and one or more of those connections are writing.
1568*/
drhbfe66312006-10-03 17:40:40 +00001569
drh734c9862008-11-28 15:37:20 +00001570static int nolockCheckReservedLock(sqlite3_file *NotUsed, int *pResOut){
1571 UNUSED_PARAMETER(NotUsed);
1572 *pResOut = 0;
1573 return SQLITE_OK;
1574}
drh734c9862008-11-28 15:37:20 +00001575static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){
1576 UNUSED_PARAMETER2(NotUsed, NotUsed2);
1577 return SQLITE_OK;
1578}
drh734c9862008-11-28 15:37:20 +00001579static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){
1580 UNUSED_PARAMETER2(NotUsed, NotUsed2);
1581 return SQLITE_OK;
1582}
1583
1584/*
drh9b35ea62008-11-29 02:20:26 +00001585** Close the file.
drh734c9862008-11-28 15:37:20 +00001586*/
1587static int nolockClose(sqlite3_file *id) {
drh9b35ea62008-11-29 02:20:26 +00001588 return closeUnixFile(id);
drh734c9862008-11-28 15:37:20 +00001589}
1590
1591/******************* End of the no-op lock implementation *********************
1592******************************************************************************/
1593
1594/******************************************************************************
1595************************* Begin dot-file Locking ******************************
1596**
** The dotfile locking implementation uses the existence of separate lock
1598** files in order to control access to the database. This works on just
1599** about every filesystem imaginable. But there are serious downsides:
1600**
1601** (1) There is zero concurrency. A single reader blocks all other
1602** connections from reading or writing the database.
1603**
1604** (2) An application crash or power loss can leave stale lock files
1605** sitting around that need to be cleared manually.
1606**
1607** Nevertheless, a dotlock is an appropriate locking mode for use if no
1608** other locking strategy is available.
drh7708e972008-11-29 00:56:52 +00001609**
1610** Dotfile locking works by creating a file in the same directory as the
1611** database and with the same name but with a ".lock" extension added.
** The existence of a lock file implies an EXCLUSIVE lock.  All other lock
1613** types (SHARED, RESERVED, PENDING) are mapped into EXCLUSIVE.
drh734c9862008-11-28 15:37:20 +00001614*/
1615
/*
** The file suffix added to the database filename in order to create the
** name of the lock file used by dot-file locking.
*/
#define DOTLOCK_SUFFIX ".lock"
1621
drh7708e972008-11-29 00:56:52 +00001622/*
1623** This routine checks if there is a RESERVED lock held on the specified
1624** file by this or any other process. If such a lock is held, set *pResOut
1625** to a non-zero value otherwise *pResOut is set to zero. The return value
1626** is set to SQLITE_OK unless an I/O error occurs during lock checking.
1627**
1628** In dotfile locking, either a lock exists or it does not. So in this
1629** variation of CheckReservedLock(), *pResOut is set to true if any lock
1630** is held on the file and false if the file is unlocked.
1631*/
drh734c9862008-11-28 15:37:20 +00001632static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) {
1633 int rc = SQLITE_OK;
1634 int reserved = 0;
1635 unixFile *pFile = (unixFile*)id;
1636
1637 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
1638
1639 assert( pFile );
1640
1641 /* Check if a thread in this process holds such a lock */
1642 if( pFile->locktype>SHARED_LOCK ){
drh7708e972008-11-29 00:56:52 +00001643 /* Either this connection or some other connection in the same process
1644 ** holds a lock on the file. No need to check further. */
drh734c9862008-11-28 15:37:20 +00001645 reserved = 1;
drh7708e972008-11-29 00:56:52 +00001646 }else{
1647 /* The lock is held if and only if the lockfile exists */
1648 const char *zLockFile = (const char*)pFile->lockingContext;
1649 reserved = access(zLockFile, 0)==0;
drh734c9862008-11-28 15:37:20 +00001650 }
1651 OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
drh734c9862008-11-28 15:37:20 +00001652 *pResOut = reserved;
1653 return rc;
1654}
1655
drh7708e972008-11-29 00:56:52 +00001656/*
1657** Lock the file with the lock specified by parameter locktype - one
1658** of the following:
1659**
1660** (1) SHARED_LOCK
1661** (2) RESERVED_LOCK
1662** (3) PENDING_LOCK
1663** (4) EXCLUSIVE_LOCK
1664**
1665** Sometimes when requesting one lock state, additional lock states
1666** are inserted in between. The locking might fail on one of the later
1667** transitions leaving the lock state different from what it started but
1668** still short of its goal. The following chart shows the allowed
1669** transitions and the inserted intermediate states:
1670**
1671** UNLOCKED -> SHARED
1672** SHARED -> RESERVED
1673** SHARED -> (PENDING) -> EXCLUSIVE
1674** RESERVED -> (PENDING) -> EXCLUSIVE
1675** PENDING -> EXCLUSIVE
1676**
1677** This routine will only increase a lock. Use the sqlite3OsUnlock()
1678** routine to lower a locking level.
1679**
1680** With dotfile locking, we really only support state (4): EXCLUSIVE.
1681** But we track the other locking levels internally.
1682*/
drh734c9862008-11-28 15:37:20 +00001683static int dotlockLock(sqlite3_file *id, int locktype) {
1684 unixFile *pFile = (unixFile*)id;
1685 int fd;
1686 char *zLockFile = (char *)pFile->lockingContext;
drh7708e972008-11-29 00:56:52 +00001687 int rc = SQLITE_OK;
drh734c9862008-11-28 15:37:20 +00001688
drh7708e972008-11-29 00:56:52 +00001689
1690 /* If we have any lock, then the lock file already exists. All we have
1691 ** to do is adjust our internal record of the lock level.
1692 */
1693 if( pFile->locktype > NO_LOCK ){
drh734c9862008-11-28 15:37:20 +00001694 pFile->locktype = locktype;
1695#if !OS_VXWORKS
1696 /* Always update the timestamp on the old file */
1697 utimes(zLockFile, NULL);
1698#endif
drh7708e972008-11-29 00:56:52 +00001699 return SQLITE_OK;
drh734c9862008-11-28 15:37:20 +00001700 }
1701
1702 /* grab an exclusive lock */
1703 fd = open(zLockFile,O_RDONLY|O_CREAT|O_EXCL,0600);
1704 if( fd<0 ){
1705 /* failed to open/create the file, someone else may have stolen the lock */
1706 int tErrno = errno;
1707 if( EEXIST == tErrno ){
1708 rc = SQLITE_BUSY;
1709 } else {
1710 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
1711 if( IS_LOCK_ERROR(rc) ){
1712 pFile->lastErrno = tErrno;
1713 }
1714 }
drh7708e972008-11-29 00:56:52 +00001715 return rc;
drh734c9862008-11-28 15:37:20 +00001716 }
1717 if( close(fd) ){
1718 pFile->lastErrno = errno;
1719 rc = SQLITE_IOERR_CLOSE;
1720 }
1721
1722 /* got it, set the type and return ok */
1723 pFile->locktype = locktype;
drh734c9862008-11-28 15:37:20 +00001724 return rc;
1725}
1726
drh7708e972008-11-29 00:56:52 +00001727/*
1728** Lower the locking level on file descriptor pFile to locktype. locktype
1729** must be either NO_LOCK or SHARED_LOCK.
1730**
1731** If the locking level of the file descriptor is already at or below
1732** the requested locking level, this routine is a no-op.
1733**
1734** When the locking level reaches NO_LOCK, delete the lock file.
1735*/
drh734c9862008-11-28 15:37:20 +00001736static int dotlockUnlock(sqlite3_file *id, int locktype) {
1737 unixFile *pFile = (unixFile*)id;
1738 char *zLockFile = (char *)pFile->lockingContext;
1739
1740 assert( pFile );
1741 OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype,
1742 pFile->locktype, getpid());
1743 assert( locktype<=SHARED_LOCK );
1744
1745 /* no-op if possible */
1746 if( pFile->locktype==locktype ){
1747 return SQLITE_OK;
1748 }
drh7708e972008-11-29 00:56:52 +00001749
1750 /* To downgrade to shared, simply update our internal notion of the
1751 ** lock state. No need to mess with the file on disk.
1752 */
1753 if( locktype==SHARED_LOCK ){
1754 pFile->locktype = SHARED_LOCK;
drh734c9862008-11-28 15:37:20 +00001755 return SQLITE_OK;
1756 }
1757
drh7708e972008-11-29 00:56:52 +00001758 /* To fully unlock the database, delete the lock file */
1759 assert( locktype==NO_LOCK );
1760 if( unlink(zLockFile) ){
drh734c9862008-11-28 15:37:20 +00001761 int rc, tErrno = errno;
1762 if( ENOENT != tErrno ){
1763 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
1764 }
1765 if( IS_LOCK_ERROR(rc) ){
1766 pFile->lastErrno = tErrno;
1767 }
1768 return rc;
1769 }
1770 pFile->locktype = NO_LOCK;
1771 return SQLITE_OK;
1772}
1773
1774/*
drh9b35ea62008-11-29 02:20:26 +00001775** Close a file. Make sure the lock has been released before closing.
drh734c9862008-11-28 15:37:20 +00001776*/
1777static int dotlockClose(sqlite3_file *id) {
1778 int rc;
1779 if( id ){
1780 unixFile *pFile = (unixFile*)id;
1781 dotlockUnlock(id, NO_LOCK);
1782 sqlite3_free(pFile->lockingContext);
1783 }
drh734c9862008-11-28 15:37:20 +00001784 rc = closeUnixFile(id);
drh734c9862008-11-28 15:37:20 +00001785 return rc;
1786}
1787/****************** End of the dot-file lock implementation *******************
1788******************************************************************************/
1789
1790/******************************************************************************
1791************************** Begin flock Locking ********************************
1792**
1793** Use the flock() system call to do file locking.
1794**
drh6b9d6dd2008-12-03 19:34:47 +00001795** flock() locking is like dot-file locking in that the various
1796** fine-grain locking levels supported by SQLite are collapsed into
1797** a single exclusive lock. In other words, SHARED, RESERVED, and
1798** PENDING locks are the same thing as an EXCLUSIVE lock. SQLite
1799** still works when you do this, but concurrency is reduced since
1800** only a single process can be reading the database at a time.
1801**
drh734c9862008-11-28 15:37:20 +00001802** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off or if
1803** compiling for VXWORKS.
1804*/
1805#if SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS
drh734c9862008-11-28 15:37:20 +00001806
drh6b9d6dd2008-12-03 19:34:47 +00001807/*
1808** This routine checks if there is a RESERVED lock held on the specified
1809** file by this or any other process. If such a lock is held, set *pResOut
1810** to a non-zero value otherwise *pResOut is set to zero. The return value
1811** is set to SQLITE_OK unless an I/O error occurs during lock checking.
1812*/
drh734c9862008-11-28 15:37:20 +00001813static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){
1814 int rc = SQLITE_OK;
1815 int reserved = 0;
1816 unixFile *pFile = (unixFile*)id;
1817
1818 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
1819
1820 assert( pFile );
1821
1822 /* Check if a thread in this process holds such a lock */
1823 if( pFile->locktype>SHARED_LOCK ){
1824 reserved = 1;
1825 }
1826
1827 /* Otherwise see if some other process holds it. */
1828 if( !reserved ){
1829 /* attempt to get the lock */
1830 int lrc = flock(pFile->h, LOCK_EX | LOCK_NB);
1831 if( !lrc ){
1832 /* got the lock, unlock it */
1833 lrc = flock(pFile->h, LOCK_UN);
1834 if ( lrc ) {
1835 int tErrno = errno;
1836 /* unlock failed with an error */
1837 lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
1838 if( IS_LOCK_ERROR(lrc) ){
1839 pFile->lastErrno = tErrno;
1840 rc = lrc;
1841 }
1842 }
1843 } else {
1844 int tErrno = errno;
1845 reserved = 1;
1846 /* someone else might have it reserved */
1847 lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
1848 if( IS_LOCK_ERROR(lrc) ){
1849 pFile->lastErrno = tErrno;
1850 rc = lrc;
1851 }
1852 }
1853 }
1854 OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
1855
1856#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS
1857 if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){
1858 rc = SQLITE_OK;
1859 reserved=1;
1860 }
1861#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */
1862 *pResOut = reserved;
1863 return rc;
1864}
1865
drh6b9d6dd2008-12-03 19:34:47 +00001866/*
1867** Lock the file with the lock specified by parameter locktype - one
1868** of the following:
1869**
1870** (1) SHARED_LOCK
1871** (2) RESERVED_LOCK
1872** (3) PENDING_LOCK
1873** (4) EXCLUSIVE_LOCK
1874**
1875** Sometimes when requesting one lock state, additional lock states
1876** are inserted in between. The locking might fail on one of the later
1877** transitions leaving the lock state different from what it started but
1878** still short of its goal. The following chart shows the allowed
1879** transitions and the inserted intermediate states:
1880**
1881** UNLOCKED -> SHARED
1882** SHARED -> RESERVED
1883** SHARED -> (PENDING) -> EXCLUSIVE
1884** RESERVED -> (PENDING) -> EXCLUSIVE
1885** PENDING -> EXCLUSIVE
1886**
1887** flock() only really support EXCLUSIVE locks. We track intermediate
1888** lock states in the sqlite3_file structure, but all locks SHARED or
1889** above are really EXCLUSIVE locks and exclude all other processes from
1890** access the file.
1891**
1892** This routine will only increase a lock. Use the sqlite3OsUnlock()
1893** routine to lower a locking level.
1894*/
drh734c9862008-11-28 15:37:20 +00001895static int flockLock(sqlite3_file *id, int locktype) {
1896 int rc = SQLITE_OK;
drh734c9862008-11-28 15:37:20 +00001897 unixFile *pFile = (unixFile*)id;
1898
1899 assert( pFile );
1900
1901 /* if we already have a lock, it is exclusive.
1902 ** Just adjust level and punt on outta here. */
1903 if (pFile->locktype > NO_LOCK) {
1904 pFile->locktype = locktype;
1905 return SQLITE_OK;
1906 }
1907
1908 /* grab an exclusive lock */
1909
1910 if (flock(pFile->h, LOCK_EX | LOCK_NB)) {
1911 int tErrno = errno;
1912 /* didn't get, must be busy */
1913 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
1914 if( IS_LOCK_ERROR(rc) ){
1915 pFile->lastErrno = tErrno;
1916 }
1917 } else {
1918 /* got it, set the type and return ok */
1919 pFile->locktype = locktype;
1920 }
1921 OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
1922 rc==SQLITE_OK ? "ok" : "failed");
1923#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS
1924 if( (rc & SQLITE_IOERR) == SQLITE_IOERR ){
1925 rc = SQLITE_BUSY;
1926 }
1927#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */
1928 return rc;
1929}
1930
drh6b9d6dd2008-12-03 19:34:47 +00001931
1932/*
1933** Lower the locking level on file descriptor pFile to locktype. locktype
1934** must be either NO_LOCK or SHARED_LOCK.
1935**
1936** If the locking level of the file descriptor is already at or below
1937** the requested locking level, this routine is a no-op.
1938*/
drh734c9862008-11-28 15:37:20 +00001939static int flockUnlock(sqlite3_file *id, int locktype) {
1940 unixFile *pFile = (unixFile*)id;
1941
1942 assert( pFile );
1943 OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype,
1944 pFile->locktype, getpid());
1945 assert( locktype<=SHARED_LOCK );
1946
1947 /* no-op if possible */
1948 if( pFile->locktype==locktype ){
1949 return SQLITE_OK;
1950 }
1951
1952 /* shared can just be set because we always have an exclusive */
1953 if (locktype==SHARED_LOCK) {
1954 pFile->locktype = locktype;
1955 return SQLITE_OK;
1956 }
1957
1958 /* no, really, unlock. */
1959 int rc = flock(pFile->h, LOCK_UN);
1960 if (rc) {
1961 int r, tErrno = errno;
1962 r = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
1963 if( IS_LOCK_ERROR(r) ){
1964 pFile->lastErrno = tErrno;
1965 }
1966#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS
1967 if( (r & SQLITE_IOERR) == SQLITE_IOERR ){
1968 r = SQLITE_BUSY;
1969 }
1970#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */
1971
1972 return r;
1973 } else {
1974 pFile->locktype = NO_LOCK;
1975 return SQLITE_OK;
1976 }
1977}
1978
1979/*
1980** Close a file.
1981*/
1982static int flockClose(sqlite3_file *id) {
1983 if( id ){
1984 flockUnlock(id, NO_LOCK);
1985 }
1986 return closeUnixFile(id);
1987}
1988
#endif /* SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS */
1990
1991/******************* End of the flock lock implementation *********************
1992******************************************************************************/
1993
1994/******************************************************************************
1995************************ Begin Named Semaphore Locking ************************
1996**
1997** Named semaphore locking is only supported on VxWorks.
drh6b9d6dd2008-12-03 19:34:47 +00001998**
1999** Semaphore locking is like dot-lock and flock in that it really only
2000** supports EXCLUSIVE locking. Only a single process can read or write
2001** the database file at a time. This reduces potential concurrency, but
2002** makes the lock implementation much easier.
drh734c9862008-11-28 15:37:20 +00002003*/
2004#if OS_VXWORKS
2005
drh6b9d6dd2008-12-03 19:34:47 +00002006/*
2007** This routine checks if there is a RESERVED lock held on the specified
2008** file by this or any other process. If such a lock is held, set *pResOut
2009** to a non-zero value otherwise *pResOut is set to zero. The return value
2010** is set to SQLITE_OK unless an I/O error occurs during lock checking.
2011*/
drh734c9862008-11-28 15:37:20 +00002012static int semCheckReservedLock(sqlite3_file *id, int *pResOut) {
2013 int rc = SQLITE_OK;
2014 int reserved = 0;
2015 unixFile *pFile = (unixFile*)id;
2016
2017 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
2018
2019 assert( pFile );
2020
2021 /* Check if a thread in this process holds such a lock */
2022 if( pFile->locktype>SHARED_LOCK ){
2023 reserved = 1;
2024 }
2025
2026 /* Otherwise see if some other process holds it. */
2027 if( !reserved ){
2028 sem_t *pSem = pFile->pOpen->pSem;
2029 struct stat statBuf;
2030
2031 if( sem_trywait(pSem)==-1 ){
2032 int tErrno = errno;
2033 if( EAGAIN != tErrno ){
2034 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
2035 pFile->lastErrno = tErrno;
2036 } else {
2037 /* someone else has the lock when we are in NO_LOCK */
2038 reserved = (pFile->locktype < SHARED_LOCK);
2039 }
2040 }else{
2041 /* we could have it if we want it */
2042 sem_post(pSem);
2043 }
2044 }
2045 OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
2046
2047 *pResOut = reserved;
2048 return rc;
2049}
2050
drh6b9d6dd2008-12-03 19:34:47 +00002051/*
2052** Lock the file with the lock specified by parameter locktype - one
2053** of the following:
2054**
2055** (1) SHARED_LOCK
2056** (2) RESERVED_LOCK
2057** (3) PENDING_LOCK
2058** (4) EXCLUSIVE_LOCK
2059**
2060** Sometimes when requesting one lock state, additional lock states
2061** are inserted in between. The locking might fail on one of the later
2062** transitions leaving the lock state different from what it started but
2063** still short of its goal. The following chart shows the allowed
2064** transitions and the inserted intermediate states:
2065**
2066** UNLOCKED -> SHARED
2067** SHARED -> RESERVED
2068** SHARED -> (PENDING) -> EXCLUSIVE
2069** RESERVED -> (PENDING) -> EXCLUSIVE
2070** PENDING -> EXCLUSIVE
2071**
2072** Semaphore locks only really support EXCLUSIVE locks. We track intermediate
2073** lock states in the sqlite3_file structure, but all locks SHARED or
2074** above are really EXCLUSIVE locks and exclude all other processes from
2075** access the file.
2076**
2077** This routine will only increase a lock. Use the sqlite3OsUnlock()
2078** routine to lower a locking level.
2079*/
drh734c9862008-11-28 15:37:20 +00002080static int semLock(sqlite3_file *id, int locktype) {
2081 unixFile *pFile = (unixFile*)id;
2082 int fd;
2083 sem_t *pSem = pFile->pOpen->pSem;
2084 int rc = SQLITE_OK;
2085
2086 /* if we already have a lock, it is exclusive.
2087 ** Just adjust level and punt on outta here. */
2088 if (pFile->locktype > NO_LOCK) {
2089 pFile->locktype = locktype;
2090 rc = SQLITE_OK;
2091 goto sem_end_lock;
2092 }
2093
2094 /* lock semaphore now but bail out when already locked. */
2095 if( sem_trywait(pSem)==-1 ){
2096 rc = SQLITE_BUSY;
2097 goto sem_end_lock;
2098 }
2099
2100 /* got it, set the type and return ok */
2101 pFile->locktype = locktype;
2102
2103 sem_end_lock:
2104 return rc;
2105}
2106
drh6b9d6dd2008-12-03 19:34:47 +00002107/*
2108** Lower the locking level on file descriptor pFile to locktype. locktype
2109** must be either NO_LOCK or SHARED_LOCK.
2110**
2111** If the locking level of the file descriptor is already at or below
2112** the requested locking level, this routine is a no-op.
2113*/
drh734c9862008-11-28 15:37:20 +00002114static int semUnlock(sqlite3_file *id, int locktype) {
2115 unixFile *pFile = (unixFile*)id;
2116 sem_t *pSem = pFile->pOpen->pSem;
2117
2118 assert( pFile );
2119 assert( pSem );
2120 OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype,
2121 pFile->locktype, getpid());
2122 assert( locktype<=SHARED_LOCK );
2123
2124 /* no-op if possible */
2125 if( pFile->locktype==locktype ){
2126 return SQLITE_OK;
2127 }
2128
2129 /* shared can just be set because we always have an exclusive */
2130 if (locktype==SHARED_LOCK) {
2131 pFile->locktype = locktype;
2132 return SQLITE_OK;
2133 }
2134
2135 /* no, really unlock. */
2136 if ( sem_post(pSem)==-1 ) {
2137 int rc, tErrno = errno;
2138 rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
2139 if( IS_LOCK_ERROR(rc) ){
2140 pFile->lastErrno = tErrno;
2141 }
2142 return rc;
2143 }
2144 pFile->locktype = NO_LOCK;
2145 return SQLITE_OK;
2146}
2147
2148/*
2149 ** Close a file.
drhbfe66312006-10-03 17:40:40 +00002150 */
drh734c9862008-11-28 15:37:20 +00002151static int semClose(sqlite3_file *id) {
2152 if( id ){
2153 unixFile *pFile = (unixFile*)id;
2154 semUnlock(id, NO_LOCK);
2155 assert( pFile );
2156 unixEnterMutex();
2157 releaseLockInfo(pFile->pLock);
2158 releaseOpenCnt(pFile->pOpen);
2159 closeUnixFile(id);
2160 unixLeaveMutex();
2161 }
2162 return SQLITE_OK;
2163}
2164
2165#endif /* OS_VXWORKS */
2166/*
2167** Named semaphore locking is only available on VxWorks.
2168**
2169*************** End of the named semaphore lock implementation ****************
2170******************************************************************************/
2171
2172
2173/******************************************************************************
2174*************************** Begin AFP Locking *********************************
2175**
2176** AFP is the Apple Filing Protocol. AFP is a network filesystem found
2177** on Apple Macintosh computers - both OS9 and OSX.
2178**
2179** Third-party implementations of AFP are available. But this code here
2180** only works on OSX.
2181*/
2182
2183#if defined(__DARWIN__) && SQLITE_ENABLE_LOCKING_STYLE
/*
** The afpLockingContext structure contains all afp lock specific state.
** A pointer to one of these is kept in unixFile.lockingContext for files
** that use AFP locking.
*/
typedef struct afpLockingContext afpLockingContext;
struct afpLockingContext {
  unsigned long long sharedByte;  /* NOTE(review): presumably the offset of
                                  ** the byte locked for a SHARED lock -
                                  ** confirm against afpLock() */
  const char *dbPath;             /* Name of the open file */
};
2192
/*
** Parameter block that afpSetLock() fills in and passes to fsctl() in
** order to set or clear a byte-range lock.
*/
struct ByteRangeLockPB2
{
  unsigned long long offset;        /* offset to first byte to lock */
  unsigned long long length;        /* nbr of bytes to lock */
  unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */
  unsigned char unLockFlag;         /* 1 = unlock, 0 = lock */
  unsigned char startEndFlag;       /* 1=rel to end of fork, 0=rel to start */
  int fd;                           /* file desc to assoc this lock with */
};

/* fsctl() request code used together with struct ByteRangeLockPB2 */
#define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2)
drhbfe66312006-10-03 17:40:40 +00002204
drh6b9d6dd2008-12-03 19:34:47 +00002205/*
2206** This is a utility for setting or clearing a bit-range lock on an
2207** AFP filesystem.
2208**
2209** Return SQLITE_OK on success, SQLITE_BUSY on failure.
2210*/
2211static int afpSetLock(
2212 const char *path, /* Name of the file to be locked or unlocked */
2213 unixFile *pFile, /* Open file descriptor on path */
2214 unsigned long long offset, /* First byte to be locked */
2215 unsigned long long length, /* Number of bytes to lock */
2216 int setLockFlag /* True to set lock. False to clear lock */
danielk1977ad94b582007-08-20 06:44:22 +00002217){
drh6b9d6dd2008-12-03 19:34:47 +00002218 struct ByteRangeLockPB2 pb;
2219 int err;
drhbfe66312006-10-03 17:40:40 +00002220
2221 pb.unLockFlag = setLockFlag ? 0 : 1;
2222 pb.startEndFlag = 0;
2223 pb.offset = offset;
2224 pb.length = length;
aswift5b1a2562008-08-22 00:22:35 +00002225 pb.fd = pFile->h;
aswiftaebf4132008-11-21 00:10:35 +00002226 //SimulateIOErrorBenign(1);
2227 //SimulateIOError( pb.fd=(-1) )
2228 //SimulateIOErrorBenign(0);
2229
2230 OSTRACE6("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n",
drh734c9862008-11-28 15:37:20 +00002231 (setLockFlag?"ON":"OFF"), pFile->h, (pb.fd==-1?"[testval-1]":""),
2232 offset, length);
drhbfe66312006-10-03 17:40:40 +00002233 err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0);
2234 if ( err==-1 ) {
aswift5b1a2562008-08-22 00:22:35 +00002235 int rc;
2236 int tErrno = errno;
drh734c9862008-11-28 15:37:20 +00002237 OSTRACE4("AFPSETLOCK failed to fsctl() '%s' %d %s\n",
2238 path, tErrno, strerror(tErrno));
aswiftaebf4132008-11-21 00:10:35 +00002239#ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS
2240 rc = SQLITE_BUSY;
2241#else
drh734c9862008-11-28 15:37:20 +00002242 rc = sqliteErrorFromPosixError(tErrno,
2243 setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK);
aswiftaebf4132008-11-21 00:10:35 +00002244#endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */
aswift5b1a2562008-08-22 00:22:35 +00002245 if( IS_LOCK_ERROR(rc) ){
2246 pFile->lastErrno = tErrno;
2247 }
2248 return rc;
drhbfe66312006-10-03 17:40:40 +00002249 } else {
aswift5b1a2562008-08-22 00:22:35 +00002250 return SQLITE_OK;
drhbfe66312006-10-03 17:40:40 +00002251 }
2252}
2253
drh6b9d6dd2008-12-03 19:34:47 +00002254/*
2255** This routine checks if there is a RESERVED lock held on the specified
2256** file by this or any other process. If such a lock is held, set *pResOut
2257** to a non-zero value otherwise *pResOut is set to zero. The return value
2258** is set to SQLITE_OK unless an I/O error occurs during lock checking.
2259*/
danielk1977e339d652008-06-28 11:23:00 +00002260static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){
aswift5b1a2562008-08-22 00:22:35 +00002261 int rc = SQLITE_OK;
2262 int reserved = 0;
drhbfe66312006-10-03 17:40:40 +00002263 unixFile *pFile = (unixFile*)id;
2264
aswift5b1a2562008-08-22 00:22:35 +00002265 SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
2266
2267 assert( pFile );
drhbfe66312006-10-03 17:40:40 +00002268 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
2269
2270 /* Check if a thread in this process holds such a lock */
2271 if( pFile->locktype>SHARED_LOCK ){
aswift5b1a2562008-08-22 00:22:35 +00002272 reserved = 1;
drhbfe66312006-10-03 17:40:40 +00002273 }
2274
2275 /* Otherwise see if some other process holds it.
2276 */
aswift5b1a2562008-08-22 00:22:35 +00002277 if( !reserved ){
2278 /* lock the RESERVED byte */
drh6b9d6dd2008-12-03 19:34:47 +00002279 int lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1);
aswift5b1a2562008-08-22 00:22:35 +00002280 if( SQLITE_OK==lrc ){
drhbfe66312006-10-03 17:40:40 +00002281 /* if we succeeded in taking the reserved lock, unlock it to restore
2282 ** the original state */
drh6b9d6dd2008-12-03 19:34:47 +00002283 lrc = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0);
aswift5b1a2562008-08-22 00:22:35 +00002284 } else {
2285 /* if we failed to get the lock then someone else must have it */
2286 reserved = 1;
2287 }
2288 if( IS_LOCK_ERROR(lrc) ){
2289 rc=lrc;
drhbfe66312006-10-03 17:40:40 +00002290 }
2291 }
drhbfe66312006-10-03 17:40:40 +00002292
aswift5b1a2562008-08-22 00:22:35 +00002293 OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
2294
2295 *pResOut = reserved;
2296 return rc;
drhbfe66312006-10-03 17:40:40 +00002297}
2298
drh6b9d6dd2008-12-03 19:34:47 +00002299/*
2300** Lock the file with the lock specified by parameter locktype - one
2301** of the following:
2302**
2303** (1) SHARED_LOCK
2304** (2) RESERVED_LOCK
2305** (3) PENDING_LOCK
2306** (4) EXCLUSIVE_LOCK
2307**
2308** Sometimes when requesting one lock state, additional lock states
2309** are inserted in between. The locking might fail on one of the later
2310** transitions leaving the lock state different from what it started but
2311** still short of its goal. The following chart shows the allowed
2312** transitions and the inserted intermediate states:
2313**
2314** UNLOCKED -> SHARED
2315** SHARED -> RESERVED
2316** SHARED -> (PENDING) -> EXCLUSIVE
2317** RESERVED -> (PENDING) -> EXCLUSIVE
2318** PENDING -> EXCLUSIVE
2319**
2320** This routine will only increase a lock. Use the sqlite3OsUnlock()
2321** routine to lower a locking level.
2322*/
danielk1977e339d652008-06-28 11:23:00 +00002323static int afpLock(sqlite3_file *id, int locktype){
drhbfe66312006-10-03 17:40:40 +00002324 int rc = SQLITE_OK;
2325 unixFile *pFile = (unixFile*)id;
2326 afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
drhbfe66312006-10-03 17:40:40 +00002327
2328 assert( pFile );
drh4f0c5872007-03-26 22:05:01 +00002329 OSTRACE5("LOCK %d %s was %s pid=%d\n", pFile->h,
drh339eb0b2008-03-07 15:34:11 +00002330 locktypeName(locktype), locktypeName(pFile->locktype), getpid());
2331
drhbfe66312006-10-03 17:40:40 +00002332 /* If there is already a lock of this type or more restrictive on the
drh339eb0b2008-03-07 15:34:11 +00002333 ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as
drh6c7d5c52008-11-21 20:32:33 +00002334 ** unixEnterMutex() hasn't been called yet.
drh339eb0b2008-03-07 15:34:11 +00002335 */
drhbfe66312006-10-03 17:40:40 +00002336 if( pFile->locktype>=locktype ){
drh4f0c5872007-03-26 22:05:01 +00002337 OSTRACE3("LOCK %d %s ok (already held)\n", pFile->h,
drhbfe66312006-10-03 17:40:40 +00002338 locktypeName(locktype));
2339 return SQLITE_OK;
2340 }
2341
2342 /* Make sure the locking sequence is correct
drh339eb0b2008-03-07 15:34:11 +00002343 */
drhbfe66312006-10-03 17:40:40 +00002344 assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
2345 assert( locktype!=PENDING_LOCK );
2346 assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
2347
2348 /* This mutex is needed because pFile->pLock is shared across threads
drh339eb0b2008-03-07 15:34:11 +00002349 */
drh6c7d5c52008-11-21 20:32:33 +00002350 unixEnterMutex();
drhbfe66312006-10-03 17:40:40 +00002351
2352 /* Make sure the current thread owns the pFile.
drh339eb0b2008-03-07 15:34:11 +00002353 */
drhbfe66312006-10-03 17:40:40 +00002354 rc = transferOwnership(pFile);
2355 if( rc!=SQLITE_OK ){
drh6c7d5c52008-11-21 20:32:33 +00002356 unixLeaveMutex();
drhbfe66312006-10-03 17:40:40 +00002357 return rc;
2358 }
2359
2360 /* A PENDING lock is needed before acquiring a SHARED lock and before
drh339eb0b2008-03-07 15:34:11 +00002361 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
2362 ** be released.
2363 */
drhbfe66312006-10-03 17:40:40 +00002364 if( locktype==SHARED_LOCK
2365 || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
drh339eb0b2008-03-07 15:34:11 +00002366 ){
2367 int failed;
drh6b9d6dd2008-12-03 19:34:47 +00002368 failed = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 1);
drhbfe66312006-10-03 17:40:40 +00002369 if (failed) {
aswift5b1a2562008-08-22 00:22:35 +00002370 rc = failed;
drhbfe66312006-10-03 17:40:40 +00002371 goto afp_end_lock;
2372 }
2373 }
2374
2375 /* If control gets to this point, then actually go ahead and make
drh339eb0b2008-03-07 15:34:11 +00002376 ** operating system calls for the specified lock.
2377 */
drhbfe66312006-10-03 17:40:40 +00002378 if( locktype==SHARED_LOCK ){
aswift5b1a2562008-08-22 00:22:35 +00002379 int lk, lrc1, lrc2, lrc1Errno;
drhbfe66312006-10-03 17:40:40 +00002380
aswift5b1a2562008-08-22 00:22:35 +00002381 /* Now get the read-lock SHARED_LOCK */
drhbfe66312006-10-03 17:40:40 +00002382 /* note that the quality of the randomness doesn't matter that much */
2383 lk = random();
aswiftaebf4132008-11-21 00:10:35 +00002384 context->sharedByte = (lk & 0x7fffffff)%(SHARED_SIZE - 1);
drh6b9d6dd2008-12-03 19:34:47 +00002385 lrc1 = afpSetLock(context->dbPath, pFile,
aswiftaebf4132008-11-21 00:10:35 +00002386 SHARED_FIRST+context->sharedByte, 1, 1);
aswift5b1a2562008-08-22 00:22:35 +00002387 if( IS_LOCK_ERROR(lrc1) ){
2388 lrc1Errno = pFile->lastErrno;
drhbfe66312006-10-03 17:40:40 +00002389 }
aswift5b1a2562008-08-22 00:22:35 +00002390 /* Drop the temporary PENDING lock */
drh6b9d6dd2008-12-03 19:34:47 +00002391 lrc2 = afpSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0);
drhbfe66312006-10-03 17:40:40 +00002392
aswift5b1a2562008-08-22 00:22:35 +00002393 if( IS_LOCK_ERROR(lrc1) ) {
2394 pFile->lastErrno = lrc1Errno;
2395 rc = lrc1;
2396 goto afp_end_lock;
2397 } else if( IS_LOCK_ERROR(lrc2) ){
2398 rc = lrc2;
2399 goto afp_end_lock;
2400 } else if( lrc1 != SQLITE_OK ) {
2401 rc = lrc1;
drhbfe66312006-10-03 17:40:40 +00002402 } else {
2403 pFile->locktype = SHARED_LOCK;
aswiftaebf4132008-11-21 00:10:35 +00002404 pFile->pOpen->nLock++;
drhbfe66312006-10-03 17:40:40 +00002405 }
2406 }else{
2407 /* The request was for a RESERVED or EXCLUSIVE lock. It is
2408 ** assumed that there is a SHARED or greater lock on the file
2409 ** already.
2410 */
2411 int failed = 0;
2412 assert( 0!=pFile->locktype );
2413 if (locktype >= RESERVED_LOCK && pFile->locktype < RESERVED_LOCK) {
2414 /* Acquire a RESERVED lock */
drh6b9d6dd2008-12-03 19:34:47 +00002415 failed = afpSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1);
drhbfe66312006-10-03 17:40:40 +00002416 }
2417 if (!failed && locktype == EXCLUSIVE_LOCK) {
2418 /* Acquire an EXCLUSIVE lock */
2419
2420 /* Remove the shared lock before trying the range. we'll need to
danielk1977e339d652008-06-28 11:23:00 +00002421 ** reestablish the shared lock if we can't get the afpUnlock
drhbfe66312006-10-03 17:40:40 +00002422 */
drh6b9d6dd2008-12-03 19:34:47 +00002423 if( !(failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST +
aswiftaebf4132008-11-21 00:10:35 +00002424 context->sharedByte, 1, 0)) ){
2425 int failed2 = SQLITE_OK;
drhbfe66312006-10-03 17:40:40 +00002426 /* now attemmpt to get the exclusive lock range */
drh6b9d6dd2008-12-03 19:34:47 +00002427 failed = afpSetLock(context->dbPath, pFile, SHARED_FIRST,
drhbfe66312006-10-03 17:40:40 +00002428 SHARED_SIZE, 1);
drh6b9d6dd2008-12-03 19:34:47 +00002429 if( failed && (failed2 = afpSetLock(context->dbPath, pFile,
aswiftaebf4132008-11-21 00:10:35 +00002430 SHARED_FIRST + context->sharedByte, 1, 1)) ){
2431 /* Can't reestablish the shared lock. Sqlite can't deal, this is
2432 ** a critical I/O error
2433 */
2434 rc = ((failed & SQLITE_IOERR) == SQLITE_IOERR) ? failed2 :
2435 SQLITE_IOERR_LOCK;
2436 goto afp_end_lock;
2437 }
2438 }else{
aswift5b1a2562008-08-22 00:22:35 +00002439 rc = failed;
drhbfe66312006-10-03 17:40:40 +00002440 }
2441 }
aswift5b1a2562008-08-22 00:22:35 +00002442 if( failed ){
2443 rc = failed;
drhbfe66312006-10-03 17:40:40 +00002444 }
2445 }
2446
2447 if( rc==SQLITE_OK ){
2448 pFile->locktype = locktype;
2449 }else if( locktype==EXCLUSIVE_LOCK ){
2450 pFile->locktype = PENDING_LOCK;
2451 }
2452
2453afp_end_lock:
drh6c7d5c52008-11-21 20:32:33 +00002454 unixLeaveMutex();
drh4f0c5872007-03-26 22:05:01 +00002455 OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
drhbfe66312006-10-03 17:40:40 +00002456 rc==SQLITE_OK ? "ok" : "failed");
2457 return rc;
2458}
2459
2460/*
drh339eb0b2008-03-07 15:34:11 +00002461** Lower the locking level on file descriptor pFile to locktype. locktype
2462** must be either NO_LOCK or SHARED_LOCK.
2463**
2464** If the locking level of the file descriptor is already at or below
2465** the requested locking level, this routine is a no-op.
2466*/
danielk1977e339d652008-06-28 11:23:00 +00002467static int afpUnlock(sqlite3_file *id, int locktype) {
drhbfe66312006-10-03 17:40:40 +00002468 int rc = SQLITE_OK;
2469 unixFile *pFile = (unixFile*)id;
aswiftaebf4132008-11-21 00:10:35 +00002470 afpLockingContext *pCtx = (afpLockingContext *) pFile->lockingContext;
drhbfe66312006-10-03 17:40:40 +00002471
2472 assert( pFile );
drh4f0c5872007-03-26 22:05:01 +00002473 OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype,
drhbfe66312006-10-03 17:40:40 +00002474 pFile->locktype, getpid());
aswift5b1a2562008-08-22 00:22:35 +00002475
drhbfe66312006-10-03 17:40:40 +00002476 assert( locktype<=SHARED_LOCK );
2477 if( pFile->locktype<=locktype ){
2478 return SQLITE_OK;
2479 }
2480 if( CHECK_THREADID(pFile) ){
2481 return SQLITE_MISUSE;
2482 }
drh6c7d5c52008-11-21 20:32:33 +00002483 unixEnterMutex();
drhbfe66312006-10-03 17:40:40 +00002484 if( pFile->locktype>SHARED_LOCK ){
aswiftaebf4132008-11-21 00:10:35 +00002485
2486 if( pFile->locktype==EXCLUSIVE_LOCK ){
drh6b9d6dd2008-12-03 19:34:47 +00002487 rc = afpSetLock(pCtx->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0);
aswiftaebf4132008-11-21 00:10:35 +00002488 if( rc==SQLITE_OK && locktype==SHARED_LOCK ){
2489 /* only re-establish the shared lock if necessary */
2490 int sharedLockByte = SHARED_FIRST+pCtx->sharedByte;
drh6b9d6dd2008-12-03 19:34:47 +00002491 rc = afpSetLock(pCtx->dbPath, pFile, sharedLockByte, 1, 1);
aswiftaebf4132008-11-21 00:10:35 +00002492 }
2493 }
2494 if( rc==SQLITE_OK && pFile->locktype>=PENDING_LOCK ){
drh6b9d6dd2008-12-03 19:34:47 +00002495 rc = afpSetLock(pCtx->dbPath, pFile, PENDING_BYTE, 1, 0);
aswiftaebf4132008-11-21 00:10:35 +00002496 }
2497 if( rc==SQLITE_OK && pFile->locktype>=RESERVED_LOCK ){
drh6b9d6dd2008-12-03 19:34:47 +00002498 rc = afpSetLock(pCtx->dbPath, pFile, RESERVED_BYTE, 1, 0);
aswiftaebf4132008-11-21 00:10:35 +00002499 }
2500 }else if( locktype==NO_LOCK ){
2501 /* clear the shared lock */
2502 int sharedLockByte = SHARED_FIRST+pCtx->sharedByte;
drh6b9d6dd2008-12-03 19:34:47 +00002503 rc = afpSetLock(pCtx->dbPath, pFile, sharedLockByte, 1, 0);
aswiftaebf4132008-11-21 00:10:35 +00002504 }
drhbfe66312006-10-03 17:40:40 +00002505
aswiftaebf4132008-11-21 00:10:35 +00002506 if( rc==SQLITE_OK ){
2507 if( locktype==NO_LOCK ){
drh6c7d5c52008-11-21 20:32:33 +00002508 struct unixOpenCnt *pOpen = pFile->pOpen;
aswiftaebf4132008-11-21 00:10:35 +00002509 pOpen->nLock--;
2510 assert( pOpen->nLock>=0 );
2511 if( pOpen->nLock==0 && pOpen->nPending>0 ){
2512 int i;
2513 for(i=0; i<pOpen->nPending; i++){
2514 if( pOpen->aPending[i] < 0 ) continue;
2515 if( close(pOpen->aPending[i]) ){
2516 pFile->lastErrno = errno;
2517 rc = SQLITE_IOERR_CLOSE;
2518 }else{
2519 pOpen->aPending[i] = -1;
drhbfe66312006-10-03 17:40:40 +00002520 }
aswiftaebf4132008-11-21 00:10:35 +00002521 }
2522 if( rc==SQLITE_OK ){
2523 sqlite3_free(pOpen->aPending);
2524 pOpen->nPending = 0;
2525 pOpen->aPending = 0;
2526 }
drhbfe66312006-10-03 17:40:40 +00002527 }
2528 }
drhbfe66312006-10-03 17:40:40 +00002529 }
aswiftaebf4132008-11-21 00:10:35 +00002530end_afpunlock:
drh6c7d5c52008-11-21 20:32:33 +00002531 unixLeaveMutex();
aswiftaebf4132008-11-21 00:10:35 +00002532 if( rc==SQLITE_OK ) pFile->locktype = locktype;
drhbfe66312006-10-03 17:40:40 +00002533 return rc;
2534}
2535
2536/*
drh339eb0b2008-03-07 15:34:11 +00002537** Close a file & cleanup AFP specific locking context
2538*/
danielk1977e339d652008-06-28 11:23:00 +00002539static int afpClose(sqlite3_file *id) {
2540 if( id ){
2541 unixFile *pFile = (unixFile*)id;
2542 afpUnlock(id, NO_LOCK);
drh6c7d5c52008-11-21 20:32:33 +00002543 unixEnterMutex();
aswiftaebf4132008-11-21 00:10:35 +00002544 if( pFile->pOpen && pFile->pOpen->nLock ){
2545 /* If there are outstanding locks, do not actually close the file just
drh734c9862008-11-28 15:37:20 +00002546 ** yet because that would clear those locks. Instead, add the file
2547 ** descriptor to pOpen->aPending. It will be automatically closed when
2548 ** the last lock is cleared.
2549 */
aswiftaebf4132008-11-21 00:10:35 +00002550 int *aNew;
drh6c7d5c52008-11-21 20:32:33 +00002551 struct unixOpenCnt *pOpen = pFile->pOpen;
aswiftaebf4132008-11-21 00:10:35 +00002552 aNew = sqlite3_realloc(pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
2553 if( aNew==0 ){
2554 /* If a malloc fails, just leak the file descriptor */
2555 }else{
2556 pOpen->aPending = aNew;
2557 pOpen->aPending[pOpen->nPending] = pFile->h;
2558 pOpen->nPending++;
2559 pFile->h = -1;
2560 }
2561 }
2562 releaseOpenCnt(pFile->pOpen);
danielk1977e339d652008-06-28 11:23:00 +00002563 sqlite3_free(pFile->lockingContext);
aswiftaebf4132008-11-21 00:10:35 +00002564 closeUnixFile(id);
drh6c7d5c52008-11-21 20:32:33 +00002565 unixLeaveMutex();
danielk1977e339d652008-06-28 11:23:00 +00002566 }
aswiftaebf4132008-11-21 00:10:35 +00002567 return SQLITE_OK;
drhbfe66312006-10-03 17:40:40 +00002568}
2569
drh734c9862008-11-28 15:37:20 +00002570#endif /* defined(__DARWIN__) && SQLITE_ENABLE_LOCKING_STYLE */
2571/*
2572** The code above is the AFP lock implementation. The code is specific
2573** to MacOSX and does not work on other unix platforms. No alternative
2574** is available. If you don't compile for a mac, then the "unix-afp"
2575** VFS is not available.
2576**
2577********************* End of the AFP lock implementation **********************
2578******************************************************************************/
drhbfe66312006-10-03 17:40:40 +00002579
drh734c9862008-11-28 15:37:20 +00002580/******************************************************************************
2581************************** Begin Proxy Locking ********************************
2582**
2583**
2584** The default locking schemes in SQLite use byte-range locks on the
2585** database file to coordinate safe, concurrent access by multiple readers
2586** and writers [http://sqlite.org/lockingv3.html]. The five file locking
** states (UNLOCKED, PENDING, SHARED, RESERVED, EXCLUSIVE) are implemented
** as POSIX read & write locks over a fixed set of locations (via fcntl).
** On AFP and SMB only exclusive byte-range locks are available, via fsctl
** with _IOWR('z', 23, struct ByteRangeLockPB2), to track the same 5 states.
** To simulate a F_RDLCK on the shared range on AFP, a randomly selected
** address in the shared range is taken for a SHARED lock and the entire
** shared range is taken for an EXCLUSIVE lock:
2594**
2595** PENDING_BYTE 0x40000000
2596** RESERVED_BYTE 0x40000001
2597** SHARED_RANGE 0x40000002 -> 0x40000200
2598**
2599** This works well on the local file system, but shows a nearly 100x
2600** slowdown in read performance on AFP because the AFP client disables
2601** the read cache when byte-range locks are present. Enabling the read
2602** cache exposes a cache coherency problem that is present on all OS X
2603** supported network file systems. NFS and AFP both observe the
2604** close-to-open semantics for ensuring cache coherency
2605** [http://nfs.sourceforge.net/#faq_a8], which does not effectively
2606** address the requirements for concurrent database access by multiple
2607** readers and writers
2608** [http://www.nabble.com/SQLite-on-NFS-cache-coherency-td15655701.html].
2609**
2610** To address the performance and cache coherency issues, proxy file locking
2611** changes the way database access is controlled by limiting access to a
2612** single host at a time and moving file locks off of the database file
2613** and onto a proxy file on the local file system.
2614**
2615**
2616** Using proxy locks
2617** -----------------
2618**
2619** C APIs
2620**
2621** sqlite3_file_control(db, dbname, SQLITE_SET_LOCKPROXYFILE,
2622** <proxy_path> | ":auto:");
2623** sqlite3_file_control(db, dbname, SQLITE_GET_LOCKPROXYFILE, &<proxy_path>);
2624**
2625**
2626** SQL pragmas
2627**
2628** PRAGMA [database.]lock_proxy_file=<proxy_path> | :auto:
2629** PRAGMA [database.]lock_proxy_file
2630**
2631** Specifying ":auto:" means that if there is a conch file with a matching
2632** host ID in it, the proxy path in the conch file will be used, otherwise
2633** a proxy path based on the user's temp dir
2634** (via confstr(_CS_DARWIN_USER_TEMP_DIR,...)) will be used and the
2635** actual proxy file name is generated from the name and path of the
2636** database file. For example:
2637**
2638** For database path "/Users/me/foo.db"
** The lock path will be "<tmpdir>/sqliteplocks/_Users_me_foo.db:auto:"
2640**
2641** Once a lock proxy is configured for a database connection, it can not
2642** be removed, however it may be switched to a different proxy path via
2643** the above APIs (assuming the conch file is not being held by another
2644** connection or process).
2645**
2646**
2647** How proxy locking works
2648** -----------------------
2649**
2650** Proxy file locking relies primarily on two new supporting files:
2651**
2652** * conch file to limit access to the database file to a single host
2653** at a time
2654**
2655** * proxy file to act as a proxy for the advisory locks normally
2656** taken on the database
2657**
2658** The conch file - to use a proxy file, sqlite must first "hold the conch"
2659** by taking an sqlite-style shared lock on the conch file, reading the
2660** contents and comparing the host's unique host ID (see below) and lock
2661** proxy path against the values stored in the conch. The conch file is
2662** stored in the same directory as the database file and the file name
2663** is patterned after the database file name as ".<databasename>-conch".
** If the conch file does not exist, or its contents do not match the
2665** host ID and/or proxy path, then the lock is escalated to an exclusive
2666** lock and the conch file contents is updated with the host ID and proxy
2667** path and the lock is downgraded to a shared lock again. If the conch
2668** is held by another process (with a shared lock), the exclusive lock
2669** will fail and SQLITE_BUSY is returned.
2670**
2671** The proxy file - a single-byte file used for all advisory file locks
2672** normally taken on the database file. This allows for safe sharing
2673** of the database file for multiple readers and writers on the same
2674** host (the conch ensures that they all use the same local lock file).
2675**
2676** There is a third file - the host ID file - used as a persistent record
2677** of a unique identifier for the host, a 128-byte unique host id file
2678** in the path defined by the HOSTIDPATH macro (default value is
2679** /Library/Caches/.com.apple.sqliteConchHostId).
2680**
2681** Requesting the lock proxy does not immediately take the conch, it is
2682** only taken when the first request to lock database file is made.
2683** This matches the semantics of the traditional locking behavior, where
2684** opening a connection to a database file does not take a lock on it.
2685** The shared lock and an open file descriptor are maintained until
2686** the connection to the database is closed.
2687**
2688** The proxy file and the lock file are never deleted so they only need
2689** to be created the first time they are used.
2690**
2691** Configuration options
2692** ---------------------
2693**
2694** SQLITE_PREFER_PROXY_LOCKING
2695**
2696** Database files accessed on non-local file systems are
2697** automatically configured for proxy locking, lock files are
2698** named automatically using the same logic as
2699** PRAGMA lock_proxy_file=":auto:"
2700**
2701** SQLITE_PROXY_DEBUG
2702**
2703** Enables the logging of error messages during host id file
2704** retrieval and creation
2705**
2706** HOSTIDPATH
2707**
2708** Overrides the default host ID file path location
2709**
2710** LOCKPROXYDIR
2711**
2712** Overrides the default directory used for lock proxy files that
2713** are named automatically via the ":auto:" setting
2714**
2715** SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
2716**
2717** Permissions to use when creating a directory for storing the
2718** lock proxy files, only used when LOCKPROXYDIR is not set.
2719**
2720**
2721** As mentioned above, when compiled with SQLITE_PREFER_PROXY_LOCKING,
2722** setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will
2723** force proxy locking to be used for every database file opened, and 0
2724** will force automatic proxy locking to be disabled for all database
** files (explicitly setting the lock_proxy_file pragma or calling the
** sqlite3_file_control API is not affected by SQLITE_FORCE_PROXY_LOCKING).
2727*/
drhbfe66312006-10-03 17:40:40 +00002728
2729/*
drh734c9862008-11-28 15:37:20 +00002730** Proxy locking is only available on MacOSX
drh339eb0b2008-03-07 15:34:11 +00002731*/
drh734c9862008-11-28 15:37:20 +00002732#if defined(__DARWIN__) && SQLITE_ENABLE_LOCKING_STYLE
drhbfe66312006-10-03 17:40:40 +00002733
drh6b9d6dd2008-12-03 19:34:47 +00002734/*
2735** Forward reference
2736*/
2737static int fillInUnixFile(
2738 sqlite3_vfs *pVfs,
2739 int h,
2740 int dirfd,
2741 sqlite3_file *pId,
2742 const char *zFilename,
2743 int noLock,
2744 int isDelete
2745);
drhbfe66312006-10-03 17:40:40 +00002746
drh339eb0b2008-03-07 15:34:11 +00002747
drh734c9862008-11-28 15:37:20 +00002748#ifdef SQLITE_TEST
2749/* simulate multiple hosts by creating unique hostid file paths */
2750int sqlite3_hostid_num = 0;
chw97185482008-11-17 08:05:31 +00002751#endif
drhbfe66312006-10-03 17:40:40 +00002752
2753/*
drh734c9862008-11-28 15:37:20 +00002754** The proxyLockingContext has the path and file structures for the remote
2755** and local proxy files in it
2756*/
2757typedef struct proxyLockingContext proxyLockingContext;
2758struct proxyLockingContext {
drh6b9d6dd2008-12-03 19:34:47 +00002759 unixFile *conchFile; /* Open conch file */
2760 char *conchFilePath; /* Name of the conch file */
2761 unixFile *lockProxy; /* Open proxy lock file */
2762 char *lockProxyPath; /* Name of the proxy lock file */
2763 char *dbPath; /* Name of the open file */
2764 int conchHeld; /* True if the conch is currently held */
2765 void *oldLockingContext; /* Original lockingcontext to restore on close */
2766 sqlite3_io_methods const *pOldMethod; /* Original I/O methods for close */
drh734c9862008-11-28 15:37:20 +00002767};
drhbfe66312006-10-03 17:40:40 +00002768
/*
** Sizes of the host ID and of the conch file contents.  Both HOSTIDLEN
** and CONCHLEN include room for the string-terminating nul.  HOSTIDPATH
** is the location of the host ID file and may be overridden at compile
** time.
*/
#define HOSTIDLEN         128
#define CONCHLEN          (MAXPATHLEN+HOSTIDLEN+1)
#if !defined(HOSTIDPATH)
# define HOSTIDPATH       "/Library/Caches/.com.apple.sqliteConchHostId"
#endif
2777
2778/* basically a copy of unixRandomness with different
2779** test behavior built in */
drh6b9d6dd2008-12-03 19:34:47 +00002780static int proxyGenerateHostID(char *pHostID){
aswiftaebf4132008-11-21 00:10:35 +00002781 int pid, fd, i, len;
2782 unsigned char *key = (unsigned char *)pHostID;
2783
2784 memset(key, 0, HOSTIDLEN);
2785 len = 0;
2786 fd = open("/dev/urandom", O_RDONLY);
2787 if( fd>=0 ){
2788 len = read(fd, key, HOSTIDLEN);
2789 close(fd); /* silently leak the fd if it fails */
2790 }
2791 if( len < HOSTIDLEN ){
2792 time_t t;
2793 time(&t);
2794 memcpy(key, &t, sizeof(t));
2795 pid = getpid();
2796 memcpy(&key[sizeof(t)], &pid, sizeof(pid));
2797 }
2798
2799#ifdef MAKE_PRETTY_HOSTID
2800 /* filter the bytes into printable ascii characters and NUL terminate */
2801 key[(HOSTIDLEN-1)] = 0x00;
2802 for( i=0; i<(HOSTIDLEN-1); i++ ){
2803 unsigned char pa = key[i]&0x7F;
2804 if( pa<0x20 ){
2805 key[i] = (key[i]&0x80 == 0x80) ? pa+0x40 : pa+0x20;
2806 }else if( pa==0x7F ){
2807 key[i] = (key[i]&0x80 == 0x80) ? pa=0x20 : pa+0x7E;
2808 }
2809 }
2810#endif
2811 return SQLITE_OK;
2812}
2813
aswiftaebf4132008-11-21 00:10:35 +00002814/* writes the host id path to path, path should be an pre-allocated buffer
drh6b9d6dd2008-12-03 19:34:47 +00002815** with enough space for a path
2816*/
2817static int proxyGetHostIDPath(char *path, size_t len){
aswiftaebf4132008-11-21 00:10:35 +00002818 strlcpy(path, HOSTIDPATH, len);
2819#ifdef SQLITE_TEST
2820 if( sqlite3_hostid_num>0 ){
2821 char suffix[2] = "1";
2822 suffix[0] = suffix[0] + sqlite3_hostid_num;
2823 strlcat(path, suffix, len);
2824 }
2825#endif
2826 OSTRACE3("GETHOSTIDPATH %s pid=%d\n", path, getpid());
2827}
2828
2829/* get the host ID from a sqlite hostid file stored in the
2830** user-specific tmp directory, create the ID if it's not there already
2831*/
drh6b9d6dd2008-12-03 19:34:47 +00002832static int proxyGetHostID(char *pHostID, int *pError){
aswiftaebf4132008-11-21 00:10:35 +00002833 int fd;
2834 char path[MAXPATHLEN];
2835 size_t len;
drh6b9d6dd2008-12-03 19:34:47 +00002836 int rc=SQLITE_OK;
aswiftaebf4132008-11-21 00:10:35 +00002837
drh6b9d6dd2008-12-03 19:34:47 +00002838 proxyGetHostIDPath(path, MAXPATHLEN);
aswiftaebf4132008-11-21 00:10:35 +00002839 /* try to create the host ID file, if it already exists read the contents */
2840 fd = open(path, O_CREAT|O_WRONLY|O_EXCL, 0644);
2841 if( fd<0 ){
2842 int err=errno;
2843
2844 if( err!=EEXIST ){
2845#ifdef SQLITE_PROXY_DEBUG /* set the sqlite error message instead */
drh734c9862008-11-28 15:37:20 +00002846 fprintf(stderr, "sqlite error creating host ID file %s: %s\n",
2847 path, strerror(err));
aswiftaebf4132008-11-21 00:10:35 +00002848#endif
2849 return SQLITE_PERM;
2850 }
2851 /* couldn't create the file, read it instead */
2852 fd = open(path, O_RDONLY|O_EXCL);
2853 if( fd<0 ){
2854 int err = errno;
2855#ifdef SQLITE_PROXY_DEBUG /* set the sqlite error message instead */
drh734c9862008-11-28 15:37:20 +00002856 fprintf(stderr, "sqlite error opening host ID file %s: %s\n",
2857 path, strerror(err));
aswiftaebf4132008-11-21 00:10:35 +00002858#endif
2859 return SQLITE_PERM;
2860 }
2861 len = pread(fd, pHostID, HOSTIDLEN, 0);
drh734c9862008-11-28 15:37:20 +00002862 if( len<0 ){
2863 *pError = errno;
2864 rc = SQLITE_IOERR_READ;
2865 }else if( len<HOSTIDLEN ){
2866 *pError = 0;
2867 rc = SQLITE_IOERR_SHORT_READ;
2868 }
aswiftaebf4132008-11-21 00:10:35 +00002869 close(fd); /* silently leak the fd if it fails */
2870 OSTRACE3("GETHOSTID read %s pid=%d\n", pHostID, getpid());
2871 return rc;
2872 }else{
2873 int i;
2874 /* we're creating the host ID file (use a random string of bytes) */
drh6b9d6dd2008-12-03 19:34:47 +00002875 proxyGenerateHostID(pHostID);
aswiftaebf4132008-11-21 00:10:35 +00002876 len = pwrite(fd, pHostID, HOSTIDLEN, 0);
drh734c9862008-11-28 15:37:20 +00002877 if( len<0 ){
2878 *pError = errno;
2879 rc = SQLITE_IOERR_WRITE;
2880 }else if( len<HOSTIDLEN ){
2881 *pError = 0;
2882 rc = SQLITE_IOERR_WRITE;
2883 }
aswiftaebf4132008-11-21 00:10:35 +00002884 close(fd); /* silently leak the fd if it fails */
2885 OSTRACE3("GETHOSTID wrote %s pid=%d\n", pHostID, getpid());
2886 return rc;
2887 }
2888}
2889
drh6b9d6dd2008-12-03 19:34:47 +00002890static int proxyGetLockPath(const char *dbPath, char *lPath, size_t maxLen){
2891 int len;
2892 int dbLen;
2893 int i;
2894
2895#ifdef LOCKPROXYDIR
2896 len = strlcpy(lPath, LOCKPROXYDIR, maxLen);
2897#else
2898# ifdef _CS_DARWIN_USER_TEMP_DIR
2899 {
2900 char utdir[MAXPATHLEN];
2901
2902 confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen);
2903 len = strlcat(lPath, "sqliteplocks", maxLen);
2904 if( mkdir(lPath, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){
2905 /* if mkdir fails, handle as lock file creation failure */
2906 int err = errno;
2907# ifdef SQLITE_DEBUG
2908 if( err!=EEXIST ){
2909 fprintf(stderr, "proxyGetLockPath: mkdir(%s,0%o) error %d %s\n", lPath,
2910 SQLITE_DEFAULT_PROXYDIR_PERMISSIONS, err, strerror(err));
2911 }
2912# endif
2913 }else{
2914 OSTRACE3("GETLOCKPATH mkdir %s pid=%d\n", lPath, getpid());
2915 }
2916
2917 }
2918# else
2919 len = strlcpy(lPath, "/tmp/", maxLen);
2920# endif
2921#endif
2922
2923 if( lPath[len-1]!='/' ){
2924 len = strlcat(lPath, "/", maxLen);
2925 }
2926
2927 /* transform the db path to a unique cache name */
2928 dbLen = strlen(dbPath);
2929 for( i=0; i<dbLen && (i+len+7)<maxLen; i++){
2930 char c = dbPath[i];
2931 lPath[i+len] = (c=='/')?'_':c;
2932 }
2933 lPath[i+len]='\0';
2934 strlcat(lPath, ":auto:", maxLen);
2935 return SQLITE_OK;
2936}
2937
2938/*
2939** Create a new VFS file descriptor (stored in memory obtained from
2940** sqlite3_malloc) and open the file named "path" in the file descriptor.
2941**
2942** The caller is responsible not only for closing the file descriptor
2943** but also for freeing the memory associated with the file descriptor.
2944*/
2945static int proxyCreateUnixFile(const char *path, unixFile **ppFile) {
2946 int fd;
2947 int dirfd = -1;
2948 unixFile *pNew;
2949 int rc = SQLITE_OK;
2950
2951 fd = open(path, O_RDWR | O_CREAT, SQLITE_DEFAULT_FILE_PERMISSIONS);
2952 if( fd<0 ){
2953 return SQLITE_CANTOPEN;
2954 }
2955
2956 pNew = (unixFile *)sqlite3_malloc(sizeof(unixFile));
2957 if( pNew==NULL ){
2958 rc = SQLITE_NOMEM;
2959 goto end_create_proxy;
2960 }
2961 memset(pNew, 0, sizeof(unixFile));
2962
2963 rc = fillInUnixFile(NULL, fd, dirfd, (sqlite3_file*)pNew, path, 0, 0);
2964 if( rc==SQLITE_OK ){
2965 *ppFile = pNew;
2966 return SQLITE_OK;
2967 }
2968end_create_proxy:
2969 close(fd); /* silently leak fd if error, we're already in error */
2970 sqlite3_free(pNew);
2971 return rc;
2972}
2973
aswiftaebf4132008-11-21 00:10:35 +00002974/* takes the conch by taking a shared lock and read the contents conch, if
2975** lockPath is non-NULL, the host ID and lock file path must match. A NULL
2976** lockPath means that the lockPath in the conch file will be used if the
2977** host IDs match, or a new lock path will be generated automatically
2978** and written to the conch file.
2979*/
drh6b9d6dd2008-12-03 19:34:47 +00002980static int proxyTakeConch(unixFile *pFile){
aswiftaebf4132008-11-21 00:10:35 +00002981 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
2982
2983 if( pCtx->conchHeld>0 ){
2984 return SQLITE_OK;
2985 }else{
2986 unixFile *conchFile = pCtx->conchFile;
2987 char testValue[CONCHLEN];
2988 char conchValue[CONCHLEN];
2989 char lockPath[MAXPATHLEN];
2990 char *tLockPath = NULL;
2991 int rc = SQLITE_OK;
2992 int readRc = SQLITE_OK;
2993 int syncPerms = 0;
2994
2995 OSTRACE4("TAKECONCH %d for %s pid=%d\n", conchFile->h,
2996 (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"), getpid());
2997
2998 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, SHARED_LOCK);
2999 if( rc==SQLITE_OK ){
drh6b9d6dd2008-12-03 19:34:47 +00003000 int pError = 0;
aswiftaebf4132008-11-21 00:10:35 +00003001 memset(testValue, 0, CONCHLEN); // conch is fixed size
drh6b9d6dd2008-12-03 19:34:47 +00003002 rc = proxyGetHostID(testValue, &pError);
3003 if( rc&SQLITE_IOERR==SQLITE_IOERR ){
3004 pFile->lastErrno = pError;
3005 }
aswiftaebf4132008-11-21 00:10:35 +00003006 if( pCtx->lockProxyPath ){
3007 strlcpy(&testValue[HOSTIDLEN], pCtx->lockProxyPath, MAXPATHLEN);
3008 }
3009 }
3010 if( rc!=SQLITE_OK ){
3011 goto end_takeconch;
3012 }
3013
3014 readRc = unixRead((sqlite3_file *)conchFile, conchValue, CONCHLEN, 0);
3015 if( readRc!=SQLITE_IOERR_SHORT_READ ){
3016 int match = 0;
3017 if( readRc!=SQLITE_OK ){
drh734c9862008-11-28 15:37:20 +00003018 if( rc&SQLITE_IOERR==SQLITE_IOERR ){
3019 pFile->lastErrno = conchFile->lastErrno;
3020 }
aswiftaebf4132008-11-21 00:10:35 +00003021 rc = readRc;
3022 goto end_takeconch;
3023 }
3024 /* if the conch has data compare the contents */
3025 if( !pCtx->lockProxyPath ){
3026 /* for auto-named local lock file, just check the host ID and we'll
3027 ** use the local lock file path that's already in there */
3028 if( !memcmp(testValue, conchValue, HOSTIDLEN) ){
3029 tLockPath = (char *)&conchValue[HOSTIDLEN];
3030 goto end_takeconch;
3031 }
3032 }else{
3033 /* we've got the conch if conchValue matches our path and host ID */
3034 if( !memcmp(testValue, conchValue, CONCHLEN) ){
3035 goto end_takeconch;
3036 }
3037 }
3038 }else{
3039 /* a short read means we're "creating" the conch (even though it could
3040 ** have been user-intervention), if we acquire the exclusive lock,
3041 ** we'll try to match the current on-disk permissions of the database
3042 */
3043 syncPerms = 1;
3044 }
3045
3046 /* either conch was emtpy or didn't match */
3047 if( !pCtx->lockProxyPath ){
drh6b9d6dd2008-12-03 19:34:47 +00003048 proxyGetLockPath(pCtx->dbPath, lockPath, MAXPATHLEN);
aswiftaebf4132008-11-21 00:10:35 +00003049 tLockPath = lockPath;
3050 strlcpy(&testValue[HOSTIDLEN], lockPath, MAXPATHLEN);
3051 }
3052
3053 /* update conch with host and path (this will fail if other process
3054 ** has a shared lock already) */
3055 rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, EXCLUSIVE_LOCK);
3056 if( rc==SQLITE_OK ){
3057 rc = unixWrite((sqlite3_file *)conchFile, testValue, CONCHLEN, 0);
3058 if( rc==SQLITE_OK && syncPerms ){
3059 struct stat buf;
3060 int err = fstat(pFile->h, &buf);
3061 if( err==0 ){
3062 mode_t mode = buf.st_mode & 0100666;
3063 /* try to match the database file permissions, ignore failure */
3064#ifndef SQLITE_PROXY_DEBUG
3065 fchmod(conchFile->h, buf.st_mode);
3066#else
3067 if( fchmod(conchFile->h, buf.st_mode)!=0 ){
3068 int code = errno;
drh734c9862008-11-28 15:37:20 +00003069 fprintf(stderr, "fchmod %o FAILED with %d %s\n",
3070 buf.st_mode, code, strerror(code));
aswiftaebf4132008-11-21 00:10:35 +00003071 } else {
3072 fprintf(stderr, "fchmod %o SUCCEDED\n",buf.st_mode);
3073 }
3074 }else{
3075 int code = errno;
drh734c9862008-11-28 15:37:20 +00003076 fprintf(stderr, "STAT FAILED[%d] with %d %s\n",
3077 err, code, strerror(code));
aswiftaebf4132008-11-21 00:10:35 +00003078#endif
3079 }
3080 }
3081 }
3082 conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, SHARED_LOCK);
3083
3084end_takeconch:
3085 OSTRACE2("TRANSPROXY: CLOSE %d\n", pFile->h);
drh734c9862008-11-28 15:37:20 +00003086 if( rc==SQLITE_OK && pFile->openFlags ){
aswiftaebf4132008-11-21 00:10:35 +00003087 if( pFile->h>=0 ){
3088#ifdef STRICT_CLOSE_ERROR
3089 if( close(pFile->h) ){
3090 pFile->lastErrno = errno;
3091 return SQLITE_IOERR_CLOSE;
3092 }
3093#else
3094 close(pFile->h); /* silently leak fd if fail */
3095#endif
3096 }
3097 pFile->h = -1;
drh734c9862008-11-28 15:37:20 +00003098 int fd = open(pCtx->dbPath, pFile->openFlags,
3099 SQLITE_DEFAULT_FILE_PERMISSIONS);
aswiftaebf4132008-11-21 00:10:35 +00003100 OSTRACE2("TRANSPROXY: OPEN %d\n", fd);
3101 if( fd>=0 ){
3102 pFile->h = fd;
3103 }else{
drh6b9d6dd2008-12-03 19:34:47 +00003104 rc=SQLITE_CANTOPEN; // SQLITE_BUSY? proxyTakeConch called during locking
aswiftaebf4132008-11-21 00:10:35 +00003105 }
3106 }
3107 if( rc==SQLITE_OK && !pCtx->lockProxy ){
3108 char *path = tLockPath ? tLockPath : pCtx->lockProxyPath;
3109 // ACS: Need to make a copy of path sometimes
drh6b9d6dd2008-12-03 19:34:47 +00003110 rc = proxyCreateUnixFile(path, &pCtx->lockProxy);
aswiftaebf4132008-11-21 00:10:35 +00003111 }
3112 if( rc==SQLITE_OK ){
3113 pCtx->conchHeld = 1;
3114
3115 if( tLockPath ){
3116 pCtx->lockProxyPath = sqlite3DbStrDup(0, tLockPath);
drh7708e972008-11-29 00:56:52 +00003117 if( pCtx->lockProxy->pMethod == &afpIoMethods ){
drh734c9862008-11-28 15:37:20 +00003118 ((afpLockingContext *)pCtx->lockProxy->lockingContext)->dbPath =
3119 pCtx->lockProxyPath;
aswiftaebf4132008-11-21 00:10:35 +00003120 }
3121 }
3122 } else {
3123 conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK);
3124 }
drh734c9862008-11-28 15:37:20 +00003125 OSTRACE3("TAKECONCH %d %s\n", conchFile->h, rc==SQLITE_OK?"ok":"failed");
aswiftaebf4132008-11-21 00:10:35 +00003126 return rc;
3127 }
3128}
aswiftaebf4132008-11-21 00:10:35 +00003129
drh6b9d6dd2008-12-03 19:34:47 +00003130/*
3131** If pFile holds a lock on a conch file, then release that lock.
3132*/
3133static int proxyReleaseConch(unixFile *pFile){
3134 int rc; /* Subroutine return code */
3135 proxyLockingContext *pCtx; /* The locking context for the proxy lock */
3136 unixFile *conchFile; /* Name of the conch file */
3137
3138 pCtx = (proxyLockingContext *)pFile->lockingContext;
3139 conchFile = pCtx->conchFile;
aswiftaebf4132008-11-21 00:10:35 +00003140 OSTRACE4("RELEASECONCH %d for %s pid=%d\n", conchFile->h,
3141 (pCtx->lockProxyPath ? pCtx->lockProxyPath : ":auto:"),
3142 getpid());
3143 pCtx->conchHeld = 0;
3144 rc = conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK);
3145 OSTRACE3("RELEASECONCH %d %s\n", conchFile->h,
3146 (rc==SQLITE_OK ? "ok" : "failed"));
3147 return rc;
3148}
3149
drh6b9d6dd2008-12-03 19:34:47 +00003150/*
3151** Given the name of a database file, compute the name of its conch file.
3152** Store the conch filename in memory obtained from sqlite3_malloc().
3153** Make *pConchPath point to the new name. Return SQLITE_OK on success
3154** or SQLITE_NOMEM if unable to obtain memory.
3155**
3156** The caller is responsible for ensuring that the allocated memory
3157** space is eventually freed.
3158**
3159** *pConchPath is set to NULL if a memory allocation error occurs.
3160*/
3161static int proxyCreateConchPathname(char *dbPath, char **pConchPath){
3162 int i; /* Loop counter */
3163 int len = strlen(dbPath); /* Length of database filename - dbPath */
3164 char *conchPath; /* buffer in which to construct conch name */
3165
3166 /* Allocate space for the conch filename and initialize the name to
3167 ** the name of the original database file. */
3168 *pConchPath = conchPath = (char *)sqlite3_malloc(len + 8);
aswiftaebf4132008-11-21 00:10:35 +00003169 if( conchPath==0 ){
3170 return SQLITE_NOMEM;
3171 }
drh6b9d6dd2008-12-03 19:34:47 +00003172 memcpy(conchPath, dbPath, len+1);
aswiftaebf4132008-11-21 00:10:35 +00003173
3174 /* now insert a "." before the last / character */
3175 for( i=(len-1); i>=0; i-- ){
3176 if( conchPath[i]=='/' ){
3177 i++;
3178 break;
3179 }
3180 }
3181 conchPath[i]='.';
3182 while ( i<len ){
3183 conchPath[i+1]=dbPath[i];
3184 i++;
3185 }
drh6b9d6dd2008-12-03 19:34:47 +00003186
3187 /* append the "-conch" suffix to the file */
3188 memcpy(&conchPath[i+1], "-conch", 7);
3189 assert( strlen(conchPath) == len+7 );
3190
aswiftaebf4132008-11-21 00:10:35 +00003191 return SQLITE_OK;
3192}
3193
drh734c9862008-11-28 15:37:20 +00003194
aswiftaebf4132008-11-21 00:10:35 +00003195/* Takes a fully configured proxy locking-style unix file and switches
3196** the local lock file path
3197*/
3198static int switchLockProxyPath(unixFile *pFile, const char *path) {
3199 proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext;
3200 char *oldPath = pCtx->lockProxyPath;
3201 int taken = 0;
3202 int rc = SQLITE_OK;
3203
3204 if( pFile->locktype!=NO_LOCK ){
3205 return SQLITE_BUSY;
3206 }
3207
3208 /* nothing to do if the path is NULL, :auto: or matches the existing path */
3209 if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ||
3210 (oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){
3211 return SQLITE_OK;
3212 }else{
3213 unixFile *lockProxy = pCtx->lockProxy;
3214 pCtx->lockProxy=NULL;
3215 pCtx->conchHeld = 0;
3216 if( lockProxy!=NULL ){
3217 rc=lockProxy->pMethod->xClose((sqlite3_file *)lockProxy);
3218 if( rc ) return rc;
3219 sqlite3_free(lockProxy);
3220 }
3221 sqlite3_free(oldPath);
3222 pCtx->lockProxyPath = sqlite3DbStrDup(0, path);
3223 }
3224
3225 return rc;
3226}
3227
3228/*
drh6b9d6dd2008-12-03 19:34:47 +00003229** pFile is a file that has been opened by a prior xOpen call. dbPath
3230** is a string buffer at least MAXPATHLEN+1 characters in size.
3231**
3232** This routine find the filename associated with pFile and writes it
3233** int dbPath.
3234*/
3235static int proxyGetDbPathForUnixFile(unixFile *pFile, char *dbPath){
3236#if defined(__DARWIN__)
3237 if( pFile->pMethod == &afpIoMethods ){
3238 /* afp style keeps a reference to the db path in the filePath field
3239 ** of the struct */
3240 assert( strlen((char*)pFile->lockingContext)<=MAXPATHLEN );
3241 strcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath)
3242 }else
3243#endif
3244 if( pFile->pMethod == &dotlockIoMethods ){
3245 /* dot lock style uses the locking context to store the dot lock
3246 ** file path */
3247 int len = strlen((char *)pFile->lockingContext) - strlen(DOTLOCK_SUFFIX);
3248 memcpy(dbPath, (char *)pFile->lockingContext, len + 1);
3249 }else{
3250 /* all other styles use the locking context to store the db file path */
3251 assert( strlen((char*)pFile->lockingContext)<=MAXPATHLEN );
3252 strcpy(dbPath, (char *)pFile->lockingContext);
3253 }
3254 return SQLITE_OK;
3255}
3256
3257/*
aswiftaebf4132008-11-21 00:10:35 +00003258** Takes an already filled in unix file and alters it so all file locking
3259** will be performed on the local proxy lock file. The following fields
3260** are preserved in the locking context so that they can be restored and
3261** the unix structure properly cleaned up at close time:
3262** ->lockingContext
3263** ->pMethod
3264*/
3265static int transformUnixFileForLockProxy(unixFile *pFile, const char *path) {
3266 proxyLockingContext *pCtx;
drh6b9d6dd2008-12-03 19:34:47 +00003267 char dbPath[MAXPATHLEN+1]; /* Name of the database file */
aswiftaebf4132008-11-21 00:10:35 +00003268 char *lockPath=NULL;
3269 int rc = SQLITE_OK;
3270
3271 if( pFile->locktype!=NO_LOCK ){
3272 return SQLITE_BUSY;
3273 }
drh6b9d6dd2008-12-03 19:34:47 +00003274 proxyGetDbPathForUnixFile(pFile, dbPath);
aswiftaebf4132008-11-21 00:10:35 +00003275 if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ){
3276 lockPath=NULL;
3277 }else{
3278 lockPath=(char *)path;
3279 }
3280
3281 OSTRACE4("TRANSPROXY %d for %s pid=%d\n", pFile->h,
3282 (lockPath ? lockPath : ":auto:"), getpid());
3283
3284 pCtx = sqlite3_malloc( sizeof(*pCtx) );
3285 if( pCtx==0 ){
3286 return SQLITE_NOMEM;
3287 }
3288 memset(pCtx, 0, sizeof(*pCtx));
3289
drh6b9d6dd2008-12-03 19:34:47 +00003290 rc = proxyCreateConchPathname(dbPath, &pCtx->conchFilePath);
aswiftaebf4132008-11-21 00:10:35 +00003291 if( rc==SQLITE_OK ){
drh6b9d6dd2008-12-03 19:34:47 +00003292 rc = proxyCreateUnixFile(pCtx->conchFilePath, &pCtx->conchFile);
aswiftaebf4132008-11-21 00:10:35 +00003293 }
3294 if( rc==SQLITE_OK && lockPath ){
3295 pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath);
3296 }
3297
3298end_transform_file:
3299 if( rc==SQLITE_OK ){
3300 /* all memory is allocated, proxys are created and assigned,
3301 ** switch the locking context and pMethod then return.
3302 */
3303 pCtx->dbPath = sqlite3DbStrDup(0, dbPath);
3304 pCtx->oldLockingContext = pFile->lockingContext;
3305 pFile->lockingContext = pCtx;
3306 pCtx->pOldMethod = pFile->pMethod;
drh7708e972008-11-29 00:56:52 +00003307 pFile->pMethod = &proxyIoMethods;
aswiftaebf4132008-11-21 00:10:35 +00003308 }else{
3309 if( pCtx->conchFile ){
3310 rc = pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile);
3311 if( rc ) return rc;
3312 sqlite3_free(pCtx->conchFile);
3313 }
3314 sqlite3_free(pCtx->conchFilePath);
3315 sqlite3_free(pCtx);
3316 }
3317 OSTRACE3("TRANSPROXY %d %s\n", pFile->h,
3318 (rc==SQLITE_OK ? "ok" : "failed"));
3319 return rc;
drh6b9d6dd2008-12-03 19:34:47 +00003320}
aswiftaebf4132008-11-21 00:10:35 +00003321
drh6b9d6dd2008-12-03 19:34:47 +00003322/*
3323** Within this division (the proxying locking implementation) the procedures
3324** above this point are all utilities. The lock-related methods of the
3325** proxy-locking sqlite3_io_method object follow.
3326*/
aswiftaebf4132008-11-21 00:10:35 +00003327
drh6b9d6dd2008-12-03 19:34:47 +00003328
3329/*
3330** This routine checks if there is a RESERVED lock held on the specified
3331** file by this or any other process. If such a lock is held, set *pResOut
3332** to a non-zero value otherwise *pResOut is set to zero. The return value
3333** is set to SQLITE_OK unless an I/O error occurs during lock checking.
3334*/
3335static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) {
3336 unixFile *pFile = (unixFile*)id;
3337 int rc = proxyTakeConch(pFile);
aswiftaebf4132008-11-21 00:10:35 +00003338 if( rc==SQLITE_OK ){
drh6b9d6dd2008-12-03 19:34:47 +00003339 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
3340 unixFile *proxy = pCtx->lockProxy;
3341 return proxy->pMethod->xCheckReservedLock((sqlite3_file*)proxy, pResOut);
aswiftaebf4132008-11-21 00:10:35 +00003342 }
aswiftaebf4132008-11-21 00:10:35 +00003343 return rc;
3344}
3345
drh6b9d6dd2008-12-03 19:34:47 +00003346/*
3347** Lock the file with the lock specified by parameter locktype - one
3348** of the following:
3349**
3350** (1) SHARED_LOCK
3351** (2) RESERVED_LOCK
3352** (3) PENDING_LOCK
3353** (4) EXCLUSIVE_LOCK
3354**
3355** Sometimes when requesting one lock state, additional lock states
3356** are inserted in between. The locking might fail on one of the later
3357** transitions leaving the lock state different from what it started but
3358** still short of its goal. The following chart shows the allowed
3359** transitions and the inserted intermediate states:
3360**
3361** UNLOCKED -> SHARED
3362** SHARED -> RESERVED
3363** SHARED -> (PENDING) -> EXCLUSIVE
3364** RESERVED -> (PENDING) -> EXCLUSIVE
3365** PENDING -> EXCLUSIVE
3366**
3367** This routine will only increase a lock. Use the sqlite3OsUnlock()
3368** routine to lower a locking level.
3369*/
3370static int proxyLock(sqlite3_file *id, int locktype) {
3371 unixFile *pFile = (unixFile*)id;
3372 int rc = proxyTakeConch(pFile);
3373 if( rc==SQLITE_OK ){
3374 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
3375 unixFile *proxy = pCtx->lockProxy;
3376 rc = proxy->pMethod->xLock((sqlite3_file*)proxy, locktype);
3377 pFile->locktype = proxy->locktype;
3378 }
3379 return rc;
3380}
3381
3382
3383/*
3384** Lower the locking level on file descriptor pFile to locktype. locktype
3385** must be either NO_LOCK or SHARED_LOCK.
3386**
3387** If the locking level of the file descriptor is already at or below
3388** the requested locking level, this routine is a no-op.
3389*/
3390static int proxyUnlock(sqlite3_file *id, int locktype) {
3391 unixFile *pFile = (unixFile*)id;
3392 int rc = proxyTakeConch(pFile);
3393 if( rc==SQLITE_OK ){
3394 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
3395 unixFile *proxy = pCtx->lockProxy;
3396 rc = proxy->pMethod->xUnlock((sqlite3_file*)proxy, locktype);
3397 pFile->locktype = proxy->locktype;
3398 }
3399 return rc;
3400}
3401
3402/*
3403** Close a file that uses proxy locks.
3404*/
3405static int proxyClose(sqlite3_file *id) {
3406 if( id ){
3407 unixFile *pFile = (unixFile*)id;
3408 proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
3409 unixFile *lockProxy = pCtx->lockProxy;
3410 unixFile *conchFile = pCtx->conchFile;
3411 int rc = SQLITE_OK;
3412
3413 if( lockProxy ){
3414 rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK);
3415 if( rc ) return rc;
3416 rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy);
3417 if( rc ) return rc;
3418 sqlite3_free(lockProxy);
3419 pCtx->lockProxy = 0;
3420 }
3421 if( conchFile ){
3422 if( pCtx->conchHeld ){
3423 rc = proxyReleaseConch(pFile);
3424 if( rc ) return rc;
3425 }
3426 rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile);
3427 if( rc ) return rc;
3428 sqlite3_free(conchFile);
3429 }
3430 sqlite3_free(pCtx->lockProxyPath);
3431 sqlite3_free(pCtx->conchFilePath);
3432 sqlite3_free(pCtx->dbPath);
3433 /* restore the original locking context and pMethod then close it */
3434 pFile->lockingContext = pCtx->oldLockingContext;
3435 pFile->pMethod = pCtx->pOldMethod;
3436 sqlite3_free(pCtx);
3437 return pFile->pMethod->xClose(id);
3438 }
3439 return SQLITE_OK;
3440}
3441
3442
aswiftaebf4132008-11-21 00:10:35 +00003443
drh734c9862008-11-28 15:37:20 +00003444#endif /* defined(__DARWIN__) && SQLITE_ENABLE_LOCKING_STYLE */
3445/*
3446** The proxy locking style is intended for use with AFP filesystems.
3447** And since AFP is only supported on MacOSX, the proxy locking is also
3448** restricted to MacOSX.
3449**
3450**
3451******************* End of the proxy lock implementation **********************
3452******************************************************************************/
3453
3454
/******************************************************************************
**************** Non-locking sqlite3_file methods *****************************
**
** The next division contains implementations for all methods of the
** sqlite3_file object other than the locking methods.  The locking
** methods were defined in divisions above (one locking method per
** division).  Those methods that are common to all locking modes
** are gathered together into this division.
*/
drhbfe66312006-10-03 17:40:40 +00003464
3465/*
drh734c9862008-11-28 15:37:20 +00003466** Seek to the offset passed as the second argument, then read cnt
3467** bytes into pBuf. Return the number of bytes actually read.
3468**
3469** NB: If you define USE_PREAD or USE_PREAD64, then it might also
3470** be necessary to define _XOPEN_SOURCE to be 500. This varies from
3471** one system to another. Since SQLite does not define USE_PREAD
3472** any any form by default, we will not attempt to define _XOPEN_SOURCE.
3473** See tickets #2741 and #2681.
3474**
3475** To avoid stomping the errno value on a failed read the lastErrno value
3476** is set before returning.
drh339eb0b2008-03-07 15:34:11 +00003477*/
drh734c9862008-11-28 15:37:20 +00003478static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){
3479 int got;
3480 i64 newOffset;
3481 TIMER_START;
3482#if defined(USE_PREAD)
3483 got = pread(id->h, pBuf, cnt, offset);
3484 SimulateIOError( got = -1 );
3485#elif defined(USE_PREAD64)
3486 got = pread64(id->h, pBuf, cnt, offset);
3487 SimulateIOError( got = -1 );
3488#else
3489 newOffset = lseek(id->h, offset, SEEK_SET);
3490 SimulateIOError( newOffset-- );
3491 if( newOffset!=offset ){
3492 if( newOffset == -1 ){
3493 ((unixFile*)id)->lastErrno = errno;
3494 }else{
3495 ((unixFile*)id)->lastErrno = 0;
3496 }
3497 return -1;
3498 }
3499 got = read(id->h, pBuf, cnt);
3500#endif
3501 TIMER_END;
3502 if( got<0 ){
3503 ((unixFile*)id)->lastErrno = errno;
3504 }
3505 OSTRACE5("READ %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED);
3506 return got;
drhbfe66312006-10-03 17:40:40 +00003507}
3508
3509/*
drh734c9862008-11-28 15:37:20 +00003510** Read data from a file into a buffer. Return SQLITE_OK if all
3511** bytes were read successfully and SQLITE_IOERR if anything goes
3512** wrong.
drh339eb0b2008-03-07 15:34:11 +00003513*/
drh734c9862008-11-28 15:37:20 +00003514static int unixRead(
3515 sqlite3_file *id,
3516 void *pBuf,
3517 int amt,
3518 sqlite3_int64 offset
3519){
3520 int got;
3521 assert( id );
3522 got = seekAndRead((unixFile*)id, offset, pBuf, amt);
3523 if( got==amt ){
3524 return SQLITE_OK;
3525 }else if( got<0 ){
3526 /* lastErrno set by seekAndRead */
3527 return SQLITE_IOERR_READ;
3528 }else{
3529 ((unixFile*)id)->lastErrno = 0; /* not a system error */
3530 /* Unread parts of the buffer must be zero-filled */
3531 memset(&((char*)pBuf)[got], 0, amt-got);
3532 return SQLITE_IOERR_SHORT_READ;
3533 }
3534}
3535
3536/*
3537** Seek to the offset in id->offset then read cnt bytes into pBuf.
3538** Return the number of bytes actually read. Update the offset.
3539**
3540** To avoid stomping the errno value on a failed write the lastErrno value
3541** is set before returning.
3542*/
3543static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
3544 int got;
3545 i64 newOffset;
3546 TIMER_START;
3547#if defined(USE_PREAD)
3548 got = pwrite(id->h, pBuf, cnt, offset);
3549#elif defined(USE_PREAD64)
3550 got = pwrite64(id->h, pBuf, cnt, offset);
3551#else
3552 newOffset = lseek(id->h, offset, SEEK_SET);
3553 if( newOffset!=offset ){
3554 if( newOffset == -1 ){
3555 ((unixFile*)id)->lastErrno = errno;
3556 }else{
3557 ((unixFile*)id)->lastErrno = 0;
3558 }
3559 return -1;
3560 }
3561 got = write(id->h, pBuf, cnt);
3562#endif
3563 TIMER_END;
3564 if( got<0 ){
3565 ((unixFile*)id)->lastErrno = errno;
3566 }
3567
3568 OSTRACE5("WRITE %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED);
3569 return got;
3570}
3571
3572
3573/*
3574** Write data from a buffer into a file. Return SQLITE_OK on success
3575** or some other error code on failure.
3576*/
3577static int unixWrite(
3578 sqlite3_file *id,
3579 const void *pBuf,
3580 int amt,
3581 sqlite3_int64 offset
3582){
3583 int wrote = 0;
3584 assert( id );
3585 assert( amt>0 );
3586 while( amt>0 && (wrote = seekAndWrite((unixFile*)id, offset, pBuf, amt))>0 ){
3587 amt -= wrote;
3588 offset += wrote;
3589 pBuf = &((char*)pBuf)[wrote];
3590 }
3591 SimulateIOError(( wrote=(-1), amt=1 ));
3592 SimulateDiskfullError(( wrote=0, amt=1 ));
3593 if( amt>0 ){
3594 if( wrote<0 ){
3595 /* lastErrno set by seekAndWrite */
3596 return SQLITE_IOERR_WRITE;
3597 }else{
3598 ((unixFile*)id)->lastErrno = 0; /* not a system error */
3599 return SQLITE_FULL;
3600 }
3601 }
3602 return SQLITE_OK;
3603}
3604
#if defined(SQLITE_TEST)
/*
** Running totals of sync operations, kept in test builds only.
** sqlite3_sync_count counts every sync and sqlite3_fullsync_count counts
** the full syncs among them, so tests can verify that syncs and
** fullsyncs are occurring at the right times.
*/
int sqlite3_sync_count = 0;
int sqlite3_fullsync_count = 0;
#endif

/*
** fdatasync() is only used when the HAVE_FDATASYNC macro is defined.
** Without it, every use of fdatasync() is redirected to fsync().
*/
#if !defined(HAVE_FDATASYNC)
# define fdatasync fsync
#endif

/*
** HAVE_FULLFSYNC is 1 when the F_FULLFSYNC macro is defined and 0 when
** it is not.  F_FULLFSYNC is currently only available on Mac OS X.
** But that could change.
*/
#if defined(F_FULLFSYNC)
# define HAVE_FULLFSYNC 1
#else
# define HAVE_FULLFSYNC 0
#endif
3633
3634/*
3635** The fsync() system call does not work as advertised on many
3636** unix systems. The following procedure is an attempt to make
3637** it work better.
3638**
3639** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful
3640** for testing when we want to run through the test suite quickly.
3641** You are strongly advised *not* to deploy with SQLITE_NO_SYNC
3642** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash
3643** or power failure will likely corrupt the database file.
3644*/
3645static int full_fsync(int fd, int fullSync, int dataOnly){
chw97185482008-11-17 08:05:31 +00003646 int rc;
drh734c9862008-11-28 15:37:20 +00003647
3648 /* The following "ifdef/elif/else/" block has the same structure as
3649 ** the one below. It is replicated here solely to avoid cluttering
3650 ** up the real code with the UNUSED_PARAMETER() macros.
3651 */
3652#ifdef SQLITE_NO_SYNC
3653 UNUSED_PARAMETER(fd);
3654 UNUSED_PARAMETER(fullSync);
3655 UNUSED_PARAMETER(dataOnly);
3656#elif HAVE_FULLFSYNC
3657 UNUSED_PARAMETER(dataOnly);
3658#else
3659 UNUSED_PARAMETER(fullSync);
3660#endif
3661
3662 /* Record the number of times that we do a normal fsync() and
3663 ** FULLSYNC. This is used during testing to verify that this procedure
3664 ** gets called with the correct arguments.
3665 */
3666#ifdef SQLITE_TEST
3667 if( fullSync ) sqlite3_fullsync_count++;
3668 sqlite3_sync_count++;
3669#endif
3670
3671 /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a
3672 ** no-op
3673 */
3674#ifdef SQLITE_NO_SYNC
3675 rc = SQLITE_OK;
3676#elif HAVE_FULLFSYNC
3677 if( fullSync ){
3678 rc = fcntl(fd, F_FULLFSYNC, 0);
3679 }else{
3680 rc = 1;
3681 }
3682 /* If the FULLFSYNC failed, fall back to attempting an fsync().
drh6b9d6dd2008-12-03 19:34:47 +00003683 ** It shouldn't be possible for fullfsync to fail on the local
3684 ** file system (on OSX), so failure indicates that FULLFSYNC
3685 ** isn't supported for this file system. So, attempt an fsync
3686 ** and (for now) ignore the overhead of a superfluous fcntl call.
3687 ** It'd be better to detect fullfsync support once and avoid
3688 ** the fcntl call every time sync is called.
3689 */
drh734c9862008-11-28 15:37:20 +00003690 if( rc ) rc = fsync(fd);
3691
3692#else
3693 if( dataOnly ){
3694 rc = fdatasync(fd);
3695 if( OS_VXWORKS && rc==-1 && errno==ENOTSUP ){
3696 rc = fsync(fd);
3697 }
3698 }else{
3699 rc = fsync(fd);
3700 }
3701#endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */
3702
3703 if( OS_VXWORKS && rc!= -1 ){
3704 rc = 0;
3705 }
chw97185482008-11-17 08:05:31 +00003706 return rc;
drhbfe66312006-10-03 17:40:40 +00003707}
3708
drh734c9862008-11-28 15:37:20 +00003709/*
3710** Make sure all writes to a particular file are committed to disk.
3711**
3712** If dataOnly==0 then both the file itself and its metadata (file
3713** size, access time, etc) are synced. If dataOnly!=0 then only the
3714** file data is synced.
3715**
3716** Under Unix, also make sure that the directory entry for the file
3717** has been created by fsync-ing the directory that contains the file.
3718** If we do not do this and we encounter a power failure, the directory
3719** entry for the journal might not exist after we reboot. The next
3720** SQLite to access the file will not know that the journal exists (because
3721** the directory entry for the journal was never created) and the transaction
3722** will not roll back - possibly leading to database corruption.
3723*/
3724static int unixSync(sqlite3_file *id, int flags){
3725 int rc;
3726 unixFile *pFile = (unixFile*)id;
3727
3728 int isDataOnly = (flags&SQLITE_SYNC_DATAONLY);
3729 int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL;
3730
3731 /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */
3732 assert((flags&0x0F)==SQLITE_SYNC_NORMAL
3733 || (flags&0x0F)==SQLITE_SYNC_FULL
3734 );
3735
3736 /* Unix cannot, but some systems may return SQLITE_FULL from here. This
3737 ** line is to test that doing so does not cause any problems.
3738 */
3739 SimulateDiskfullError( return SQLITE_FULL );
3740
3741 assert( pFile );
3742 OSTRACE2("SYNC %-3d\n", pFile->h);
3743 rc = full_fsync(pFile->h, isFullsync, isDataOnly);
3744 SimulateIOError( rc=1 );
3745 if( rc ){
3746 pFile->lastErrno = errno;
3747 return SQLITE_IOERR_FSYNC;
3748 }
3749 if( pFile->dirfd>=0 ){
3750 int err;
3751 OSTRACE4("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd,
3752 HAVE_FULLFSYNC, isFullsync);
3753#ifndef SQLITE_DISABLE_DIRSYNC
3754 /* The directory sync is only attempted if full_fsync is
3755 ** turned off or unavailable. If a full_fsync occurred above,
3756 ** then the directory sync is superfluous.
3757 */
3758 if( (!HAVE_FULLFSYNC || !isFullsync) && full_fsync(pFile->dirfd,0,0) ){
3759 /*
3760 ** We have received multiple reports of fsync() returning
3761 ** errors when applied to directories on certain file systems.
3762 ** A failed directory sync is not a big deal. So it seems
3763 ** better to ignore the error. Ticket #1657
3764 */
3765 /* pFile->lastErrno = errno; */
3766 /* return SQLITE_IOERR; */
3767 }
3768#endif
3769 err = close(pFile->dirfd); /* Only need to sync once, so close the */
3770 if( err==0 ){ /* directory when we are done */
3771 pFile->dirfd = -1;
3772 }else{
3773 pFile->lastErrno = errno;
3774 rc = SQLITE_IOERR_DIR_CLOSE;
3775 }
3776 }
3777 return rc;
3778}
3779
3780/*
3781** Truncate an open file to a specified size
3782*/
3783static int unixTruncate(sqlite3_file *id, i64 nByte){
3784 int rc;
3785 assert( id );
3786 SimulateIOError( return SQLITE_IOERR_TRUNCATE );
3787 rc = ftruncate(((unixFile*)id)->h, (off_t)nByte);
3788 if( rc ){
3789 ((unixFile*)id)->lastErrno = errno;
3790 return SQLITE_IOERR_TRUNCATE;
3791 }else{
3792 return SQLITE_OK;
3793 }
3794}
3795
3796/*
3797** Determine the current size of a file in bytes
3798*/
3799static int unixFileSize(sqlite3_file *id, i64 *pSize){
3800 int rc;
3801 struct stat buf;
3802 assert( id );
3803 rc = fstat(((unixFile*)id)->h, &buf);
3804 SimulateIOError( rc=1 );
3805 if( rc!=0 ){
3806 ((unixFile*)id)->lastErrno = errno;
3807 return SQLITE_IOERR_FSTAT;
3808 }
3809 *pSize = buf.st_size;
3810
3811 /* When opening a zero-size database, the findLockInfo() procedure
3812 ** writes a single byte into that file in order to work around a bug
3813 ** in the OS-X msdos filesystem. In order to avoid problems with upper
3814 ** layers, we need to report this file size as zero even though it is
3815 ** really 1. Ticket #3260.
3816 */
3817 if( *pSize==1 ) *pSize = 0;
3818
3819
3820 return SQLITE_OK;
3821}
3822
danielk1977ad94b582007-08-20 06:44:22 +00003823
danielk1977e3026632004-06-22 11:29:02 +00003824/*
drh9e33c2c2007-08-31 18:34:59 +00003825** Information and control of an open file handle.
drh18839212005-11-26 03:43:23 +00003826*/
drhcc6bb3e2007-08-31 16:11:35 +00003827static int unixFileControl(sqlite3_file *id, int op, void *pArg){
drh9e33c2c2007-08-31 18:34:59 +00003828 switch( op ){
3829 case SQLITE_FCNTL_LOCKSTATE: {
3830 *(int*)pArg = ((unixFile*)id)->locktype;
3831 return SQLITE_OK;
3832 }
drh7708e972008-11-29 00:56:52 +00003833 case SQLITE_LAST_ERRNO: {
3834 *(int*)pArg = ((unixFile*)id)->lastErrno;
3835 return SQLITE_OK;
3836 }
3837#if SQLITE_ENABLE_LOCKING_STYLE && defined(__DARWIN__)
aswiftaebf4132008-11-21 00:10:35 +00003838 case SQLITE_GET_LOCKPROXYFILE: {
aswiftaebf4132008-11-21 00:10:35 +00003839 unixFile *pFile = (unixFile*)id;
drh7708e972008-11-29 00:56:52 +00003840 if( pFile->pMethod == &proxyIoMethods ){
aswiftaebf4132008-11-21 00:10:35 +00003841 proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext;
drh6b9d6dd2008-12-03 19:34:47 +00003842 proxyTakeConch(pFile);
aswiftaebf4132008-11-21 00:10:35 +00003843 if( pCtx->lockProxyPath ){
3844 *(const char **)pArg = pCtx->lockProxyPath;
3845 }else{
3846 *(const char **)pArg = ":auto: (not held)";
3847 }
3848 } else {
3849 *(const char **)pArg = NULL;
3850 }
aswiftaebf4132008-11-21 00:10:35 +00003851 return SQLITE_OK;
3852 }
3853 case SQLITE_SET_LOCKPROXYFILE: {
aswiftaebf4132008-11-21 00:10:35 +00003854 unixFile *pFile = (unixFile*)id;
3855 int rc = SQLITE_OK;
drh7708e972008-11-29 00:56:52 +00003856 int isProxyStyle = (pFile->pMethod == &proxyIoMethods);
aswiftaebf4132008-11-21 00:10:35 +00003857 if( pArg==NULL || (const char *)pArg==0 ){
3858 if( isProxyStyle ){
drh7708e972008-11-29 00:56:52 +00003859 /* turn off proxy locking - not supported */
aswiftaebf4132008-11-21 00:10:35 +00003860 rc = SQLITE_ERROR /*SQLITE_PROTOCOL? SQLITE_MISUSE?*/;
3861 }else{
drh7708e972008-11-29 00:56:52 +00003862 /* turn off proxy locking - already off - NOOP */
aswiftaebf4132008-11-21 00:10:35 +00003863 rc = SQLITE_OK;
3864 }
3865 }else{
3866 const char *proxyPath = (const char *)pArg;
3867 if( isProxyStyle ){
3868 proxyLockingContext *pCtx =
3869 (proxyLockingContext*)pFile->lockingContext;
drh7708e972008-11-29 00:56:52 +00003870 if( !strcmp(pArg, ":auto:")
3871 || (pCtx->lockProxyPath &&
3872 !strncmp(pCtx->lockProxyPath, proxyPath, MAXPATHLEN))
3873 ){
aswiftaebf4132008-11-21 00:10:35 +00003874 rc = SQLITE_OK;
3875 }else{
3876 rc = switchLockProxyPath(pFile, proxyPath);
3877 }
3878 }else{
drh7708e972008-11-29 00:56:52 +00003879 /* turn on proxy file locking */
aswiftaebf4132008-11-21 00:10:35 +00003880 rc = transformUnixFileForLockProxy(pFile, proxyPath);
3881 }
3882 }
3883 return rc;
drh7708e972008-11-29 00:56:52 +00003884 }
drh6b9d6dd2008-12-03 19:34:47 +00003885#endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__DARWIN__) */
drh9e33c2c2007-08-31 18:34:59 +00003886 }
drhcc6bb3e2007-08-31 16:11:35 +00003887 return SQLITE_ERROR;
drh9cbe6352005-11-29 03:13:21 +00003888}
3889
3890/*
danielk1977a3d4c882007-03-23 10:08:38 +00003891** Return the sector size in bytes of the underlying block device for
3892** the specified file. This is almost always 512 bytes, but may be
3893** larger for some devices.
3894**
3895** SQLite code assumes this function cannot fail. It also assumes that
3896** if two files are created in the same file-system directory (i.e.
drh85b623f2007-12-13 21:54:09 +00003897** a database and its journal file) that the sector size will be the
danielk1977a3d4c882007-03-23 10:08:38 +00003898** same for both.
3899*/
danielk1977397d65f2008-11-19 11:35:39 +00003900static int unixSectorSize(sqlite3_file *NotUsed){
3901 UNUSED_PARAMETER(NotUsed);
drh3ceeb752007-03-29 18:19:52 +00003902 return SQLITE_DEFAULT_SECTOR_SIZE;
danielk1977a3d4c882007-03-23 10:08:38 +00003903}
3904
danielk197790949c22007-08-17 16:50:38 +00003905/*
danielk1977397d65f2008-11-19 11:35:39 +00003906** Return the device characteristics for the file. This is always 0 for unix.
danielk197790949c22007-08-17 16:50:38 +00003907*/
danielk1977397d65f2008-11-19 11:35:39 +00003908static int unixDeviceCharacteristics(sqlite3_file *NotUsed){
3909 UNUSED_PARAMETER(NotUsed);
danielk197762079062007-08-15 17:08:46 +00003910 return 0;
3911}
3912
drh734c9862008-11-28 15:37:20 +00003913/*
3914** Here ends the implementation of all sqlite3_file methods.
3915**
3916********************** End sqlite3_file Methods *******************************
3917******************************************************************************/
3918
/*
** This division contains definitions of sqlite3_io_methods objects that
** implement various file locking strategies.  It also contains definitions
** of "finder" functions.  A finder-function is used to locate the
** appropriate sqlite3_io_methods object for a particular database file.
** The pAppData field of each sqlite3_vfs object is initialized to point
** to the correct finder-function for that VFS.
**
** Most finder functions return a pointer to a fixed sqlite3_io_methods
** object.  The only interesting finder-function is autolockIoFinder, which
** looks at the filesystem type and tries to guess the best locking
** strategy from that.
**
** Each instance of the IOMETHODS macro generates two objects:
**
**   *  A constant sqlite3_io_methods object called METHOD that has locking
**      methods CLOSE, LOCK, UNLOCK, CKLOCK.  All non-locking methods are
**      the shared unixXxx implementations.
**
**   *  An I/O method finder function called FINDER that ignores both of
**      its arguments and returns a pointer to the METHOD object in the
**      previous bullet.
*/
#define IOMETHODS(FINDER, METHOD, CLOSE, LOCK, UNLOCK, CKLOCK)               \
static const sqlite3_io_methods METHOD = {                                   \
  1,                             /* iVersion */                              \
  CLOSE,                         /* xClose */                                \
  unixRead,                      /* xRead */                                 \
  unixWrite,                     /* xWrite */                                \
  unixTruncate,                  /* xTruncate */                             \
  unixSync,                      /* xSync */                                 \
  unixFileSize,                  /* xFileSize */                             \
  LOCK,                          /* xLock */                                 \
  UNLOCK,                        /* xUnlock */                               \
  CKLOCK,                        /* xCheckReservedLock */                    \
  unixFileControl,               /* xFileControl */                          \
  unixSectorSize,                /* xSectorSize */                           \
  unixDeviceCharacteristics      /* xDeviceCharacteristics */                \
};                                                                           \
static const sqlite3_io_methods *FINDER(const char *z, int h){               \
  UNUSED_PARAMETER(z);                                                       \
  UNUSED_PARAMETER(h);                                                       \
  return &METHOD;                                                            \
}
drh7708e972008-11-29 00:56:52 +00003961
/*
** Here are all of the sqlite3_io_methods objects for each of the
** locking strategies.  Functions that return pointers to these methods
** are also created.
**
** The arguments to each IOMETHODS() invocation are, in order: the finder
** function name, the sqlite3_io_methods object name, and then the xClose,
** xLock, xUnlock and xCheckReservedLock methods.
*/

/* Posix advisory locks */
IOMETHODS(posixIoFinder, posixIoMethods,
          unixClose, unixLock, unixUnlock, unixCheckReservedLock);

/* No-op locks */
IOMETHODS(nolockIoFinder, nolockIoMethods,
          nolockClose, nolockLock, nolockUnlock, nolockCheckReservedLock);

/* Dot-file locks */
IOMETHODS(dotlockIoFinder, dotlockIoMethods,
          dotlockClose, dotlockLock, dotlockUnlock, dotlockCheckReservedLock);

#if SQLITE_ENABLE_LOCKING_STYLE
/* flock() locks */
IOMETHODS(flockIoFinder, flockIoMethods,
          flockClose, flockLock, flockUnlock, flockCheckReservedLock);
#endif

#if OS_VXWORKS
/* Named semaphore locks */
IOMETHODS(semIoFinder, semIoMethods,
          semClose, semLock, semUnlock, semCheckReservedLock);
#endif

#if defined(__DARWIN__) && SQLITE_ENABLE_LOCKING_STYLE
/* AFP filesystem locks */
IOMETHODS(afpIoFinder, afpIoMethods,
          afpClose, afpLock, afpUnlock, afpCheckReservedLock);

/* Proxy locks */
IOMETHODS(proxyIoFinder, proxyIoMethods,
          proxyClose, proxyLock, proxyUnlock, proxyCheckReservedLock);
#endif
drh7708e972008-11-29 00:56:52 +00004032
4033
#if defined(__DARWIN__) && SQLITE_ENABLE_LOCKING_STYLE
/*
** This "finder" function attempts to determine the best locking strategy
** for the database file "filePath".  It then returns the sqlite3_io_methods
** object that implements that strategy.
**
** The choice is made as follows:
**
**   1.  A NULL filePath selects the no-op locking methods.
**   2.  If statfs() succeeds, a read-only mount selects the no-op locking
**       methods and a filesystem type name found in the aMap[] table
**       selects the methods listed there.
**   3.  Otherwise an fcntl(F_GETLK) probe is made on fd.  Posix locking is
**       used if the probe succeeds and dot-file locking if it fails.
**
** This is for MacOSX only.
*/
static const sqlite3_io_methods *autolockIoFinder(
  const char *filePath,    /* name of the database file */
  int fd                   /* file descriptor open on the database file */
){
  static const struct Mapping {
    const char *zFilesystem;              /* Filesystem type name */
    const sqlite3_io_methods *pMethods;   /* Appropriate locking method */
  } aMap[] = {
    { "hfs",    &posixIoMethods },
    { "ufs",    &posixIoMethods },
    { "afpfs",  &afpIoMethods },
#ifdef SQLITE_ENABLE_AFP_LOCKING_SMB
    { "smbfs",  &afpIoMethods },
#else
    { "smbfs",  &flockIoMethods },
#endif
    { "webdav", &nolockIoMethods },
    { 0, 0 }
  };
  const struct Mapping *pMap;   /* Iterator over aMap[] */
  struct statfs fsInfo;         /* Result of statfs() on filePath */
  struct flock probe;           /* Lock description for the F_GETLK probe */

  /* If filePath==NULL that means we are dealing with a transient file
  ** that does not need to be locked. */
  if( filePath==0 ) return &nolockIoMethods;

  if( statfs(filePath, &fsInfo)!=-1 ){
    if( fsInfo.f_flags & MNT_RDONLY ) return &nolockIoMethods;
    for(pMap=aMap; pMap->zFilesystem; pMap++){
      if( strcmp(fsInfo.f_fstypename, pMap->zFilesystem)==0 ){
        return pMap->pMethods;
      }
    }
  }

  /* Default case. Handles, amongst others, "nfs".
  ** Test byte-range lock using fcntl().  If the call succeeds,
  ** assume that the file-system supports POSIX style locks.
  */
  probe.l_type = F_RDLCK;
  probe.l_whence = SEEK_SET;
  probe.l_start = 0;
  probe.l_len = 1;
  return fcntl(fd, F_GETLK, &probe)!=-1 ? &posixIoMethods
                                         : &dotlockIoMethods;
}
#endif /* defined(__DARWIN__) && SQLITE_ENABLE_LOCKING_STYLE */
4096
4097/*
4098** An abstract type for a pointer to a IO method finder function:
4099*/
4100typedef const sqlite3_io_methods *(*finder_type)(const char*,int);
4101
aswiftaebf4132008-11-21 00:10:35 +00004102
drh734c9862008-11-28 15:37:20 +00004103/****************************************************************************
4104**************************** sqlite3_vfs methods ****************************
4105**
4106** This division contains the implementation of methods on the
4107** sqlite3_vfs object.
4108*/
4109
danielk1977a3d4c882007-03-23 10:08:38 +00004110/*
danielk1977e339d652008-06-28 11:23:00 +00004111** Initialize the contents of the unixFile structure pointed to by pId.
danielk1977ad94b582007-08-20 06:44:22 +00004112*/
4113static int fillInUnixFile(
danielk1977e339d652008-06-28 11:23:00 +00004114 sqlite3_vfs *pVfs, /* Pointer to vfs object */
drhbfe66312006-10-03 17:40:40 +00004115 int h, /* Open file descriptor of file being opened */
danielk1977ad94b582007-08-20 06:44:22 +00004116 int dirfd, /* Directory file descriptor */
drh218c5082008-03-07 00:27:10 +00004117 sqlite3_file *pId, /* Write to the unixFile structure here */
drhda0e7682008-07-30 15:27:54 +00004118 const char *zFilename, /* Name of the file being opened */
chw97185482008-11-17 08:05:31 +00004119 int noLock, /* Omit locking if true */
4120 int isDelete /* Delete on close if true */
drhbfe66312006-10-03 17:40:40 +00004121){
drh7708e972008-11-29 00:56:52 +00004122 const sqlite3_io_methods *pLockingStyle;
drhda0e7682008-07-30 15:27:54 +00004123 unixFile *pNew = (unixFile *)pId;
4124 int rc = SQLITE_OK;
4125
danielk197717b90b52008-06-06 11:11:25 +00004126 assert( pNew->pLock==NULL );
4127 assert( pNew->pOpen==NULL );
drh218c5082008-03-07 00:27:10 +00004128
danielk1977a03396a2008-11-19 14:35:46 +00004129 /* Parameter isDelete is only used on vxworks. Parameter pVfs is only
4130 ** used if ENABLE_LOCKING_STYLE is defined. Express this explicitly
4131 ** here to prevent compiler warnings about unused parameters.
4132 */
drh7708e972008-11-29 00:56:52 +00004133#if !OS_VXWORKS
4134 UNUSED_PARAMETER(isDelete);
4135#endif
4136#if !SQLITE_ENABLE_LOCKING_STYLE
4137 UNUSED_PARAMETER(pVfs);
4138#endif
4139#if !OS_VXWORKS && !SQLITE_ENABLE_LOCKING_STYLE
4140 UNUSED_PARAMETER(zFilename);
4141#endif
danielk1977a03396a2008-11-19 14:35:46 +00004142
drh218c5082008-03-07 00:27:10 +00004143 OSTRACE3("OPEN %-3d %s\n", h, zFilename);
danielk1977ad94b582007-08-20 06:44:22 +00004144 pNew->h = h;
drh218c5082008-03-07 00:27:10 +00004145 pNew->dirfd = dirfd;
danielk1977ad94b582007-08-20 06:44:22 +00004146 SET_THREADID(pNew);
drh339eb0b2008-03-07 15:34:11 +00004147
drh6c7d5c52008-11-21 20:32:33 +00004148#if OS_VXWORKS
drh107886a2008-11-21 22:21:50 +00004149 pNew->pId = vxworksFindFileId(zFilename);
4150 if( pNew->pId==0 ){
4151 noLock = 1;
4152 rc = SQLITE_NOMEM;
chw97185482008-11-17 08:05:31 +00004153 }
4154#endif
4155
drhda0e7682008-07-30 15:27:54 +00004156 if( noLock ){
drh7708e972008-11-29 00:56:52 +00004157 pLockingStyle = &nolockIoMethods;
drhda0e7682008-07-30 15:27:54 +00004158 }else{
drh7708e972008-11-29 00:56:52 +00004159 pLockingStyle = (*(finder_type)pVfs->pAppData)(zFilename, h);
aswiftaebf4132008-11-21 00:10:35 +00004160#if SQLITE_ENABLE_LOCKING_STYLE
4161 /* Cache zFilename in the locking context (AFP and dotlock override) for
4162 ** proxyLock activation is possible (remote proxy is based on db name)
4163 ** zFilename remains valid until file is closed, to support */
4164 pNew->lockingContext = (void*)zFilename;
4165#endif
drhda0e7682008-07-30 15:27:54 +00004166 }
danielk1977e339d652008-06-28 11:23:00 +00004167
drh7708e972008-11-29 00:56:52 +00004168 if( pLockingStyle == &posixIoMethods ){
4169 unixEnterMutex();
4170 rc = findLockInfo(pNew, &pNew->pLock, &pNew->pOpen);
4171 unixLeaveMutex();
4172 }
danielk1977e339d652008-06-28 11:23:00 +00004173
drh7708e972008-11-29 00:56:52 +00004174#if SQLITE_ENABLE_LOCKING_STYLE && defined(__DARWIN__)
4175 else if( pLockingStyle == &apfIoMethods ){
4176 /* AFP locking uses the file path so it needs to be included in
4177 ** the afpLockingContext.
4178 */
4179 afpLockingContext *pCtx;
4180 pNew->lockingContext = pCtx = sqlite3_malloc( sizeof(*pCtx) );
4181 if( pCtx==0 ){
4182 rc = SQLITE_NOMEM;
4183 }else{
4184 /* NB: zFilename exists and remains valid until the file is closed
4185 ** according to requirement F11141. So we do not need to make a
4186 ** copy of the filename. */
4187 pCtx->dbPath = zFilename;
4188 srandomdev();
drh6c7d5c52008-11-21 20:32:33 +00004189 unixEnterMutex();
drh7708e972008-11-29 00:56:52 +00004190 rc = findLockInfo(pNew, NULL, &pNew->pOpen);
4191 unixLeaveMutex();
drhbfe66312006-10-03 17:40:40 +00004192 }
drh7708e972008-11-29 00:56:52 +00004193 }
4194#endif
danielk1977e339d652008-06-28 11:23:00 +00004195
drh40bbb0a2008-09-23 10:23:26 +00004196#if SQLITE_ENABLE_LOCKING_STYLE
drh7708e972008-11-29 00:56:52 +00004197 else if( pLockingStyle == &dotlockIoMethods ){
4198 /* Dotfile locking uses the file path so it needs to be included in
4199 ** the dotlockLockingContext
4200 */
4201 char *zLockFile;
4202 int nFilename;
4203 nFilename = strlen(zFilename) + 6;
4204 zLockFile = (char *)sqlite3_malloc(nFilename);
4205 if( zLockFile==0 ){
4206 rc = SQLITE_NOMEM;
4207 }else{
4208 sqlite3_snprintf(nFilename, zLockFile, "%s" DOTLOCK_SUFFIX, zFilename);
danielk1977e339d652008-06-28 11:23:00 +00004209 }
drh7708e972008-11-29 00:56:52 +00004210 pNew->lockingContext = zLockFile;
4211 }
chw97185482008-11-17 08:05:31 +00004212#endif
danielk1977e339d652008-06-28 11:23:00 +00004213
drh6c7d5c52008-11-21 20:32:33 +00004214#if OS_VXWORKS
drh7708e972008-11-29 00:56:52 +00004215 else if( pLockingStyle == &semIoMethods ){
4216 /* Named semaphore locking uses the file path so it needs to be
4217 ** included in the semLockingContext
4218 */
4219 unixEnterMutex();
4220 rc = findLockInfo(pNew, &pNew->pLock, &pNew->pOpen);
4221 if( (rc==SQLITE_OK) && (pNew->pOpen->pSem==NULL) ){
4222 char *zSemName = pNew->pOpen->aSemName;
4223 int n;
4224 sqlite3_snprintf(MAX_PATHNAME, zSemName, "%s.sem",
4225 pNew->pId->zCanonicalName);
4226 for( n=0; zSemName[n]; n++ )
4227 if( zSemName[n]=='/' ) zSemName[n] = '_';
4228 pNew->pOpen->pSem = sem_open(zSemName, O_CREAT, 0666, 1);
4229 if( pNew->pOpen->pSem == SEM_FAILED ){
4230 rc = SQLITE_NOMEM;
4231 pNew->pOpen->aSemName[0] = '\0';
chw97185482008-11-17 08:05:31 +00004232 }
chw97185482008-11-17 08:05:31 +00004233 }
drh7708e972008-11-29 00:56:52 +00004234 unixLeaveMutex();
danielk1977e339d652008-06-28 11:23:00 +00004235 }
drh7708e972008-11-29 00:56:52 +00004236#endif
aswift5b1a2562008-08-22 00:22:35 +00004237
4238 pNew->lastErrno = 0;
drh6c7d5c52008-11-21 20:32:33 +00004239#if OS_VXWORKS
chw97185482008-11-17 08:05:31 +00004240 if( rc!=SQLITE_OK ){
4241 unlink(zFilename);
4242 isDelete = 0;
4243 }
4244 pNew->isDelete = isDelete;
4245#endif
danielk1977e339d652008-06-28 11:23:00 +00004246 if( rc!=SQLITE_OK ){
aswiftaebf4132008-11-21 00:10:35 +00004247 if( dirfd>=0 ) close(dirfd); /* silent leak if fail, already in error */
drhbfe66312006-10-03 17:40:40 +00004248 close(h);
danielk1977e339d652008-06-28 11:23:00 +00004249 }else{
drh7708e972008-11-29 00:56:52 +00004250 pNew->pMethod = pLockingStyle;
danielk1977e339d652008-06-28 11:23:00 +00004251 OpenCounter(+1);
drhbfe66312006-10-03 17:40:40 +00004252 }
danielk1977e339d652008-06-28 11:23:00 +00004253 return rc;
drh054889e2005-11-30 03:20:31 +00004254}
drh9c06c952005-11-26 00:25:00 +00004255
danielk1977ad94b582007-08-20 06:44:22 +00004256/*
4257** Open a file descriptor to the directory containing file zFilename.
4258** If successful, *pFd is set to the opened file descriptor and
4259** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM
4260** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined
4261** value.
4262**
4263** If SQLITE_OK is returned, the caller is responsible for closing
4264** the file descriptor *pFd using close().
4265*/
danielk1977fee2d252007-08-18 10:59:19 +00004266static int openDirectory(const char *zFilename, int *pFd){
danielk1977fee2d252007-08-18 10:59:19 +00004267 int ii;
drh777b17a2007-09-20 10:02:54 +00004268 int fd = -1;
drhf3a65f72007-08-22 20:18:21 +00004269 char zDirname[MAX_PATHNAME+1];
danielk1977fee2d252007-08-18 10:59:19 +00004270
drh153c62c2007-08-24 03:51:33 +00004271 sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename);
danielk1977fee2d252007-08-18 10:59:19 +00004272 for(ii=strlen(zDirname); ii>=0 && zDirname[ii]!='/'; ii--);
4273 if( ii>0 ){
4274 zDirname[ii] = '\0';
4275 fd = open(zDirname, O_RDONLY|O_BINARY, 0);
drh777b17a2007-09-20 10:02:54 +00004276 if( fd>=0 ){
danielk1977fee2d252007-08-18 10:59:19 +00004277#ifdef FD_CLOEXEC
4278 fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
4279#endif
4280 OSTRACE3("OPENDIR %-3d %s\n", fd, zDirname);
4281 }
4282 }
danielk1977fee2d252007-08-18 10:59:19 +00004283 *pFd = fd;
drh777b17a2007-09-20 10:02:54 +00004284 return (fd>=0?SQLITE_OK:SQLITE_CANTOPEN);
danielk1977fee2d252007-08-18 10:59:19 +00004285}
4286
danielk1977b4b47412007-08-17 15:53:36 +00004287/*
danielk197717b90b52008-06-06 11:11:25 +00004288** Create a temporary file name in zBuf. zBuf must be allocated
4289** by the calling process and must be big enough to hold at least
4290** pVfs->mxPathname bytes.
4291*/
4292static int getTempname(int nBuf, char *zBuf){
4293 static const char *azDirs[] = {
4294 0,
aswiftaebf4132008-11-21 00:10:35 +00004295 0,
danielk197717b90b52008-06-06 11:11:25 +00004296 "/var/tmp",
4297 "/usr/tmp",
4298 "/tmp",
4299 ".",
4300 };
4301 static const unsigned char zChars[] =
4302 "abcdefghijklmnopqrstuvwxyz"
4303 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
4304 "0123456789";
drh41022642008-11-21 00:24:42 +00004305 unsigned int i, j;
danielk197717b90b52008-06-06 11:11:25 +00004306 struct stat buf;
4307 const char *zDir = ".";
4308
4309 /* It's odd to simulate an io-error here, but really this is just
4310 ** using the io-error infrastructure to test that SQLite handles this
4311 ** function failing.
4312 */
4313 SimulateIOError( return SQLITE_IOERR );
4314
4315 azDirs[0] = sqlite3_temp_directory;
aswiftaebf4132008-11-21 00:10:35 +00004316 if (NULL == azDirs[1]) {
4317 azDirs[1] = getenv("TMPDIR");
4318 }
4319
4320 for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
danielk197717b90b52008-06-06 11:11:25 +00004321 if( azDirs[i]==0 ) continue;
4322 if( stat(azDirs[i], &buf) ) continue;
4323 if( !S_ISDIR(buf.st_mode) ) continue;
4324 if( access(azDirs[i], 07) ) continue;
4325 zDir = azDirs[i];
4326 break;
4327 }
4328
4329 /* Check that the output buffer is large enough for the temporary file
4330 ** name. If it is not, return SQLITE_ERROR.
4331 */
danielk197700e13612008-11-17 19:18:54 +00004332 if( (strlen(zDir) + strlen(SQLITE_TEMP_FILE_PREFIX) + 17) >= (size_t)nBuf ){
danielk197717b90b52008-06-06 11:11:25 +00004333 return SQLITE_ERROR;
4334 }
4335
4336 do{
4337 sqlite3_snprintf(nBuf-17, zBuf, "%s/"SQLITE_TEMP_FILE_PREFIX, zDir);
4338 j = strlen(zBuf);
4339 sqlite3_randomness(15, &zBuf[j]);
4340 for(i=0; i<15; i++, j++){
4341 zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
4342 }
4343 zBuf[j] = 0;
4344 }while( access(zBuf,0)==0 );
4345 return SQLITE_OK;
4346}
4347
4348
4349/*
danielk1977ad94b582007-08-20 06:44:22 +00004350** Open the file zPath.
4351**
danielk1977b4b47412007-08-17 15:53:36 +00004352** Previously, the SQLite OS layer used three functions in place of this
4353** one:
4354**
4355** sqlite3OsOpenReadWrite();
4356** sqlite3OsOpenReadOnly();
4357** sqlite3OsOpenExclusive();
4358**
4359** These calls correspond to the following combinations of flags:
4360**
4361** ReadWrite() -> (READWRITE | CREATE)
4362** ReadOnly() -> (READONLY)
4363** OpenExclusive() -> (READWRITE | CREATE | EXCLUSIVE)
4364**
4365** The old OpenExclusive() accepted a boolean argument - "delFlag". If
4366** true, the file was configured to be automatically deleted when the
4367** file handle closed. To achieve the same effect using this new
4368** interface, add the DELETEONCLOSE flag to those specified above for
4369** OpenExclusive().
4370*/
4371static int unixOpen(
drh6b9d6dd2008-12-03 19:34:47 +00004372 sqlite3_vfs *pVfs, /* The VFS for which this is the xOpen method */
4373 const char *zPath, /* Pathname of file to be opened */
4374 sqlite3_file *pFile, /* The file descriptor to be filled in */
4375 int flags, /* Input flags to control the opening */
4376 int *pOutFlags /* Output flags returned to SQLite core */
danielk1977b4b47412007-08-17 15:53:36 +00004377){
danielk1977fee2d252007-08-18 10:59:19 +00004378 int fd = 0; /* File descriptor returned by open() */
4379 int dirfd = -1; /* Directory file descriptor */
drh6b9d6dd2008-12-03 19:34:47 +00004380 int openFlags = 0; /* Flags to pass to open() */
danielk1977fee2d252007-08-18 10:59:19 +00004381 int eType = flags&0xFFFFFF00; /* Type of file to open */
drhda0e7682008-07-30 15:27:54 +00004382 int noLock; /* True to omit locking primitives */
aswiftaebf4132008-11-21 00:10:35 +00004383 int rc = SQLITE_OK;
danielk1977b4b47412007-08-17 15:53:36 +00004384
4385 int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE);
4386 int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE);
4387 int isCreate = (flags & SQLITE_OPEN_CREATE);
4388 int isReadonly = (flags & SQLITE_OPEN_READONLY);
4389 int isReadWrite = (flags & SQLITE_OPEN_READWRITE);
4390
danielk1977fee2d252007-08-18 10:59:19 +00004391 /* If creating a master or main-file journal, this function will open
4392 ** a file-descriptor on the directory too. The first time unixSync()
4393 ** is called the directory file descriptor will be fsync()ed and close()d.
4394 */
4395 int isOpenDirectory = (isCreate &&
4396 (eType==SQLITE_OPEN_MASTER_JOURNAL || eType==SQLITE_OPEN_MAIN_JOURNAL)
4397 );
4398
danielk197717b90b52008-06-06 11:11:25 +00004399 /* If argument zPath is a NULL pointer, this function is required to open
4400 ** a temporary file. Use this buffer to store the file name in.
4401 */
4402 char zTmpname[MAX_PATHNAME+1];
4403 const char *zName = zPath;
4404
danielk1977fee2d252007-08-18 10:59:19 +00004405 /* Check the following statements are true:
4406 **
4407 ** (a) Exactly one of the READWRITE and READONLY flags must be set, and
4408 ** (b) if CREATE is set, then READWRITE must also be set, and
4409 ** (c) if EXCLUSIVE is set, then CREATE must also be set.
drh33f4e022007-09-03 15:19:34 +00004410 ** (d) if DELETEONCLOSE is set, then CREATE must also be set.
danielk1977fee2d252007-08-18 10:59:19 +00004411 */
danielk1977b4b47412007-08-17 15:53:36 +00004412 assert((isReadonly==0 || isReadWrite==0) && (isReadWrite || isReadonly));
danielk1977b4b47412007-08-17 15:53:36 +00004413 assert(isCreate==0 || isReadWrite);
danielk1977b4b47412007-08-17 15:53:36 +00004414 assert(isExclusive==0 || isCreate);
drh33f4e022007-09-03 15:19:34 +00004415 assert(isDelete==0 || isCreate);
4416
drh33f4e022007-09-03 15:19:34 +00004417 /* The main DB, main journal, and master journal are never automatically
4418 ** deleted
4419 */
4420 assert( eType!=SQLITE_OPEN_MAIN_DB || !isDelete );
4421 assert( eType!=SQLITE_OPEN_MAIN_JOURNAL || !isDelete );
4422 assert( eType!=SQLITE_OPEN_MASTER_JOURNAL || !isDelete );
danielk1977b4b47412007-08-17 15:53:36 +00004423
danielk1977fee2d252007-08-18 10:59:19 +00004424 /* Assert that the upper layer has set one of the "file-type" flags. */
4425 assert( eType==SQLITE_OPEN_MAIN_DB || eType==SQLITE_OPEN_TEMP_DB
4426 || eType==SQLITE_OPEN_MAIN_JOURNAL || eType==SQLITE_OPEN_TEMP_JOURNAL
4427 || eType==SQLITE_OPEN_SUBJOURNAL || eType==SQLITE_OPEN_MASTER_JOURNAL
drh33f4e022007-09-03 15:19:34 +00004428 || eType==SQLITE_OPEN_TRANSIENT_DB
danielk1977fee2d252007-08-18 10:59:19 +00004429 );
4430
danielk1977e339d652008-06-28 11:23:00 +00004431 memset(pFile, 0, sizeof(unixFile));
4432
danielk197717b90b52008-06-06 11:11:25 +00004433 if( !zName ){
danielk197717b90b52008-06-06 11:11:25 +00004434 assert(isDelete && !isOpenDirectory);
4435 rc = getTempname(MAX_PATHNAME+1, zTmpname);
4436 if( rc!=SQLITE_OK ){
4437 return rc;
4438 }
4439 zName = zTmpname;
4440 }
4441
drh734c9862008-11-28 15:37:20 +00004442 if( isReadonly ) openFlags |= O_RDONLY;
4443 if( isReadWrite ) openFlags |= O_RDWR;
4444 if( isCreate ) openFlags |= O_CREAT;
4445 if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW);
4446 openFlags |= (O_LARGEFILE|O_BINARY);
danielk1977b4b47412007-08-17 15:53:36 +00004447
drh734c9862008-11-28 15:37:20 +00004448 fd = open(zName, openFlags, isDelete?0600:SQLITE_DEFAULT_FILE_PERMISSIONS);
4449 OSTRACE4("OPENX %-3d %s 0%o\n", fd, zName, openFlags);
danielk19772f2d8c72007-08-30 16:13:33 +00004450 if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){
danielk1977b4b47412007-08-17 15:53:36 +00004451 /* Failed to open the file for read/write access. Try read-only. */
4452 flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE);
4453 flags |= SQLITE_OPEN_READONLY;
drh153c62c2007-08-24 03:51:33 +00004454 return unixOpen(pVfs, zPath, pFile, flags, pOutFlags);
danielk1977b4b47412007-08-17 15:53:36 +00004455 }
4456 if( fd<0 ){
4457 return SQLITE_CANTOPEN;
4458 }
4459 if( isDelete ){
drh6c7d5c52008-11-21 20:32:33 +00004460#if OS_VXWORKS
chw97185482008-11-17 08:05:31 +00004461 zPath = zName;
4462#else
danielk197717b90b52008-06-06 11:11:25 +00004463 unlink(zName);
chw97185482008-11-17 08:05:31 +00004464#endif
danielk1977b4b47412007-08-17 15:53:36 +00004465 }
drh41022642008-11-21 00:24:42 +00004466#if SQLITE_ENABLE_LOCKING_STYLE
4467 else{
drh734c9862008-11-28 15:37:20 +00004468 ((unixFile*)pFile)->openFlags = openFlags;
drh41022642008-11-21 00:24:42 +00004469 }
4470#endif
danielk1977b4b47412007-08-17 15:53:36 +00004471 if( pOutFlags ){
4472 *pOutFlags = flags;
4473 }
4474
4475 assert(fd!=0);
danielk1977fee2d252007-08-18 10:59:19 +00004476 if( isOpenDirectory ){
aswiftaebf4132008-11-21 00:10:35 +00004477 rc = openDirectory(zPath, &dirfd);
danielk1977fee2d252007-08-18 10:59:19 +00004478 if( rc!=SQLITE_OK ){
aswiftaebf4132008-11-21 00:10:35 +00004479 close(fd); /* silently leak if fail, already in error */
danielk1977fee2d252007-08-18 10:59:19 +00004480 return rc;
4481 }
4482 }
danielk1977e339d652008-06-28 11:23:00 +00004483
4484#ifdef FD_CLOEXEC
4485 fcntl(fd, F_SETFD, fcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
4486#endif
4487
drhda0e7682008-07-30 15:27:54 +00004488 noLock = eType!=SQLITE_OPEN_MAIN_DB;
aswiftaebf4132008-11-21 00:10:35 +00004489
4490#if SQLITE_PREFER_PROXY_LOCKING
4491 if( zPath!=NULL && !noLock ){
4492 char *envforce = getenv("SQLITE_FORCE_PROXY_LOCKING");
4493 int useProxy = 0;
4494
4495 /* SQLITE_FORCE_PROXY_LOCKING==1 means force always use proxy,
drh7708e972008-11-29 00:56:52 +00004496 ** 0 means never use proxy, NULL means use proxy for non-local files only
4497 */
aswiftaebf4132008-11-21 00:10:35 +00004498 if( envforce!=NULL ){
4499 useProxy = atoi(envforce)>0;
4500 }else{
4501 struct statfs fsInfo;
4502
4503 if( statfs(zPath, &fsInfo) == -1 ){
4504 ((unixFile*)pFile)->lastErrno = errno;
4505 if( dirfd>=0 ) close(dirfd); /* silently leak if fail, in error */
4506 close(fd); /* silently leak if fail, in error */
4507 return SQLITE_IOERR_ACCESS;
4508 }
4509 useProxy = !(fsInfo.f_flags&MNT_LOCAL);
4510 }
4511 if( useProxy ){
4512 rc = fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock, isDelete);
4513 if( rc==SQLITE_OK ){
4514 rc = transformUnixFileForLockProxy((unixFile*)pFile, ":auto:");
4515 }
4516 return rc;
4517 }
4518 }
4519#endif
4520
chw97185482008-11-17 08:05:31 +00004521 return fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock, isDelete);
danielk1977b4b47412007-08-17 15:53:36 +00004522}
4523
4524/*
danielk1977fee2d252007-08-18 10:59:19 +00004525** Delete the file at zPath. If the dirSync argument is true, fsync()
4526** the directory after deleting the file.
danielk1977b4b47412007-08-17 15:53:36 +00004527*/
drh6b9d6dd2008-12-03 19:34:47 +00004528static int unixDelete(
4529 sqlite3_vfs *NotUsed, /* VFS containing this as the xDelete method */
4530 const char *zPath, /* Name of file to be deleted */
4531 int dirSync /* If true, fsync() directory after deleting file */
4532){
danielk1977fee2d252007-08-18 10:59:19 +00004533 int rc = SQLITE_OK;
danielk1977397d65f2008-11-19 11:35:39 +00004534 UNUSED_PARAMETER(NotUsed);
danielk1977b4b47412007-08-17 15:53:36 +00004535 SimulateIOError(return SQLITE_IOERR_DELETE);
4536 unlink(zPath);
danielk1977d39fa702008-10-16 13:27:40 +00004537#ifndef SQLITE_DISABLE_DIRSYNC
danielk1977fee2d252007-08-18 10:59:19 +00004538 if( dirSync ){
4539 int fd;
4540 rc = openDirectory(zPath, &fd);
4541 if( rc==SQLITE_OK ){
drh6c7d5c52008-11-21 20:32:33 +00004542#if OS_VXWORKS
chw97185482008-11-17 08:05:31 +00004543 if( fsync(fd)==-1 )
4544#else
4545 if( fsync(fd) )
4546#endif
4547 {
danielk1977fee2d252007-08-18 10:59:19 +00004548 rc = SQLITE_IOERR_DIR_FSYNC;
4549 }
aswiftaebf4132008-11-21 00:10:35 +00004550 if( close(fd)&&!rc ){
4551 rc = SQLITE_IOERR_DIR_CLOSE;
4552 }
danielk1977fee2d252007-08-18 10:59:19 +00004553 }
4554 }
danielk1977d138dd82008-10-15 16:02:48 +00004555#endif
danielk1977fee2d252007-08-18 10:59:19 +00004556 return rc;
danielk1977b4b47412007-08-17 15:53:36 +00004557}
4558
danielk197790949c22007-08-17 16:50:38 +00004559/*
4560** Test the existance of or access permissions of file zPath. The
4561** test performed depends on the value of flags:
4562**
4563** SQLITE_ACCESS_EXISTS: Return 1 if the file exists
4564** SQLITE_ACCESS_READWRITE: Return 1 if the file is read and writable.
4565** SQLITE_ACCESS_READONLY: Return 1 if the file is readable.
4566**
4567** Otherwise return 0.
4568*/
danielk1977861f7452008-06-05 11:39:11 +00004569static int unixAccess(
drh6b9d6dd2008-12-03 19:34:47 +00004570 sqlite3_vfs *NotUsed, /* The VFS containing this xAccess method */
4571 const char *zPath, /* Path of the file to examine */
4572 int flags, /* What do we want to learn about the zPath file? */
4573 int *pResOut /* Write result boolean here */
danielk1977861f7452008-06-05 11:39:11 +00004574){
rse25c0d1a2007-09-20 08:38:14 +00004575 int amode = 0;
danielk1977397d65f2008-11-19 11:35:39 +00004576 UNUSED_PARAMETER(NotUsed);
danielk1977861f7452008-06-05 11:39:11 +00004577 SimulateIOError( return SQLITE_IOERR_ACCESS; );
danielk1977b4b47412007-08-17 15:53:36 +00004578 switch( flags ){
4579 case SQLITE_ACCESS_EXISTS:
4580 amode = F_OK;
4581 break;
4582 case SQLITE_ACCESS_READWRITE:
4583 amode = W_OK|R_OK;
4584 break;
drh50d3f902007-08-27 21:10:36 +00004585 case SQLITE_ACCESS_READ:
danielk1977b4b47412007-08-17 15:53:36 +00004586 amode = R_OK;
4587 break;
4588
4589 default:
4590 assert(!"Invalid flags argument");
4591 }
danielk1977861f7452008-06-05 11:39:11 +00004592 *pResOut = (access(zPath, amode)==0);
4593 return SQLITE_OK;
danielk1977b4b47412007-08-17 15:53:36 +00004594}
4595
danielk1977b4b47412007-08-17 15:53:36 +00004596
4597/*
4598** Turn a relative pathname into a full pathname. The relative path
4599** is stored as a nul-terminated string in the buffer pointed to by
4600** zPath.
4601**
4602** zOut points to a buffer of at least sqlite3_vfs.mxPathname bytes
4603** (in this case, MAX_PATHNAME bytes). The full-path is written to
4604** this buffer before returning.
4605*/
danielk1977adfb9b02007-09-17 07:02:56 +00004606static int unixFullPathname(
4607 sqlite3_vfs *pVfs, /* Pointer to vfs object */
4608 const char *zPath, /* Possibly relative input path */
4609 int nOut, /* Size of output buffer in bytes */
4610 char *zOut /* Output buffer */
4611){
danielk1977843e65f2007-09-01 16:16:15 +00004612
4613 /* It's odd to simulate an io-error here, but really this is just
4614 ** using the io-error infrastructure to test that SQLite handles this
4615 ** function failing. This function could fail if, for example, the
drh6b9d6dd2008-12-03 19:34:47 +00004616 ** current working directory has been unlinked.
danielk1977843e65f2007-09-01 16:16:15 +00004617 */
4618 SimulateIOError( return SQLITE_ERROR );
4619
drh153c62c2007-08-24 03:51:33 +00004620 assert( pVfs->mxPathname==MAX_PATHNAME );
danielk1977f3d3c272008-11-19 16:52:44 +00004621 UNUSED_PARAMETER(pVfs);
chw97185482008-11-17 08:05:31 +00004622
drh3c7f2dc2007-12-06 13:26:20 +00004623 zOut[nOut-1] = '\0';
danielk1977b4b47412007-08-17 15:53:36 +00004624 if( zPath[0]=='/' ){
drh3c7f2dc2007-12-06 13:26:20 +00004625 sqlite3_snprintf(nOut, zOut, "%s", zPath);
danielk1977b4b47412007-08-17 15:53:36 +00004626 }else{
4627 int nCwd;
drh3c7f2dc2007-12-06 13:26:20 +00004628 if( getcwd(zOut, nOut-1)==0 ){
drh70c01452007-09-03 17:42:17 +00004629 return SQLITE_CANTOPEN;
danielk1977b4b47412007-08-17 15:53:36 +00004630 }
4631 nCwd = strlen(zOut);
drh3c7f2dc2007-12-06 13:26:20 +00004632 sqlite3_snprintf(nOut-nCwd, &zOut[nCwd], "/%s", zPath);
danielk1977b4b47412007-08-17 15:53:36 +00004633 }
4634 return SQLITE_OK;
danielk1977b4b47412007-08-17 15:53:36 +00004635}
4636
drh0ccebe72005-06-07 22:22:50 +00004637
drh761df872006-12-21 01:29:22 +00004638#ifndef SQLITE_OMIT_LOAD_EXTENSION
4639/*
4640** Interfaces for opening a shared library, finding entry points
4641** within the shared library, and closing the shared library.
4642*/
4643#include <dlfcn.h>
danielk1977397d65f2008-11-19 11:35:39 +00004644static void *unixDlOpen(sqlite3_vfs *NotUsed, const char *zFilename){
4645 UNUSED_PARAMETER(NotUsed);
drh761df872006-12-21 01:29:22 +00004646 return dlopen(zFilename, RTLD_NOW | RTLD_GLOBAL);
4647}
danielk197795c8a542007-09-01 06:51:27 +00004648
4649/*
4650** SQLite calls this function immediately after a call to unixDlSym() or
4651** unixDlOpen() fails (returns a null pointer). If a more detailed error
4652** message is available, it is written to zBufOut. If no error message
4653** is available, zBufOut is left unmodified and SQLite uses a default
4654** error message.
4655*/
danielk1977397d65f2008-11-19 11:35:39 +00004656static void unixDlError(sqlite3_vfs *NotUsed, int nBuf, char *zBufOut){
danielk1977b4b47412007-08-17 15:53:36 +00004657 char *zErr;
danielk1977397d65f2008-11-19 11:35:39 +00004658 UNUSED_PARAMETER(NotUsed);
drh6c7d5c52008-11-21 20:32:33 +00004659 unixEnterMutex();
danielk1977b4b47412007-08-17 15:53:36 +00004660 zErr = dlerror();
4661 if( zErr ){
drh153c62c2007-08-24 03:51:33 +00004662 sqlite3_snprintf(nBuf, zBufOut, "%s", zErr);
danielk1977b4b47412007-08-17 15:53:36 +00004663 }
drh6c7d5c52008-11-21 20:32:33 +00004664 unixLeaveMutex();
danielk1977b4b47412007-08-17 15:53:36 +00004665}
danielk1977397d65f2008-11-19 11:35:39 +00004666static void *unixDlSym(sqlite3_vfs *NotUsed, void *pHandle, const char*zSymbol){
4667 UNUSED_PARAMETER(NotUsed);
drh761df872006-12-21 01:29:22 +00004668 return dlsym(pHandle, zSymbol);
4669}
danielk1977397d65f2008-11-19 11:35:39 +00004670static void unixDlClose(sqlite3_vfs *NotUsed, void *pHandle){
4671 UNUSED_PARAMETER(NotUsed);
danielk1977b4b47412007-08-17 15:53:36 +00004672 dlclose(pHandle);
drh761df872006-12-21 01:29:22 +00004673}
danielk1977b4b47412007-08-17 15:53:36 +00004674#else /* if SQLITE_OMIT_LOAD_EXTENSION is defined: */
4675 #define unixDlOpen 0
4676 #define unixDlError 0
4677 #define unixDlSym 0
4678 #define unixDlClose 0
4679#endif
4680
4681/*
danielk197790949c22007-08-17 16:50:38 +00004682** Write nBuf bytes of random data to the supplied buffer zBuf.
drhbbd42a62004-05-22 17:41:58 +00004683*/
danielk1977397d65f2008-11-19 11:35:39 +00004684static int unixRandomness(sqlite3_vfs *NotUsed, int nBuf, char *zBuf){
4685 UNUSED_PARAMETER(NotUsed);
danielk197700e13612008-11-17 19:18:54 +00004686 assert((size_t)nBuf>=(sizeof(time_t)+sizeof(int)));
danielk197790949c22007-08-17 16:50:38 +00004687
drhbbd42a62004-05-22 17:41:58 +00004688 /* We have to initialize zBuf to prevent valgrind from reporting
4689 ** errors. The reports issued by valgrind are incorrect - we would
4690 ** prefer that the randomness be increased by making use of the
4691 ** uninitialized space in zBuf - but valgrind errors tend to worry
4692 ** some users. Rather than argue, it seems easier just to initialize
4693 ** the whole array and silence valgrind, even if that means less randomness
4694 ** in the random seed.
4695 **
4696 ** When testing, initializing zBuf[] to zero is all we do. That means
drhf1a221e2006-01-15 17:27:17 +00004697 ** that we always use the same random number sequence. This makes the
drhbbd42a62004-05-22 17:41:58 +00004698 ** tests repeatable.
4699 */
danielk1977b4b47412007-08-17 15:53:36 +00004700 memset(zBuf, 0, nBuf);
drhbbd42a62004-05-22 17:41:58 +00004701#if !defined(SQLITE_TEST)
4702 {
drh842b8642005-01-21 17:53:17 +00004703 int pid, fd;
4704 fd = open("/dev/urandom", O_RDONLY);
4705 if( fd<0 ){
drh07397232006-01-06 14:46:46 +00004706 time_t t;
4707 time(&t);
danielk197790949c22007-08-17 16:50:38 +00004708 memcpy(zBuf, &t, sizeof(t));
4709 pid = getpid();
4710 memcpy(&zBuf[sizeof(t)], &pid, sizeof(pid));
danielk197700e13612008-11-17 19:18:54 +00004711 assert( sizeof(t)+sizeof(pid)<=(size_t)nBuf );
drh72cbd072008-10-14 17:58:38 +00004712 nBuf = sizeof(t) + sizeof(pid);
drh842b8642005-01-21 17:53:17 +00004713 }else{
drh72cbd072008-10-14 17:58:38 +00004714 nBuf = read(fd, zBuf, nBuf);
drh842b8642005-01-21 17:53:17 +00004715 close(fd);
4716 }
drhbbd42a62004-05-22 17:41:58 +00004717 }
4718#endif
drh72cbd072008-10-14 17:58:38 +00004719 return nBuf;
drhbbd42a62004-05-22 17:41:58 +00004720}
4721
danielk1977b4b47412007-08-17 15:53:36 +00004722
drhbbd42a62004-05-22 17:41:58 +00004723/*
4724** Sleep for a little while. Return the amount of time slept.
danielk1977b4b47412007-08-17 15:53:36 +00004725** The argument is the number of microseconds we want to sleep.
drh4a50aac2007-08-23 02:47:53 +00004726** The return value is the number of microseconds of sleep actually
4727** requested from the underlying operating system, a number which
4728** might be greater than or equal to the argument, but not less
4729** than the argument.
drhbbd42a62004-05-22 17:41:58 +00004730*/
danielk1977397d65f2008-11-19 11:35:39 +00004731static int unixSleep(sqlite3_vfs *NotUsed, int microseconds){
drh6c7d5c52008-11-21 20:32:33 +00004732#if OS_VXWORKS
chw97185482008-11-17 08:05:31 +00004733 struct timespec sp;
4734
4735 sp.tv_sec = microseconds / 1000000;
4736 sp.tv_nsec = (microseconds % 1000000) * 1000;
4737 nanosleep(&sp, NULL);
danielk1977397d65f2008-11-19 11:35:39 +00004738 return microseconds;
4739#elif defined(HAVE_USLEEP) && HAVE_USLEEP
danielk1977b4b47412007-08-17 15:53:36 +00004740 usleep(microseconds);
4741 return microseconds;
drhbbd42a62004-05-22 17:41:58 +00004742#else
danielk1977b4b47412007-08-17 15:53:36 +00004743 int seconds = (microseconds+999999)/1000000;
4744 sleep(seconds);
drh4a50aac2007-08-23 02:47:53 +00004745 return seconds*1000000;
drha3fad6f2006-01-18 14:06:37 +00004746#endif
danielk1977397d65f2008-11-19 11:35:39 +00004747 UNUSED_PARAMETER(NotUsed);
drh88f474a2006-01-02 20:00:12 +00004748}
4749
4750/*
drh6b9d6dd2008-12-03 19:34:47 +00004751** The following variable, if set to a non-zero value, is interpreted as
4752** the number of seconds since 1970 and is used to set the result of
4753** sqlite3OsCurrentTime() during testing.
drhbbd42a62004-05-22 17:41:58 +00004754*/
4755#ifdef SQLITE_TEST
drh6b9d6dd2008-12-03 19:34:47 +00004756int sqlite3_current_time = 0; /* Fake system time in seconds since 1970. */
drhbbd42a62004-05-22 17:41:58 +00004757#endif
4758
4759/*
4760** Find the current time (in Universal Coordinated Time). Write the
4761** current time and date as a Julian Day number into *prNow and
4762** return 0. Return 1 if the time and date cannot be found.
4763*/
danielk1977397d65f2008-11-19 11:35:39 +00004764static int unixCurrentTime(sqlite3_vfs *NotUsed, double *prNow){
drh6c7d5c52008-11-21 20:32:33 +00004765#if defined(NO_GETTOD)
drhbbd42a62004-05-22 17:41:58 +00004766 time_t t;
4767 time(&t);
4768 *prNow = t/86400.0 + 2440587.5;
drh6c7d5c52008-11-21 20:32:33 +00004769#elif OS_VXWORKS
4770 struct timespec sNow;
4771 clock_gettime(CLOCK_REALTIME, &sNow);
4772 *prNow = 2440587.5 + sNow.tv_sec/86400.0 + sNow.tv_nsec/86400000000000.0;
drh19e2d372005-08-29 23:00:03 +00004773#else
4774 struct timeval sNow;
drhbdcc2762007-04-02 18:06:57 +00004775 gettimeofday(&sNow, 0);
drh19e2d372005-08-29 23:00:03 +00004776 *prNow = 2440587.5 + sNow.tv_sec/86400.0 + sNow.tv_usec/86400000000.0;
4777#endif
danielk1977397d65f2008-11-19 11:35:39 +00004778
drhbbd42a62004-05-22 17:41:58 +00004779#ifdef SQLITE_TEST
4780 if( sqlite3_current_time ){
4781 *prNow = sqlite3_current_time/86400.0 + 2440587.5;
4782 }
4783#endif
danielk1977397d65f2008-11-19 11:35:39 +00004784 UNUSED_PARAMETER(NotUsed);
drhbbd42a62004-05-22 17:41:58 +00004785 return 0;
4786}
danielk1977b4b47412007-08-17 15:53:36 +00004787
drh6b9d6dd2008-12-03 19:34:47 +00004788/*
4789** We added the xGetLastError() method with the intention of providing
4790** better low-level error messages when operating-system problems come up
4791** during SQLite operation. But so far, none of that has been implemented
4792** in the core. So this routine is never called. For now, it is merely
4793** a place-holder.
4794*/
danielk1977397d65f2008-11-19 11:35:39 +00004795static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){
4796 UNUSED_PARAMETER(NotUsed);
4797 UNUSED_PARAMETER(NotUsed2);
4798 UNUSED_PARAMETER(NotUsed3);
danielk1977bcb97fe2008-06-06 15:49:29 +00004799 return 0;
4800}
4801
drh153c62c2007-08-24 03:51:33 +00004802/*
drh734c9862008-11-28 15:37:20 +00004803************************ End of sqlite3_vfs methods ***************************
4804******************************************************************************/
4805
4806/*
danielk1977e339d652008-06-28 11:23:00 +00004807** Initialize the operating system interface.
drh734c9862008-11-28 15:37:20 +00004808**
4809** This routine registers all VFS implementations for unix-like operating
4810** systems. This routine, and the sqlite3_os_end() routine that follows,
4811** should be the only routines in this file that are visible from other
4812** files.
drh6b9d6dd2008-12-03 19:34:47 +00004813**
4814** This routine is called once during SQLite initialization and by a
4815** single thread. The memory allocation and mutex subsystems have not
4816** necessarily been initialized when this routine is called, and so they
4817** should not be used.
drh153c62c2007-08-24 03:51:33 +00004818*/
danielk1977c0fa4c52008-06-25 17:19:00 +00004819int sqlite3_os_init(void){
drh6b9d6dd2008-12-03 19:34:47 +00004820 /*
4821 ** The following macro defines an initializer for an sqlite3_vfs object.
4822 ** The name of the VFS is NAME. The pAppData is a pointer to a "finder"
4823 ** function. The FINDER parameter to this macro is the name of the
4824 ** finder-function. The finder-function returns a pointer to the
4825 ** sqlite_io_methods object that implements the desired locking
4826 ** behaviors. See the division above that contains the IOMETHODS
4827 ** macro for addition information on finder-functions.
4828 **
4829 ** Most finders simply return a pointer to a fixed sqlite3_io_methods
4830 ** object. But the "autolockIoFinder" available on MacOSX does a little
4831 ** more than that; it looks at the filesystem type that hosts the
4832 ** database file and tries to choose an locking method appropriate for
4833 ** that filesystem time.
danielk1977e339d652008-06-28 11:23:00 +00004834 */
drh7708e972008-11-29 00:56:52 +00004835 #define UNIXVFS(VFSNAME, FINDER) { \
danielk1977e339d652008-06-28 11:23:00 +00004836 1, /* iVersion */ \
4837 sizeof(unixFile), /* szOsFile */ \
4838 MAX_PATHNAME, /* mxPathname */ \
4839 0, /* pNext */ \
drh7708e972008-11-29 00:56:52 +00004840 VFSNAME, /* zName */ \
4841 (void*)FINDER, /* pAppData */ \
danielk1977e339d652008-06-28 11:23:00 +00004842 unixOpen, /* xOpen */ \
4843 unixDelete, /* xDelete */ \
4844 unixAccess, /* xAccess */ \
4845 unixFullPathname, /* xFullPathname */ \
4846 unixDlOpen, /* xDlOpen */ \
4847 unixDlError, /* xDlError */ \
4848 unixDlSym, /* xDlSym */ \
4849 unixDlClose, /* xDlClose */ \
4850 unixRandomness, /* xRandomness */ \
4851 unixSleep, /* xSleep */ \
4852 unixCurrentTime, /* xCurrentTime */ \
4853 unixGetLastError /* xGetLastError */ \
4854 }
4855
drh6b9d6dd2008-12-03 19:34:47 +00004856 /*
4857 ** All default VFSes for unix are contained in the following array.
4858 **
4859 ** Note that the sqlite3_vfs.pNext field of the VFS object is modified
4860 ** by the SQLite core when the VFS is registered. So the following
4861 ** array cannot be const.
4862 */
danielk1977e339d652008-06-28 11:23:00 +00004863 static sqlite3_vfs aVfs[] = {
drh7708e972008-11-29 00:56:52 +00004864#if SQLITE_ENABLE_LOCKING_STYLE && defined(__DARWIN__)
4865 UNIXVFS("unix", autolockIoFinder ),
4866#else
4867 UNIXVFS("unix", posixIoFinder ),
4868#endif
4869 UNIXVFS("unix-none", nolockIoFinder ),
4870 UNIXVFS("unix-dotfile", dotlockIoFinder ),
drh734c9862008-11-28 15:37:20 +00004871#if OS_VXWORKS
drh7708e972008-11-29 00:56:52 +00004872 UNIXVFS("unix-namedsem", semIoFinder ),
drh734c9862008-11-28 15:37:20 +00004873#endif
4874#if SQLITE_ENABLE_LOCKING_STYLE
drh7708e972008-11-29 00:56:52 +00004875 UNIXVFS("unix-posix", posixIoFinder ),
4876 UNIXVFS("unix-flock", flockIoFinder ),
drh734c9862008-11-28 15:37:20 +00004877#endif
4878#if SQLITE_ENABLE_LOCKING_STYLE && defined(__DARWIN__)
drh7708e972008-11-29 00:56:52 +00004879 UNIXVFS("unix-afp", afpIoFinder ),
4880 UNIXVFS("unix-proxy", proxyIoFinder ),
drh734c9862008-11-28 15:37:20 +00004881#endif
drh153c62c2007-08-24 03:51:33 +00004882 };
drh6b9d6dd2008-12-03 19:34:47 +00004883 unsigned int i; /* Loop counter */
4884
4885 /* Register all VFSes defined in the aVfs[] array */
danielk1977e339d652008-06-28 11:23:00 +00004886 for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
drh734c9862008-11-28 15:37:20 +00004887 sqlite3_vfs_register(&aVfs[i], i==0);
danielk1977e339d652008-06-28 11:23:00 +00004888 }
danielk1977c0fa4c52008-06-25 17:19:00 +00004889 return SQLITE_OK;
drh153c62c2007-08-24 03:51:33 +00004890}
danielk1977e339d652008-06-28 11:23:00 +00004891
4892/*
drh6b9d6dd2008-12-03 19:34:47 +00004893** Shutdown the operating system interface.
4894**
4895** Some operating systems might need to do some cleanup in this routine,
4896** to release dynamically allocated objects. But not on unix.
4897** This routine is a no-op for unix.
danielk1977e339d652008-06-28 11:23:00 +00004898*/
danielk1977c0fa4c52008-06-25 17:19:00 +00004899int sqlite3_os_end(void){
4900 return SQLITE_OK;
4901}
drhdce8bdb2007-08-16 13:01:44 +00004902
danielk197729bafea2008-06-26 10:41:19 +00004903#endif /* SQLITE_OS_UNIX */