blob: 2f2303173c30cd58a8b924c6b73b8ed8f80029d4 [file] [log] [blame]
drhbbd42a62004-05-22 17:41:58 +00001/*
2** 2004 May 22
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11******************************************************************************
12**
13** This file contains code that is specific to Unix systems.
14*/
drhbbd42a62004-05-22 17:41:58 +000015#include "sqliteInt.h"
drheb206252004-10-01 02:00:31 +000016#include "os.h"
17#if OS_UNIX /* This file is used on unix only */
drhbbd42a62004-05-22 17:41:58 +000018
19
20#include <time.h>
drh19e2d372005-08-29 23:00:03 +000021#include <sys/time.h>
drhbbd42a62004-05-22 17:41:58 +000022#include <errno.h>
23#include <unistd.h>
drh0ccebe72005-06-07 22:22:50 +000024
25/*
26** Do not include any of the File I/O interface procedures if the
** SQLITE_OMIT_DISKIO macro is defined (indicating that the database
28** will be in-memory only)
29*/
30#ifndef SQLITE_OMIT_DISKIO
31
32
/*
** Supply fallback definitions for open() flags that some systems do
** not provide.  A flag defined as 0 contributes nothing when it is
** OR-ed into the flags argument of open().  Defining SQLITE_DISABLE_LFS
** forces O_LARGEFILE to 0 even when the system headers define it.
*/
#if defined(SQLITE_DISABLE_LFS) || !defined(O_LARGEFILE)
# undef O_LARGEFILE
# define O_LARGEFILE 0
#endif
#if !defined(O_NOFOLLOW)
# define O_NOFOLLOW 0
#endif
#if !defined(O_BINARY)
# define O_BINARY 0
#endif

/*
** The DJGPP compiler environment looks mostly like Unix, but it
** lacks the fcntl() system call.  So fcntl() is replaced by an
** expression that always evaluates to 0 (success).  As a consequence
** no locking takes place under DJGPP.
*/
#if defined(__DJGPP__)
# define fcntl(A,B,C) 0
#endif
59
60/*
drhbbd42a62004-05-22 17:41:58 +000061** Include code that is common to all os_*.c files
62*/
63#include "os_common.h"
64
/*
** threadid expands to the id of the calling thread when
** SQLITE_UNIX_THREADS is defined and to the constant 0 otherwise.
** It is used for testing and debugging only.
*/
#if !defined(SQLITE_UNIX_THREADS)
#define threadid 0
#else
#define threadid pthread_self()
#endif
74
/*
** Set or check the OsFile.tid field.  This field is set when an OsFile
** is first opened.  All subsequent uses of the OsFile verify that the
** same thread is operating on the OsFile.  Some operating systems do
** not allow locks to be overridden by other threads and that restriction
** means that sqlite3* database handles cannot be moved from one thread
** to another.  This logic makes sure a user does not try to do that
** by mistake.
**
** SET_THREADID(X)    Record the calling thread in X->tid.
** CHECK_THREADID(X)  True (non-zero) if the calling thread is NOT the
**                    thread recorded in X->tid.
**
** Without SQLITE_UNIX_THREADS, SET_THREADID() expands to nothing and
** CHECK_THREADID() is always 0.
**
** The argument is parenthesized inside the expansions so that any
** pointer-valued expression (for example &file) may be passed, and
** SET_THREADID() is wrapped so that it forms a single expression.
*/
#ifdef SQLITE_UNIX_THREADS
# define SET_THREADID(X)   ((X)->tid = pthread_self())
# define CHECK_THREADID(X) (!pthread_equal((X)->tid, pthread_self()))
#else
# define SET_THREADID(X)
# define CHECK_THREADID(X) 0
#endif
91
drhbbd42a62004-05-22 17:41:58 +000092/*
93** Here is the dirt on POSIX advisory locks: ANSI STD 1003.1 (1996)
94** section 6.5.2.2 lines 483 through 490 specify that when a process
95** sets or clears a lock, that operation overrides any prior locks set
96** by the same process. It does not explicitly say so, but this implies
97** that it overrides locks set by the same process using a different
98** file descriptor. Consider this test case:
99**
100** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
101** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
102**
103** Suppose ./file1 and ./file2 are really the same file (because
104** one is a hard or symbolic link to the other) then if you set
105** an exclusive lock on fd1, then try to get an exclusive lock
106** on fd2, it works. I would have expected the second lock to
107** fail since there was already a lock on the file due to fd1.
108** But not so. Since both locks came from the same process, the
109** second overrides the first, even though they were on different
110** file descriptors opened on different file names.
111**
112** Bummer. If you ask me, this is broken. Badly broken. It means
113** that we cannot use POSIX locks to synchronize file access among
114** competing threads of the same process. POSIX locks will work fine
115** to synchronize access for threads in separate processes, but not
116** threads within the same process.
117**
118** To work around the problem, SQLite has to manage file locks internally
119** on its own. Whenever a new database is opened, we have to find the
120** specific inode of the database file (the inode is determined by the
121** st_dev and st_ino fields of the stat structure that fstat() fills in)
122** and check for locks already existing on that inode. When locks are
123** created or removed, we have to look at our own internal record of the
124** locks to see if another thread has previously set a lock on that same
125** inode.
126**
127** The OsFile structure for POSIX is no longer just an integer file
128** descriptor. It is now a structure that holds the integer file
129** descriptor and a pointer to a structure that describes the internal
130** locks on the corresponding inode. There is one locking structure
131** per inode, so if the same inode is opened twice, both OsFile structures
132** point to the same locking structure. The locking structure keeps
133** a reference count (so we will know when to delete it) and a "cnt"
134** field that tells us its internal lock status. cnt==0 means the
135** file is unlocked. cnt==-1 means the file has an exclusive lock.
136** cnt>0 means there are cnt shared locks on the file.
137**
138** Any attempt to lock or unlock a file first checks the locking
139** structure. The fcntl() system call is only invoked to set a
140** POSIX lock if the internal lock structure transitions between
141** a locked and an unlocked state.
142**
143** 2004-Jan-11:
144** More recent discoveries about POSIX advisory locks. (The more
** I discover, the more I realize that POSIX advisory locks are
146** an abomination.)
147**
148** If you close a file descriptor that points to a file that has locks,
149** all locks on that file that are owned by the current process are
150** released. To work around this problem, each OsFile structure contains
151** a pointer to an openCnt structure. There is one openCnt structure
152** per open inode, which means that multiple OsFiles can point to a single
153** openCnt. When an attempt is made to close an OsFile, if there are
154** other OsFiles open on the same inode that are holding locks, the call
155** to close() the file descriptor is deferred until all of the locks clear.
156** The openCnt structure keeps a list of file descriptors that need to
157** be closed and that list is walked (and cleared) when the last lock
158** clears.
159**
160** First, under Linux threads, because each thread has a separate
161** process ID, lock operations in one thread do not override locks
162** to the same file in other threads. Linux threads behave like
163** separate processes in this respect. But, if you close a file
164** descriptor in linux threads, all locks are cleared, even locks
165** on other threads and even though the other threads have different
166** process IDs. Linux threads is inconsistent in this respect.
167** (I'm beginning to think that linux threads is an abomination too.)
168** The consequence of this all is that the hash table for the lockInfo
169** structure has to include the process id as part of its key because
170** locks in different threads are treated as distinct. But the
171** openCnt structure should not include the process id in its
172** key because close() clears lock on all threads, not just the current
173** thread. Were it not for this goofiness in linux threads, we could
174** combine the lockInfo and openCnt structures into a single structure.
drh5fdae772004-06-29 03:29:00 +0000175**
176** 2004-Jun-28:
177** On some versions of linux, threads can override each others locks.
178** On others not. Sometimes you can change the behavior on the same
179** system by setting the LD_ASSUME_KERNEL environment variable. The
180** POSIX standard is silent as to which behavior is correct, as far
181** as I can tell, so other versions of unix might show the same
** inconsistency. There is little doubt in my mind that posix
183** advisory locks and linux threads are profoundly broken.
184**
185** To work around the inconsistencies, we have to test at runtime
186** whether or not threads can override each others locks. This test
187** is run once, the first time any lock is attempted. A static
188** variable is set to record the results of this test for future
189** use.
drhbbd42a62004-05-22 17:41:58 +0000190*/
191
/*
** Lookup key for the lockInfo hash table.  The key identifies an inode
** by its device and inode numbers.
**
** When compiled with SQLITE_UNIX_THREADS the key also carries a thread
** ID.  If threads cannot override each others locks, tid is the ID of
** the thread that built the key.  If threads can override each others
** locks, tid is always zero.  Without threading support the field does
** not exist.
*/
struct lockKey {
  dev_t dev;        /* Device number */
  ino_t ino;        /* Inode number */
#if defined(SQLITE_UNIX_THREADS)
  pthread_t tid;    /* Thread ID, or zero if threads can override each other */
#endif
};
208
209/*
210** An instance of the following structure is allocated for each open
211** inode on each thread with a different process ID. (Threads have
212** different process IDs on linux, but not on most other unixes.)
213**
214** A single inode can have multiple file descriptors, so each OsFile
215** structure contains a pointer to an instance of this object and this
216** object keeps a count of the number of OsFiles pointing to it.
217*/
218struct lockInfo {
219 struct lockKey key; /* The lookup key */
drh2ac3ee92004-06-07 16:27:46 +0000220 int cnt; /* Number of SHARED locks held */
danielk19779a1d0ab2004-06-01 14:09:28 +0000221 int locktype; /* One of SHARED_LOCK, RESERVED_LOCK etc. */
drhbbd42a62004-05-22 17:41:58 +0000222 int nRef; /* Number of pointers to this structure */
223};
224
/*
** Lookup key for the openCnt hash table.  It names an inode by device
** and inode number only; unlike lockKey it never contains a thread ID.
*/
struct openKey {
  dev_t dev;    /* Device number */
  ino_t ino;    /* Inode number */
};
234
235/*
236** An instance of the following structure is allocated for each open
237** inode. This structure keeps track of the number of locks on that
238** inode. If a close is attempted against an inode that is holding
239** locks, the close is deferred until all locks clear by adding the
240** file descriptor to be closed to the pending list.
241*/
242struct openCnt {
243 struct openKey key; /* The lookup key */
244 int nRef; /* Number of pointers to this structure */
245 int nLock; /* Number of outstanding locks */
246 int nPending; /* Number of pending close() operations */
247 int *aPending; /* Malloced space holding fd's awaiting a close() */
248};
249
250/*
** These hash tables map inodes and process IDs into lockInfo and openCnt
** structures. Access to these hash tables must be protected by a mutex.
253*/
254static Hash lockHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
255static Hash openHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
256
drh5fdae772004-06-29 03:29:00 +0000257
258#ifdef SQLITE_UNIX_THREADS
259/*
260** This variable records whether or not threads can override each others
261** locks.
262**
263** 0: No. Threads cannot override each others locks.
264** 1: Yes. Threads can override each others locks.
265** -1: We don't know yet.
266*/
267static int threadsOverrideEachOthersLocks = -1;
268
/*
** Argument block handed to each test thread started by
** testThreadLockingBehavior().  The thread applies "lock" to "fd" and
** stores the outcome in "result".
*/
struct threadTestData {
  int fd;               /* File to be locked */
  struct flock lock;    /* The locking operation */
  int result;           /* Result of the locking operation */
};
278
#ifdef SQLITE_LOCK_TRACE
/*
** Return a printable name for the l_type field of a struct flock.
** An unrecognized value trips assert(0); when assert() is compiled out
** the string "?" is returned so that the caller never hands an
** uninitialized pointer to the print routine.
*/
static const char *lockTraceTypeName(int lockType){
  if( lockType==F_RDLCK ) return "RDLCK";
  if( lockType==F_WRLCK ) return "WRLCK";
  if( lockType==F_UNLCK ) return "UNLCK";
  assert( 0 );
  return "?";
}

/*
** Print out information about all locking operations.
**
** This routine is used for troubleshooting locks on multithreaded
** platforms.  Enable by compiling with the -DSQLITE_LOCK_TRACE
** command-line option on the compiler.  This code is normally
** turned off.
**
** The routine performs fcntl(fd, op, p), prints a one-line description
** of the request and its result, and returns whatever fcntl() returned.
** errno is restored to the value fcntl() left behind before returning.
** If an F_SETLK request for a read or write lock fails, an F_GETLK
** query is issued on a copy of *p and the answer is printed as the
** failure reason.
*/
static int lockTrace(int fd, int op, struct flock *p){
  const char *zOpName;
  const char *zType;
  int s;
  int savedErrno;
  if( op==F_GETLK ){
    zOpName = "GETLK";
  }else if( op==F_SETLK ){
    zOpName = "SETLK";
  }else{
    s = fcntl(fd, op, p);
    sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
    return s;
  }
  /* Capture the requested lock type before fcntl() gets a chance to
  ** overwrite *p (as F_GETLK does). */
  zType = lockTraceTypeName(p->l_type);
  assert( p->l_whence==SEEK_SET );
  s = fcntl(fd, op, p);
  savedErrno = errno;
  /* threadid is cast because it may expand to pthread_self(), whose type
  ** does not match the %d conversion. */
  sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
     (int)threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
     (int)p->l_pid, s);
  if( s && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
    struct flock l2;
    l2 = *p;
    fcntl(fd, F_GETLK, &l2);
    zType = lockTraceTypeName(l2.l_type);
    sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
       zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
  }
  errno = savedErrno;
  return s;
}
#define fcntl lockTrace
#endif /* SQLITE_LOCK_TRACE */
337
drh5fdae772004-06-29 03:29:00 +0000338/*
339** The testThreadLockingBehavior() routine launches two separate
340** threads on this routine. This routine attempts to lock a file
341** descriptor then returns. The success or failure of that attempt
342** allows the testThreadLockingBehavior() procedure to determine
343** whether or not threads can override each others locks.
344*/
345static void *threadLockingTest(void *pArg){
346 struct threadTestData *pData = (struct threadTestData*)pArg;
347 pData->result = fcntl(pData->fd, F_SETLK, &pData->lock);
348 return pArg;
349}
350
351/*
352** This procedure attempts to determine whether or not threads
353** can override each others locks then sets the
354** threadsOverrideEachOthersLocks variable appropriately.
355*/
356static void testThreadLockingBehavior(fd_orig){
357 int fd;
358 struct threadTestData d[2];
359 pthread_t t[2];
360
361 fd = dup(fd_orig);
362 if( fd<0 ) return;
363 memset(d, 0, sizeof(d));
364 d[0].fd = fd;
365 d[0].lock.l_type = F_RDLCK;
366 d[0].lock.l_len = 1;
367 d[0].lock.l_start = 0;
368 d[0].lock.l_whence = SEEK_SET;
369 d[1] = d[0];
370 d[1].lock.l_type = F_WRLCK;
371 pthread_create(&t[0], 0, threadLockingTest, &d[0]);
372 pthread_create(&t[1], 0, threadLockingTest, &d[1]);
373 pthread_join(t[0], 0);
374 pthread_join(t[1], 0);
375 close(fd);
376 threadsOverrideEachOthersLocks = d[0].result==0 && d[1].result==0;
377}
378#endif /* SQLITE_UNIX_THREADS */
379
drhbbd42a62004-05-22 17:41:58 +0000380/*
381** Release a lockInfo structure previously allocated by findLockInfo().
382*/
383static void releaseLockInfo(struct lockInfo *pLock){
384 pLock->nRef--;
385 if( pLock->nRef==0 ){
386 sqlite3HashInsert(&lockHash, &pLock->key, sizeof(pLock->key), 0);
387 sqliteFree(pLock);
388 }
389}
390
391/*
392** Release a openCnt structure previously allocated by findLockInfo().
393*/
394static void releaseOpenCnt(struct openCnt *pOpen){
395 pOpen->nRef--;
396 if( pOpen->nRef==0 ){
397 sqlite3HashInsert(&openHash, &pOpen->key, sizeof(pOpen->key), 0);
398 sqliteFree(pOpen->aPending);
399 sqliteFree(pOpen);
400 }
401}
402
403/*
404** Given a file descriptor, locate lockInfo and openCnt structures that
405** describes that file descriptor. Create a new ones if necessary. The
406** return values might be unset if an error occurs.
407**
408** Return the number of errors.
409*/
drh38f82712004-06-18 17:10:16 +0000410static int findLockInfo(
drhbbd42a62004-05-22 17:41:58 +0000411 int fd, /* The file descriptor used in the key */
412 struct lockInfo **ppLock, /* Return the lockInfo structure here */
drh5fdae772004-06-29 03:29:00 +0000413 struct openCnt **ppOpen /* Return the openCnt structure here */
drhbbd42a62004-05-22 17:41:58 +0000414){
415 int rc;
416 struct lockKey key1;
417 struct openKey key2;
418 struct stat statbuf;
419 struct lockInfo *pLock;
420 struct openCnt *pOpen;
421 rc = fstat(fd, &statbuf);
422 if( rc!=0 ) return 1;
423 memset(&key1, 0, sizeof(key1));
424 key1.dev = statbuf.st_dev;
425 key1.ino = statbuf.st_ino;
drh5fdae772004-06-29 03:29:00 +0000426#ifdef SQLITE_UNIX_THREADS
427 if( threadsOverrideEachOthersLocks<0 ){
428 testThreadLockingBehavior(fd);
429 }
430 key1.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();
431#endif
drhbbd42a62004-05-22 17:41:58 +0000432 memset(&key2, 0, sizeof(key2));
433 key2.dev = statbuf.st_dev;
434 key2.ino = statbuf.st_ino;
435 pLock = (struct lockInfo*)sqlite3HashFind(&lockHash, &key1, sizeof(key1));
436 if( pLock==0 ){
437 struct lockInfo *pOld;
438 pLock = sqliteMallocRaw( sizeof(*pLock) );
439 if( pLock==0 ) return 1;
440 pLock->key = key1;
441 pLock->nRef = 1;
442 pLock->cnt = 0;
danielk19779a1d0ab2004-06-01 14:09:28 +0000443 pLock->locktype = 0;
drhbbd42a62004-05-22 17:41:58 +0000444 pOld = sqlite3HashInsert(&lockHash, &pLock->key, sizeof(key1), pLock);
445 if( pOld!=0 ){
446 assert( pOld==pLock );
447 sqliteFree(pLock);
448 return 1;
449 }
450 }else{
451 pLock->nRef++;
452 }
453 *ppLock = pLock;
454 pOpen = (struct openCnt*)sqlite3HashFind(&openHash, &key2, sizeof(key2));
455 if( pOpen==0 ){
456 struct openCnt *pOld;
457 pOpen = sqliteMallocRaw( sizeof(*pOpen) );
458 if( pOpen==0 ){
459 releaseLockInfo(pLock);
460 return 1;
461 }
462 pOpen->key = key2;
463 pOpen->nRef = 1;
464 pOpen->nLock = 0;
465 pOpen->nPending = 0;
466 pOpen->aPending = 0;
467 pOld = sqlite3HashInsert(&openHash, &pOpen->key, sizeof(key2), pOpen);
468 if( pOld!=0 ){
469 assert( pOld==pOpen );
470 sqliteFree(pOpen);
471 releaseLockInfo(pLock);
472 return 1;
473 }
474 }else{
475 pOpen->nRef++;
476 }
477 *ppOpen = pOpen;
478 return 0;
479}
480
481/*
482** Delete the named file
483*/
484int sqlite3OsDelete(const char *zFilename){
485 unlink(zFilename);
486 return SQLITE_OK;
487}
488
/*
** Return TRUE (1) if the named file exists and FALSE (0) if it does
** not.  Existence is probed with access() using mode 0.
*/
int sqlite3OsFileExists(const char *zFilename){
  int rc = access(zFilename, 0);
  return rc==0;
}
495
496/*
497** Attempt to open a file for both reading and writing. If that
498** fails, try opening it read-only. If the file does not exist,
499** try to create it.
500**
501** On success, a handle for the open file is written to *id
502** and *pReadonly is set to 0 if the file was opened for reading and
503** writing or 1 if the file was opened read-only. The function returns
504** SQLITE_OK.
505**
506** On failure, the function returns SQLITE_CANTOPEN and leaves
507** *id and *pReadonly unchanged.
508*/
509int sqlite3OsOpenReadWrite(
510 const char *zFilename,
511 OsFile *id,
512 int *pReadonly
513){
514 int rc;
drhda71ce12004-06-21 18:14:45 +0000515 assert( !id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000516 id->dirfd = -1;
drh2b4b5962005-06-15 17:47:55 +0000517 SET_THREADID(id);
drh8e855772005-05-17 11:25:31 +0000518 id->h = open(zFilename, O_RDWR|O_CREAT|O_LARGEFILE|O_BINARY,
519 SQLITE_DEFAULT_FILE_PERMISSIONS);
drha6abd042004-06-09 17:37:22 +0000520 if( id->h<0 ){
drh6458e392004-07-20 01:14:13 +0000521#ifdef EISDIR
522 if( errno==EISDIR ){
523 return SQLITE_CANTOPEN;
524 }
525#endif
drha6abd042004-06-09 17:37:22 +0000526 id->h = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
527 if( id->h<0 ){
drhbbd42a62004-05-22 17:41:58 +0000528 return SQLITE_CANTOPEN;
529 }
530 *pReadonly = 1;
531 }else{
532 *pReadonly = 0;
533 }
534 sqlite3OsEnterMutex();
drha6abd042004-06-09 17:37:22 +0000535 rc = findLockInfo(id->h, &id->pLock, &id->pOpen);
drhbbd42a62004-05-22 17:41:58 +0000536 sqlite3OsLeaveMutex();
537 if( rc ){
drha6abd042004-06-09 17:37:22 +0000538 close(id->h);
drhbbd42a62004-05-22 17:41:58 +0000539 return SQLITE_NOMEM;
540 }
danielk197713adf8a2004-06-03 16:08:41 +0000541 id->locktype = 0;
drhda71ce12004-06-21 18:14:45 +0000542 id->isOpen = 1;
drha6abd042004-06-09 17:37:22 +0000543 TRACE3("OPEN %-3d %s\n", id->h, zFilename);
drhbbd42a62004-05-22 17:41:58 +0000544 OpenCounter(+1);
545 return SQLITE_OK;
546}
547
548
549/*
550** Attempt to open a new file for exclusive access by this process.
551** The file will be opened for both reading and writing. To avoid
552** a potential security problem, we do not allow the file to have
553** previously existed. Nor do we allow the file to be a symbolic
554** link.
555**
556** If delFlag is true, then make arrangements to automatically delete
557** the file when it is closed.
558**
559** On success, write the file handle into *id and return SQLITE_OK.
560**
561** On failure, return SQLITE_CANTOPEN.
562*/
563int sqlite3OsOpenExclusive(const char *zFilename, OsFile *id, int delFlag){
564 int rc;
drhda71ce12004-06-21 18:14:45 +0000565 assert( !id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000566 if( access(zFilename, 0)==0 ){
567 return SQLITE_CANTOPEN;
568 }
drh2b4b5962005-06-15 17:47:55 +0000569 SET_THREADID(id);
drhbbd42a62004-05-22 17:41:58 +0000570 id->dirfd = -1;
drha6abd042004-06-09 17:37:22 +0000571 id->h = open(zFilename,
drhd6459672005-08-13 17:17:01 +0000572 O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW|O_LARGEFILE|O_BINARY,
573 SQLITE_DEFAULT_FILE_PERMISSIONS);
drha6abd042004-06-09 17:37:22 +0000574 if( id->h<0 ){
drhbbd42a62004-05-22 17:41:58 +0000575 return SQLITE_CANTOPEN;
576 }
577 sqlite3OsEnterMutex();
drha6abd042004-06-09 17:37:22 +0000578 rc = findLockInfo(id->h, &id->pLock, &id->pOpen);
drhbbd42a62004-05-22 17:41:58 +0000579 sqlite3OsLeaveMutex();
580 if( rc ){
drha6abd042004-06-09 17:37:22 +0000581 close(id->h);
drhbbd42a62004-05-22 17:41:58 +0000582 unlink(zFilename);
583 return SQLITE_NOMEM;
584 }
danielk197713adf8a2004-06-03 16:08:41 +0000585 id->locktype = 0;
drhda71ce12004-06-21 18:14:45 +0000586 id->isOpen = 1;
drhbbd42a62004-05-22 17:41:58 +0000587 if( delFlag ){
588 unlink(zFilename);
589 }
drha6abd042004-06-09 17:37:22 +0000590 TRACE3("OPEN-EX %-3d %s\n", id->h, zFilename);
drhbbd42a62004-05-22 17:41:58 +0000591 OpenCounter(+1);
592 return SQLITE_OK;
593}
594
595/*
596** Attempt to open a new file for read-only access.
597**
598** On success, write the file handle into *id and return SQLITE_OK.
599**
600** On failure, return SQLITE_CANTOPEN.
601*/
602int sqlite3OsOpenReadOnly(const char *zFilename, OsFile *id){
603 int rc;
drhda71ce12004-06-21 18:14:45 +0000604 assert( !id->isOpen );
drh2b4b5962005-06-15 17:47:55 +0000605 SET_THREADID(id);
drhbbd42a62004-05-22 17:41:58 +0000606 id->dirfd = -1;
drha6abd042004-06-09 17:37:22 +0000607 id->h = open(zFilename, O_RDONLY|O_LARGEFILE|O_BINARY);
608 if( id->h<0 ){
drhbbd42a62004-05-22 17:41:58 +0000609 return SQLITE_CANTOPEN;
610 }
611 sqlite3OsEnterMutex();
drha6abd042004-06-09 17:37:22 +0000612 rc = findLockInfo(id->h, &id->pLock, &id->pOpen);
drhbbd42a62004-05-22 17:41:58 +0000613 sqlite3OsLeaveMutex();
614 if( rc ){
drha6abd042004-06-09 17:37:22 +0000615 close(id->h);
drhbbd42a62004-05-22 17:41:58 +0000616 return SQLITE_NOMEM;
617 }
danielk197713adf8a2004-06-03 16:08:41 +0000618 id->locktype = 0;
drhda71ce12004-06-21 18:14:45 +0000619 id->isOpen = 1;
drha6abd042004-06-09 17:37:22 +0000620 TRACE3("OPEN-RO %-3d %s\n", id->h, zFilename);
drhbbd42a62004-05-22 17:41:58 +0000621 OpenCounter(+1);
622 return SQLITE_OK;
623}
624
625/*
626** Attempt to open a file descriptor for the directory that contains a
627** file. This file descriptor can be used to fsync() the directory
628** in order to make sure the creation of a new file is actually written
629** to disk.
630**
631** This routine is only meaningful for Unix. It is a no-op under
632** windows since windows does not support hard links.
633**
634** On success, a handle for a previously open file is at *id is
635** updated with the new directory file descriptor and SQLITE_OK is
636** returned.
637**
638** On failure, the function returns SQLITE_CANTOPEN and leaves
639** *id unchanged.
640*/
641int sqlite3OsOpenDirectory(
642 const char *zDirname,
643 OsFile *id
644){
drhda71ce12004-06-21 18:14:45 +0000645 if( !id->isOpen ){
drhbbd42a62004-05-22 17:41:58 +0000646 /* Do not open the directory if the corresponding file is not already
647 ** open. */
648 return SQLITE_CANTOPEN;
649 }
drh2b4b5962005-06-15 17:47:55 +0000650 SET_THREADID(id);
drhbbd42a62004-05-22 17:41:58 +0000651 assert( id->dirfd<0 );
drh8e855772005-05-17 11:25:31 +0000652 id->dirfd = open(zDirname, O_RDONLY|O_BINARY, 0);
drhbbd42a62004-05-22 17:41:58 +0000653 if( id->dirfd<0 ){
654 return SQLITE_CANTOPEN;
655 }
656 TRACE3("OPENDIR %-3d %s\n", id->dirfd, zDirname);
657 return SQLITE_OK;
658}
659
660/*
drhab3f9fe2004-08-14 17:10:10 +0000661** If the following global variable points to a string which is the
662** name of a directory, then that directory will be used to store
663** temporary files.
664*/
tpoindex9a09a3c2004-12-20 19:01:32 +0000665char *sqlite3_temp_directory = 0;
drhab3f9fe2004-08-14 17:10:10 +0000666
667/*
drhbbd42a62004-05-22 17:41:58 +0000668** Create a temporary file name in zBuf. zBuf must be big enough to
669** hold at least SQLITE_TEMPNAME_SIZE characters.
670*/
671int sqlite3OsTempFileName(char *zBuf){
672 static const char *azDirs[] = {
drhab3f9fe2004-08-14 17:10:10 +0000673 0,
drhbbd42a62004-05-22 17:41:58 +0000674 "/var/tmp",
675 "/usr/tmp",
676 "/tmp",
677 ".",
678 };
drh57196282004-10-06 15:41:16 +0000679 static const unsigned char zChars[] =
drhbbd42a62004-05-22 17:41:58 +0000680 "abcdefghijklmnopqrstuvwxyz"
681 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
682 "0123456789";
683 int i, j;
684 struct stat buf;
685 const char *zDir = ".";
drheffd02b2004-08-29 23:42:13 +0000686 azDirs[0] = sqlite3_temp_directory;
drhbbd42a62004-05-22 17:41:58 +0000687 for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
drhab3f9fe2004-08-14 17:10:10 +0000688 if( azDirs[i]==0 ) continue;
drhbbd42a62004-05-22 17:41:58 +0000689 if( stat(azDirs[i], &buf) ) continue;
690 if( !S_ISDIR(buf.st_mode) ) continue;
691 if( access(azDirs[i], 07) ) continue;
692 zDir = azDirs[i];
693 break;
694 }
695 do{
696 sprintf(zBuf, "%s/"TEMP_FILE_PREFIX, zDir);
697 j = strlen(zBuf);
698 sqlite3Randomness(15, &zBuf[j]);
699 for(i=0; i<15; i++, j++){
700 zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
701 }
702 zBuf[j] = 0;
703 }while( access(zBuf,0)==0 );
704 return SQLITE_OK;
705}
706
#ifndef SQLITE_OMIT_PAGER_PRAGMAS
/*
** Return 1 if zBuf names a directory for which access() grants read,
** write and search permission.  Return 0 otherwise, which includes the
** cases where zBuf is NULL, is an empty string, or cannot be stat()-ed.
*/
int sqlite3OsIsDirWritable(char *zBuf){
  struct stat sStat;
  if( zBuf==0 || zBuf[0]==0 ) return 0;
  if( stat(zBuf, &sStat)!=0 ) return 0;
  if( !S_ISDIR(sStat.st_mode) ) return 0;
  return access(zBuf, 07)==0;
}
#endif /* SQLITE_OMIT_PAGER_PRAGMAS */
tpoindex9a09a3c2004-12-20 19:01:32 +0000722
723/*
drhbbd42a62004-05-22 17:41:58 +0000724** Read data from a file into a buffer. Return SQLITE_OK if all
725** bytes were read successfully and SQLITE_IOERR if anything goes
726** wrong.
727*/
728int sqlite3OsRead(OsFile *id, void *pBuf, int amt){
729 int got;
drhda71ce12004-06-21 18:14:45 +0000730 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000731 SimulateIOError(SQLITE_IOERR);
732 TIMER_START;
drha6abd042004-06-09 17:37:22 +0000733 got = read(id->h, pBuf, amt);
drhbbd42a62004-05-22 17:41:58 +0000734 TIMER_END;
drhe29b9152005-03-18 14:03:15 +0000735 TRACE5("READ %-3d %5d %7d %d\n", id->h, got, last_page, TIMER_ELAPSED);
drhbbd42a62004-05-22 17:41:58 +0000736 SEEK(0);
737 /* if( got<0 ) got = 0; */
738 if( got==amt ){
739 return SQLITE_OK;
740 }else{
741 return SQLITE_IOERR;
742 }
743}
744
745/*
746** Write data from a buffer into a file. Return SQLITE_OK on success
747** or some other error code on failure.
748*/
749int sqlite3OsWrite(OsFile *id, const void *pBuf, int amt){
750 int wrote = 0;
drhda71ce12004-06-21 18:14:45 +0000751 assert( id->isOpen );
drh4c7f9412005-02-03 00:29:47 +0000752 assert( amt>0 );
drhbbd42a62004-05-22 17:41:58 +0000753 SimulateIOError(SQLITE_IOERR);
drh047d4832004-10-01 14:38:02 +0000754 SimulateDiskfullError;
drhbbd42a62004-05-22 17:41:58 +0000755 TIMER_START;
drha6abd042004-06-09 17:37:22 +0000756 while( amt>0 && (wrote = write(id->h, pBuf, amt))>0 ){
drhbbd42a62004-05-22 17:41:58 +0000757 amt -= wrote;
758 pBuf = &((char*)pBuf)[wrote];
759 }
760 TIMER_END;
drhe29b9152005-03-18 14:03:15 +0000761 TRACE5("WRITE %-3d %5d %7d %d\n", id->h, wrote, last_page, TIMER_ELAPSED);
drhbbd42a62004-05-22 17:41:58 +0000762 SEEK(0);
763 if( amt>0 ){
764 return SQLITE_FULL;
765 }
766 return SQLITE_OK;
767}
768
769/*
770** Move the read/write pointer in a file.
771*/
drheb206252004-10-01 02:00:31 +0000772int sqlite3OsSeek(OsFile *id, i64 offset){
drhda71ce12004-06-21 18:14:45 +0000773 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000774 SEEK(offset/1024 + 1);
drha6abd042004-06-09 17:37:22 +0000775 lseek(id->h, offset, SEEK_SET);
drhbbd42a62004-05-22 17:41:58 +0000776 return SQLITE_OK;
777}
778
drhb851b2c2005-03-10 14:11:12 +0000779#ifdef SQLITE_TEST
780/*
781** Count the number of fullsyncs and normal syncs. This is used to test
** that syncs and fullsyncs are occurring at the right times.
783*/
784int sqlite3_sync_count = 0;
785int sqlite3_fullsync_count = 0;
786#endif
787
788
/*
** The fsync() system call does not work as advertised on many
** unix systems.  The following procedure is an attempt to make
** it work better.
**
**   fd        The file descriptor to sync
**   fullSync  If true and F_FULLFSYNC is available, try
**             fcntl(fd, F_FULLFSYNC, 0) first and fall back to fsync()
**             if that fails.  If false, go straight to fsync().
**   dataOnly  Only consulted when F_FULLFSYNC is not available: if
**             true use fdatasync(), otherwise fsync().
**
** Returns 0 on success and the non-zero result of the last sync call
** on failure.
**
** The SQLITE_NO_SYNC macro disables all fsync()s.  This is useful
** for testing when we want to run through the test suite quickly.
** You are strongly advised *not* to deploy with SQLITE_NO_SYNC
** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash
** or power failure will likely corrupt the database file.
*/
static int full_fsync(int fd, int fullSync, int dataOnly){
  int rc;

  /* Under SQLITE_TEST, count every call and, separately, every call
  ** that asked for a full sync, so that tests can verify that this
  ** procedure gets called with the correct arguments. */
#ifdef SQLITE_TEST
  sqlite3_sync_count++;
  if( fullSync ) sqlite3_fullsync_count++;
#endif

#if defined(SQLITE_NO_SYNC)
  /* Syncing is compiled out entirely */
  rc = SQLITE_OK;
#elif defined(F_FULLFSYNC)
  /* Start from "failed" when no full sync was requested so that the
  ** ordinary fsync() below runs in that case too. */
  rc = fullSync ? fcntl(fd, F_FULLFSYNC, 0) : 1;
  if( rc ) rc = fsync(fd);
#else
  rc = dataOnly ? fdatasync(fd) : fsync(fd);
#endif

  return rc;
}
839
840/*
drhbbd42a62004-05-22 17:41:58 +0000841** Make sure all writes to a particular file are committed to disk.
842**
drheb796a72005-09-08 12:38:41 +0000843** If dataOnly==0 then both the file itself and its metadata (file
844** size, access time, etc) are synced. If dataOnly!=0 then only the
845** file data is synced.
846**
drhbbd42a62004-05-22 17:41:58 +0000847** Under Unix, also make sure that the directory entry for the file
848** has been created by fsync-ing the directory that contains the file.
849** If we do not do this and we encounter a power failure, the directory
850** entry for the journal might not exist after we reboot. The next
851** SQLite to access the file will not know that the journal exists (because
852** the directory entry for the journal was never created) and the transaction
853** will not roll back - possibly leading to database corruption.
854*/
drheb796a72005-09-08 12:38:41 +0000855int sqlite3OsSync(OsFile *id, int dataOnly){
drhda71ce12004-06-21 18:14:45 +0000856 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000857 SimulateIOError(SQLITE_IOERR);
drha6abd042004-06-09 17:37:22 +0000858 TRACE2("SYNC %-3d\n", id->h);
drheb796a72005-09-08 12:38:41 +0000859 if( full_fsync(id->h, id->fullSync, dataOnly) ){
drhbbd42a62004-05-22 17:41:58 +0000860 return SQLITE_IOERR;
drhbbd42a62004-05-22 17:41:58 +0000861 }
drha2854222004-06-17 19:04:17 +0000862 if( id->dirfd>=0 ){
863 TRACE2("DIRSYNC %-3d\n", id->dirfd);
drheb796a72005-09-08 12:38:41 +0000864 full_fsync(id->dirfd, id->fullSync, 0);
drha2854222004-06-17 19:04:17 +0000865 close(id->dirfd); /* Only need to sync once, so close the directory */
866 id->dirfd = -1; /* when we are done. */
867 }
drha2854222004-06-17 19:04:17 +0000868 return SQLITE_OK;
drhbbd42a62004-05-22 17:41:58 +0000869}
870
871/*
danielk1977962398d2004-06-14 09:35:16 +0000872** Sync the directory zDirname. This is a no-op on operating systems other
873** than UNIX.
drhb851b2c2005-03-10 14:11:12 +0000874**
875** This is used to make sure the master journal file has truely been deleted
876** before making changes to individual journals on a multi-database commit.
drhf30cc942005-03-11 17:52:34 +0000877** The F_FULLFSYNC option is not needed here.
danielk1977962398d2004-06-14 09:35:16 +0000878*/
879int sqlite3OsSyncDirectory(const char *zDirname){
880 int fd;
881 int r;
danielk1977369f27e2004-06-15 11:40:04 +0000882 SimulateIOError(SQLITE_IOERR);
drh8e855772005-05-17 11:25:31 +0000883 fd = open(zDirname, O_RDONLY|O_BINARY, 0);
danielk1977369f27e2004-06-15 11:40:04 +0000884 TRACE3("DIRSYNC %-3d (%s)\n", fd, zDirname);
danielk1977962398d2004-06-14 09:35:16 +0000885 if( fd<0 ){
886 return SQLITE_CANTOPEN;
887 }
888 r = fsync(fd);
889 close(fd);
890 return ((r==0)?SQLITE_OK:SQLITE_IOERR);
891}
892
893/*
drhbbd42a62004-05-22 17:41:58 +0000894** Truncate an open file to a specified size
895*/
drheb206252004-10-01 02:00:31 +0000896int sqlite3OsTruncate(OsFile *id, i64 nByte){
drhda71ce12004-06-21 18:14:45 +0000897 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000898 SimulateIOError(SQLITE_IOERR);
drha6abd042004-06-09 17:37:22 +0000899 return ftruncate(id->h, nByte)==0 ? SQLITE_OK : SQLITE_IOERR;
drhbbd42a62004-05-22 17:41:58 +0000900}
901
902/*
903** Determine the current size of a file in bytes
904*/
drheb206252004-10-01 02:00:31 +0000905int sqlite3OsFileSize(OsFile *id, i64 *pSize){
drhbbd42a62004-05-22 17:41:58 +0000906 struct stat buf;
drhda71ce12004-06-21 18:14:45 +0000907 assert( id->isOpen );
drhbbd42a62004-05-22 17:41:58 +0000908 SimulateIOError(SQLITE_IOERR);
drha6abd042004-06-09 17:37:22 +0000909 if( fstat(id->h, &buf)!=0 ){
drhbbd42a62004-05-22 17:41:58 +0000910 return SQLITE_IOERR;
911 }
912 *pSize = buf.st_size;
913 return SQLITE_OK;
914}
915
danielk19779a1d0ab2004-06-01 14:09:28 +0000916/*
danielk197713adf8a2004-06-03 16:08:41 +0000917** This routine checks if there is a RESERVED lock held on the specified
918** file by this or any other process. If such a lock is held, return
drh2ac3ee92004-06-07 16:27:46 +0000919** non-zero. If the file is unlocked or holds only SHARED locks, then
920** return zero.
danielk197713adf8a2004-06-03 16:08:41 +0000921*/
drha6abd042004-06-09 17:37:22 +0000922int sqlite3OsCheckReservedLock(OsFile *id){
danielk197713adf8a2004-06-03 16:08:41 +0000923 int r = 0;
924
drhda71ce12004-06-21 18:14:45 +0000925 assert( id->isOpen );
drh2b4b5962005-06-15 17:47:55 +0000926 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
drh2ac3ee92004-06-07 16:27:46 +0000927 sqlite3OsEnterMutex(); /* Needed because id->pLock is shared across threads */
danielk197713adf8a2004-06-03 16:08:41 +0000928
929 /* Check if a thread in this process holds such a lock */
930 if( id->pLock->locktype>SHARED_LOCK ){
931 r = 1;
932 }
933
drh2ac3ee92004-06-07 16:27:46 +0000934 /* Otherwise see if some other process holds it.
danielk197713adf8a2004-06-03 16:08:41 +0000935 */
936 if( !r ){
937 struct flock lock;
938 lock.l_whence = SEEK_SET;
drh2ac3ee92004-06-07 16:27:46 +0000939 lock.l_start = RESERVED_BYTE;
940 lock.l_len = 1;
941 lock.l_type = F_WRLCK;
drha6abd042004-06-09 17:37:22 +0000942 fcntl(id->h, F_GETLK, &lock);
danielk197713adf8a2004-06-03 16:08:41 +0000943 if( lock.l_type!=F_UNLCK ){
944 r = 1;
945 }
946 }
947
948 sqlite3OsLeaveMutex();
drha6abd042004-06-09 17:37:22 +0000949 TRACE3("TEST WR-LOCK %d %d\n", id->h, r);
danielk197713adf8a2004-06-03 16:08:41 +0000950
951 return r;
952}
953
#ifdef SQLITE_DEBUG
/*
** Helper function for printing out trace information from debugging
** binaries.  Given an integer lock-type, return its string
** representation.  Any value that is not one of the five known lock
** types yields "ERROR".
*/
static const char * locktypeName(int locktype){
  const char *zName = "ERROR";
  switch( locktype ){
    case NO_LOCK:        zName = "NONE";       break;
    case SHARED_LOCK:    zName = "SHARED";     break;
    case RESERVED_LOCK:  zName = "RESERVED";   break;
    case PENDING_LOCK:   zName = "PENDING";    break;
    case EXCLUSIVE_LOCK: zName = "EXCLUSIVE";  break;
  }
  return zName;
}
#endif
971
danielk197713adf8a2004-06-03 16:08:41 +0000972/*
danielk19779a1d0ab2004-06-01 14:09:28 +0000973** Lock the file with the lock specified by parameter locktype - one
974** of the following:
975**
drh2ac3ee92004-06-07 16:27:46 +0000976** (1) SHARED_LOCK
977** (2) RESERVED_LOCK
978** (3) PENDING_LOCK
979** (4) EXCLUSIVE_LOCK
980**
drhb3e04342004-06-08 00:47:47 +0000981** Sometimes when requesting one lock state, additional lock states
982** are inserted in between. The locking might fail on one of the later
983** transitions leaving the lock state different from what it started but
984** still short of its goal. The following chart shows the allowed
985** transitions and the inserted intermediate states:
986**
987** UNLOCKED -> SHARED
988** SHARED -> RESERVED
989** SHARED -> (PENDING) -> EXCLUSIVE
990** RESERVED -> (PENDING) -> EXCLUSIVE
991** PENDING -> EXCLUSIVE
drh2ac3ee92004-06-07 16:27:46 +0000992**
drha6abd042004-06-09 17:37:22 +0000993** This routine will only increase a lock. Use the sqlite3OsUnlock()
994** routine to lower a locking level.
danielk19779a1d0ab2004-06-01 14:09:28 +0000995*/
996int sqlite3OsLock(OsFile *id, int locktype){
danielk1977f42f25c2004-06-25 07:21:28 +0000997 /* The following describes the implementation of the various locks and
998 ** lock transitions in terms of the POSIX advisory shared and exclusive
999 ** lock primitives (called read-locks and write-locks below, to avoid
1000 ** confusion with SQLite lock names). The algorithms are complicated
1001 ** slightly in order to be compatible with windows systems simultaneously
1002 ** accessing the same database file, in case that is ever required.
1003 **
1004 ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved
1005 ** byte', each single bytes at well known offsets, and the 'shared byte
1006 ** range', a range of 510 bytes at a well known offset.
1007 **
1008 ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
1009 ** byte'. If this is successful, a random byte from the 'shared byte
1010 ** range' is read-locked and the lock on the 'pending byte' released.
1011 **
danielk197790ba3bd2004-06-25 08:32:25 +00001012 ** A process may only obtain a RESERVED lock after it has a SHARED lock.
1013 ** A RESERVED lock is implemented by grabbing a write-lock on the
1014 ** 'reserved byte'.
danielk1977f42f25c2004-06-25 07:21:28 +00001015 **
1016 ** A process may only obtain a PENDING lock after it has obtained a
danielk197790ba3bd2004-06-25 08:32:25 +00001017 ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
1018 ** on the 'pending byte'. This ensures that no new SHARED locks can be
1019 ** obtained, but existing SHARED locks are allowed to persist. A process
1020 ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
1021 ** This property is used by the algorithm for rolling back a journal file
1022 ** after a crash.
danielk1977f42f25c2004-06-25 07:21:28 +00001023 **
danielk197790ba3bd2004-06-25 08:32:25 +00001024 ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
1025 ** implemented by obtaining a write-lock on the entire 'shared byte
1026 ** range'. Since all other locks require a read-lock on one of the bytes
1027 ** within this range, this ensures that no other locks are held on the
1028 ** database.
danielk1977f42f25c2004-06-25 07:21:28 +00001029 **
1030 ** The reason a single byte cannot be used instead of the 'shared byte
1031 ** range' is that some versions of windows do not support read-locks. By
1032 ** locking a random byte from a range, concurrent SHARED locks may exist
1033 ** even if the locking primitive used is always a write-lock.
1034 */
danielk19779a1d0ab2004-06-01 14:09:28 +00001035 int rc = SQLITE_OK;
1036 struct lockInfo *pLock = id->pLock;
1037 struct flock lock;
1038 int s;
1039
drhda71ce12004-06-21 18:14:45 +00001040 assert( id->isOpen );
drhe29b9152005-03-18 14:03:15 +00001041 TRACE7("LOCK %d %s was %s(%s,%d) pid=%d\n", id->h, locktypeName(locktype),
danielk19772b444852004-06-29 07:45:33 +00001042 locktypeName(id->locktype), locktypeName(pLock->locktype), pLock->cnt
1043 ,getpid() );
drh2b4b5962005-06-15 17:47:55 +00001044 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
danielk19779a1d0ab2004-06-01 14:09:28 +00001045
1046 /* If there is already a lock of this type or more restrictive on the
1047 ** OsFile, do nothing. Don't use the end_lock: exit path, as
1048 ** sqlite3OsEnterMutex() hasn't been called yet.
1049 */
danielk197713adf8a2004-06-03 16:08:41 +00001050 if( id->locktype>=locktype ){
drhe29b9152005-03-18 14:03:15 +00001051 TRACE3("LOCK %d %s ok (already held)\n", id->h, locktypeName(locktype));
danielk19779a1d0ab2004-06-01 14:09:28 +00001052 return SQLITE_OK;
1053 }
1054
drhb3e04342004-06-08 00:47:47 +00001055 /* Make sure the locking sequence is correct
drh2ac3ee92004-06-07 16:27:46 +00001056 */
drhb3e04342004-06-08 00:47:47 +00001057 assert( id->locktype!=NO_LOCK || locktype==SHARED_LOCK );
1058 assert( locktype!=PENDING_LOCK );
1059 assert( locktype!=RESERVED_LOCK || id->locktype==SHARED_LOCK );
drh2ac3ee92004-06-07 16:27:46 +00001060
drhb3e04342004-06-08 00:47:47 +00001061 /* This mutex is needed because id->pLock is shared across threads
1062 */
1063 sqlite3OsEnterMutex();
danielk19779a1d0ab2004-06-01 14:09:28 +00001064
1065 /* If some thread using this PID has a lock via a different OsFile*
1066 ** handle that precludes the requested lock, return BUSY.
1067 */
danielk197713adf8a2004-06-03 16:08:41 +00001068 if( (id->locktype!=pLock->locktype &&
drh2ac3ee92004-06-07 16:27:46 +00001069 (pLock->locktype>=PENDING_LOCK || locktype>SHARED_LOCK))
danielk19779a1d0ab2004-06-01 14:09:28 +00001070 ){
1071 rc = SQLITE_BUSY;
1072 goto end_lock;
1073 }
1074
1075 /* If a SHARED lock is requested, and some thread using this PID already
1076 ** has a SHARED or RESERVED lock, then increment reference counts and
1077 ** return SQLITE_OK.
1078 */
1079 if( locktype==SHARED_LOCK &&
1080 (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
1081 assert( locktype==SHARED_LOCK );
danielk197713adf8a2004-06-03 16:08:41 +00001082 assert( id->locktype==0 );
danielk1977ecb2a962004-06-02 06:30:16 +00001083 assert( pLock->cnt>0 );
danielk197713adf8a2004-06-03 16:08:41 +00001084 id->locktype = SHARED_LOCK;
danielk19779a1d0ab2004-06-01 14:09:28 +00001085 pLock->cnt++;
1086 id->pOpen->nLock++;
1087 goto end_lock;
1088 }
1089
danielk197713adf8a2004-06-03 16:08:41 +00001090 lock.l_len = 1L;
drh2b4b5962005-06-15 17:47:55 +00001091
danielk19779a1d0ab2004-06-01 14:09:28 +00001092 lock.l_whence = SEEK_SET;
1093
drh3cde3bb2004-06-12 02:17:14 +00001094 /* A PENDING lock is needed before acquiring a SHARED lock and before
1095 ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
1096 ** be released.
danielk19779a1d0ab2004-06-01 14:09:28 +00001097 */
drh3cde3bb2004-06-12 02:17:14 +00001098 if( locktype==SHARED_LOCK
1099 || (locktype==EXCLUSIVE_LOCK && id->locktype<PENDING_LOCK)
1100 ){
danielk1977489468c2004-06-28 08:25:47 +00001101 lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
drh2ac3ee92004-06-07 16:27:46 +00001102 lock.l_start = PENDING_BYTE;
drha6abd042004-06-09 17:37:22 +00001103 s = fcntl(id->h, F_SETLK, &lock);
danielk19779a1d0ab2004-06-01 14:09:28 +00001104 if( s ){
1105 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1106 goto end_lock;
1107 }
drh3cde3bb2004-06-12 02:17:14 +00001108 }
1109
1110
1111 /* If control gets to this point, then actually go ahead and make
1112 ** operating system calls for the specified lock.
1113 */
1114 if( locktype==SHARED_LOCK ){
1115 assert( pLock->cnt==0 );
1116 assert( pLock->locktype==0 );
danielk19779a1d0ab2004-06-01 14:09:28 +00001117
drh2ac3ee92004-06-07 16:27:46 +00001118 /* Now get the read-lock */
1119 lock.l_start = SHARED_FIRST;
1120 lock.l_len = SHARED_SIZE;
drha6abd042004-06-09 17:37:22 +00001121 s = fcntl(id->h, F_SETLK, &lock);
drh2ac3ee92004-06-07 16:27:46 +00001122
1123 /* Drop the temporary PENDING lock */
1124 lock.l_start = PENDING_BYTE;
1125 lock.l_len = 1L;
danielk19779a1d0ab2004-06-01 14:09:28 +00001126 lock.l_type = F_UNLCK;
drh2b4b5962005-06-15 17:47:55 +00001127 if( fcntl(id->h, F_SETLK, &lock)!=0 ){
1128 rc = SQLITE_IOERR; /* This should never happen */
1129 goto end_lock;
1130 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001131 if( s ){
drhbbd42a62004-05-22 17:41:58 +00001132 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1133 }else{
danielk197713adf8a2004-06-03 16:08:41 +00001134 id->locktype = SHARED_LOCK;
danielk1977ecb2a962004-06-02 06:30:16 +00001135 id->pOpen->nLock++;
danielk19779a1d0ab2004-06-01 14:09:28 +00001136 pLock->cnt = 1;
drhbbd42a62004-05-22 17:41:58 +00001137 }
drh3cde3bb2004-06-12 02:17:14 +00001138 }else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
1139 /* We are trying for an exclusive lock but another thread in this
1140 ** same process is still holding a shared lock. */
1141 rc = SQLITE_BUSY;
drhbbd42a62004-05-22 17:41:58 +00001142 }else{
drh3cde3bb2004-06-12 02:17:14 +00001143 /* The request was for a RESERVED or EXCLUSIVE lock. It is
danielk19779a1d0ab2004-06-01 14:09:28 +00001144 ** assumed that there is a SHARED or greater lock on the file
1145 ** already.
1146 */
danielk197713adf8a2004-06-03 16:08:41 +00001147 assert( 0!=id->locktype );
danielk19779a1d0ab2004-06-01 14:09:28 +00001148 lock.l_type = F_WRLCK;
1149 switch( locktype ){
1150 case RESERVED_LOCK:
drh2ac3ee92004-06-07 16:27:46 +00001151 lock.l_start = RESERVED_BYTE;
danielk19779a1d0ab2004-06-01 14:09:28 +00001152 break;
danielk19779a1d0ab2004-06-01 14:09:28 +00001153 case EXCLUSIVE_LOCK:
drh2ac3ee92004-06-07 16:27:46 +00001154 lock.l_start = SHARED_FIRST;
1155 lock.l_len = SHARED_SIZE;
danielk19779a1d0ab2004-06-01 14:09:28 +00001156 break;
1157 default:
1158 assert(0);
1159 }
drha6abd042004-06-09 17:37:22 +00001160 s = fcntl(id->h, F_SETLK, &lock);
danielk19779a1d0ab2004-06-01 14:09:28 +00001161 if( s ){
1162 rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
1163 }
drhbbd42a62004-05-22 17:41:58 +00001164 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001165
danielk1977ecb2a962004-06-02 06:30:16 +00001166 if( rc==SQLITE_OK ){
danielk197713adf8a2004-06-03 16:08:41 +00001167 id->locktype = locktype;
danielk1977ecb2a962004-06-02 06:30:16 +00001168 pLock->locktype = locktype;
drh3cde3bb2004-06-12 02:17:14 +00001169 }else if( locktype==EXCLUSIVE_LOCK ){
1170 id->locktype = PENDING_LOCK;
1171 pLock->locktype = PENDING_LOCK;
danielk1977ecb2a962004-06-02 06:30:16 +00001172 }
danielk19779a1d0ab2004-06-01 14:09:28 +00001173
1174end_lock:
drhbbd42a62004-05-22 17:41:58 +00001175 sqlite3OsLeaveMutex();
drhe29b9152005-03-18 14:03:15 +00001176 TRACE4("LOCK %d %s %s\n", id->h, locktypeName(locktype),
danielk19772b444852004-06-29 07:45:33 +00001177 rc==SQLITE_OK ? "ok" : "failed");
drhbbd42a62004-05-22 17:41:58 +00001178 return rc;
1179}
1180
1181/*
drha6abd042004-06-09 17:37:22 +00001182** Lower the locking level on file descriptor id to locktype. locktype
1183** must be either NO_LOCK or SHARED_LOCK.
1184**
1185** If the locking level of the file descriptor is already at or below
1186** the requested locking level, this routine is a no-op.
1187**
drh9c105bb2004-10-02 20:38:28 +00001188** It is not possible for this routine to fail if the second argument
1189** is NO_LOCK. If the second argument is SHARED_LOCK, this routine
1190** might return SQLITE_IOERR instead of SQLITE_OK.
drhbbd42a62004-05-22 17:41:58 +00001191*/
drha6abd042004-06-09 17:37:22 +00001192int sqlite3OsUnlock(OsFile *id, int locktype){
1193 struct lockInfo *pLock;
1194 struct flock lock;
drh9c105bb2004-10-02 20:38:28 +00001195 int rc = SQLITE_OK;
drha6abd042004-06-09 17:37:22 +00001196
drhda71ce12004-06-21 18:14:45 +00001197 assert( id->isOpen );
drhe29b9152005-03-18 14:03:15 +00001198 TRACE7("UNLOCK %d %d was %d(%d,%d) pid=%d\n", id->h, locktype, id->locktype,
danielk19772b444852004-06-29 07:45:33 +00001199 id->pLock->locktype, id->pLock->cnt, getpid());
drh2b4b5962005-06-15 17:47:55 +00001200 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
drha6abd042004-06-09 17:37:22 +00001201
1202 assert( locktype<=SHARED_LOCK );
1203 if( id->locktype<=locktype ){
1204 return SQLITE_OK;
1205 }
drhbbd42a62004-05-22 17:41:58 +00001206 sqlite3OsEnterMutex();
drha6abd042004-06-09 17:37:22 +00001207 pLock = id->pLock;
1208 assert( pLock->cnt!=0 );
1209 if( id->locktype>SHARED_LOCK ){
1210 assert( pLock->locktype==id->locktype );
drh9c105bb2004-10-02 20:38:28 +00001211 if( locktype==SHARED_LOCK ){
1212 lock.l_type = F_RDLCK;
1213 lock.l_whence = SEEK_SET;
1214 lock.l_start = SHARED_FIRST;
1215 lock.l_len = SHARED_SIZE;
1216 if( fcntl(id->h, F_SETLK, &lock)!=0 ){
1217 /* This should never happen */
1218 rc = SQLITE_IOERR;
1219 }
1220 }
drhbbd42a62004-05-22 17:41:58 +00001221 lock.l_type = F_UNLCK;
1222 lock.l_whence = SEEK_SET;
drha6abd042004-06-09 17:37:22 +00001223 lock.l_start = PENDING_BYTE;
1224 lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE );
drh2b4b5962005-06-15 17:47:55 +00001225 if( fcntl(id->h, F_SETLK, &lock)==0 ){
1226 pLock->locktype = SHARED_LOCK;
1227 }else{
1228 rc = SQLITE_IOERR; /* This should never happen */
1229 }
drhbbd42a62004-05-22 17:41:58 +00001230 }
drha6abd042004-06-09 17:37:22 +00001231 if( locktype==NO_LOCK ){
1232 struct openCnt *pOpen;
danielk1977ecb2a962004-06-02 06:30:16 +00001233
drha6abd042004-06-09 17:37:22 +00001234 /* Decrement the shared lock counter. Release the lock using an
1235 ** OS call only when all threads in this same process have released
1236 ** the lock.
1237 */
1238 pLock->cnt--;
1239 if( pLock->cnt==0 ){
1240 lock.l_type = F_UNLCK;
1241 lock.l_whence = SEEK_SET;
1242 lock.l_start = lock.l_len = 0L;
drh2b4b5962005-06-15 17:47:55 +00001243 if( fcntl(id->h, F_SETLK, &lock)==0 ){
1244 pLock->locktype = NO_LOCK;
1245 }else{
1246 rc = SQLITE_IOERR; /* This should never happen */
1247 }
drha6abd042004-06-09 17:37:22 +00001248 }
1249
drhbbd42a62004-05-22 17:41:58 +00001250 /* Decrement the count of locks against this same file. When the
1251 ** count reaches zero, close any other file descriptors whose close
1252 ** was deferred because of outstanding locks.
1253 */
drha6abd042004-06-09 17:37:22 +00001254 pOpen = id->pOpen;
drhbbd42a62004-05-22 17:41:58 +00001255 pOpen->nLock--;
1256 assert( pOpen->nLock>=0 );
1257 if( pOpen->nLock==0 && pOpen->nPending>0 ){
1258 int i;
1259 for(i=0; i<pOpen->nPending; i++){
1260 close(pOpen->aPending[i]);
1261 }
1262 sqliteFree(pOpen->aPending);
1263 pOpen->nPending = 0;
1264 pOpen->aPending = 0;
1265 }
1266 }
1267 sqlite3OsLeaveMutex();
drha6abd042004-06-09 17:37:22 +00001268 id->locktype = locktype;
drh9c105bb2004-10-02 20:38:28 +00001269 return rc;
drhbbd42a62004-05-22 17:41:58 +00001270}
1271
1272/*
danielk1977e3026632004-06-22 11:29:02 +00001273** Close a file.
1274*/
1275int sqlite3OsClose(OsFile *id){
1276 if( !id->isOpen ) return SQLITE_OK;
drh2b4b5962005-06-15 17:47:55 +00001277 if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
danielk1977e3026632004-06-22 11:29:02 +00001278 sqlite3OsUnlock(id, NO_LOCK);
1279 if( id->dirfd>=0 ) close(id->dirfd);
1280 id->dirfd = -1;
1281 sqlite3OsEnterMutex();
1282 if( id->pOpen->nLock ){
1283 /* If there are outstanding locks, do not actually close the file just
1284 ** yet because that would clear those locks. Instead, add the file
1285 ** descriptor to pOpen->aPending. It will be automatically closed when
1286 ** the last lock is cleared.
1287 */
1288 int *aNew;
1289 struct openCnt *pOpen = id->pOpen;
drhad81e872005-08-21 21:45:01 +00001290 aNew = sqliteRealloc( pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
danielk1977e3026632004-06-22 11:29:02 +00001291 if( aNew==0 ){
1292 /* If a malloc fails, just leak the file descriptor */
1293 }else{
1294 pOpen->aPending = aNew;
drhad81e872005-08-21 21:45:01 +00001295 pOpen->aPending[pOpen->nPending] = id->h;
1296 pOpen->nPending++;
danielk1977e3026632004-06-22 11:29:02 +00001297 }
1298 }else{
1299 /* There are no outstanding locks so we can close the file immediately */
1300 close(id->h);
1301 }
1302 releaseLockInfo(id->pLock);
1303 releaseOpenCnt(id->pOpen);
1304 sqlite3OsLeaveMutex();
1305 id->isOpen = 0;
1306 TRACE2("CLOSE %-3d\n", id->h);
1307 OpenCounter(-1);
1308 return SQLITE_OK;
1309}
1310
/*
** Convert the pathname zRelative into a full pathname.  A name that
** already begins with '/' is simply copied; any other name is prefixed
** with the current working directory and a '/'.
**
** The result is built by sqlite3SetString() and the caller is
** responsible for freeing it once it is no longer needed.  Zero is
** returned if the scratch buffer for the working directory cannot be
** allocated.
*/
char *sqlite3OsFullPathname(const char *zRelative){
  char *zFull = 0;
  char *zCwdBuf;
  char *zCwd;

  if( zRelative[0]=='/' ){
    /* Already absolute: just make a copy */
    sqlite3SetString(&zFull, zRelative, (char*)0);
    return zFull;
  }

  zCwdBuf = sqliteMalloc(5000);
  if( zCwdBuf==0 ){
    return 0;
  }
  zCwdBuf[0] = 0;
  zCwd = getcwd(zCwdBuf, 5000);
  sqlite3SetString(&zFull, zCwd, "/", zRelative, (char*)0);
  sqliteFree(zCwdBuf);
  return zFull;
}
1333
1334
1335#endif /* SQLITE_OMIT_DISKIO */
1336/***************************************************************************
1337** Everything above deals with file I/O. Everything that follows deals
** with other miscellaneous aspects of the operating system interface
1339****************************************************************************/
1340
1341
1342/*
drhbbd42a62004-05-22 17:41:58 +00001343** Get information to seed the random number generator. The seed
1344** is written into the buffer zBuf[256]. The calling function must
1345** supply a sufficiently large buffer.
1346*/
1347int sqlite3OsRandomSeed(char *zBuf){
1348 /* We have to initialize zBuf to prevent valgrind from reporting
1349 ** errors. The reports issued by valgrind are incorrect - we would
1350 ** prefer that the randomness be increased by making use of the
1351 ** uninitialized space in zBuf - but valgrind errors tend to worry
1352 ** some users. Rather than argue, it seems easier just to initialize
1353 ** the whole array and silence valgrind, even if that means less randomness
1354 ** in the random seed.
1355 **
1356 ** When testing, initializing zBuf[] to zero is all we do. That means
1357 ** that we always use the same random number sequence.* This makes the
1358 ** tests repeatable.
1359 */
1360 memset(zBuf, 0, 256);
1361#if !defined(SQLITE_TEST)
1362 {
drh842b8642005-01-21 17:53:17 +00001363 int pid, fd;
1364 fd = open("/dev/urandom", O_RDONLY);
1365 if( fd<0 ){
1366 time((time_t*)zBuf);
1367 pid = getpid();
1368 memcpy(&zBuf[sizeof(time_t)], &pid, sizeof(pid));
1369 }else{
1370 read(fd, zBuf, 256);
1371 close(fd);
1372 }
drhbbd42a62004-05-22 17:41:58 +00001373 }
1374#endif
1375 return SQLITE_OK;
1376}
1377
/*
** Pause the calling thread for roughly ms milliseconds and return the
** number of milliseconds requested from the operating system.
**
** When usleep() is available (HAVE_USLEEP) that is exactly ms.
** Otherwise sleep() is used, so the delay is rounded up to a whole
** number of seconds and the rounded figure is returned.
*/
int sqlite3OsSleep(int ms){
#if defined(HAVE_USLEEP) && HAVE_USLEEP
  int nMicro = ms*1000;
  usleep(nMicro);
  return ms;
#else
  int nSec = (ms+999)/1000;   /* Round up to whole seconds */
  sleep(nSec);
  return 1000*nSec;
#endif
}
1390
/*
** State used for thread synchronization.  inMutex is non-zero while the
** mutex is held.  The pthread mutex itself only exists in builds with
** SQLITE_UNIX_THREADS.
*/
static int inMutex = 0;
#ifdef SQLITE_UNIX_THREADS
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
#endif

/*
** sqlite3OsEnterMutex() and sqlite3OsLeaveMutex() bracket critical
** sections in multi-threaded processes.  At most one thread at a time
** may execute code that lies between a call to EnterMutex() and the
** matching call to LeaveMutex().
**
** SQLite uses only a single mutex.  There is not much critical code and
** what little there is executes quickly and without blocking.
**
** The mutex must not be entered while it is already held, and must not
** be left unless it is held; both conditions are checked by assert().
*/
void sqlite3OsEnterMutex(){
#ifdef SQLITE_UNIX_THREADS
  pthread_mutex_lock(&mutex);
#endif
  assert( inMutex==0 );
  inMutex = 1;
}
void sqlite3OsLeaveMutex(){
  assert( inMutex!=0 );
  inMutex = 0;
#ifdef SQLITE_UNIX_THREADS
  pthread_mutex_unlock(&mutex);
#endif
}
1421
/*
** The following variable, if set to a non-zero value, becomes the result
** returned from sqlite3OsCurrentTime().  This is used for testing.
** The value is interpreted as seconds since the Unix epoch.
*/
#ifdef SQLITE_TEST
int sqlite3_current_time = 0;
#endif

/*
** Find the current time (in Universal Coordinated Time).  Write the
** current time and date as a Julian Day number into *prNow and
** return 0.
**
** 2440587.5 is the Julian Day number of the Unix epoch and 86400 is the
** number of seconds in a day.  With NO_GETTOD defined the resolution is
** one second (from time()); otherwise gettimeofday() supplies the time
** with microsecond resolution.
*/
int sqlite3OsCurrentTime(double *prNow){
#ifdef NO_GETTOD
  time_t t;
  time(&t);
  *prNow = t/86400.0 + 2440587.5;
#else
  struct timeval sNow;
  /* The timezone argument of gettimeofday() is obsolete and passing a
  ** non-null pointer has unspecified behavior, so always pass null. */
  gettimeofday(&sNow, 0);
  *prNow = 2440587.5 + sNow.tv_sec/86400.0 + sNow.tv_usec/86400000000.0;
#endif
#ifdef SQLITE_TEST
  /* Allow tests to pin the clock to a fixed value */
  if( sqlite3_current_time ){
    *prNow = sqlite3_current_time/86400.0 + 2440587.5;
  }
#endif
  return 0;
}
1453
drhbbd42a62004-05-22 17:41:58 +00001454#endif /* OS_UNIX */