Blame - src/os_unix.c - chromium.googlesource.com/chromium/deps/sqlite

blob: cf855277009cb5f536570e852b3a1fa07e25e335 [file] [log] [blame]

drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1	/*
				2	** 2004 May 22
				3	**
				4	** The author disclaims copyright to this source code. In place of
				5	** a legal notice, here is a blessing:
				6	**
				7	** May you do good and not evil.
				8	** May you find forgiveness for yourself and forgive others.
				9	** May you share freely, never taking more than you give.
				10	**
				11	******************************************************************************
				12	**
				13	** This file contains code that is specific to Unix systems.
				14	*/
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	15	#include "sqliteInt.h"
drh	eb20625	2004-10-01 02:00:31 +0000	[diff] [blame]	16	#include "os.h"
				17	#if OS_UNIX /* This file is used on unix only */
/*
** Request >2GB (large file) support from a POSIX C library, if the
** underlying operating system provides it.  If the OS lacks large file
** support, or if the OS is windows, these definitions should be no-ops.
**
** Large file support can be disabled using the -DSQLITE_DISABLE_LFS switch
** on the compiler command line.  This is necessary if you are compiling
** on a recent machine (ex: RedHat 7.2) but you want your code to work
** on an older machine (ex: RedHat 6.0).  If you compile on RedHat 7.2
** without this option, LFS is enabled.  But LFS does not exist in the
** kernel in RedHat 6.0, so the code won't work.  Hence, for maximum binary
** portability you should omit LFS.
**
** Similar is true for MacOS.  LFS is only supported on MacOS 9 and later.
**
** Each macro is defined only when the build has not already supplied it,
** so a value given on the compiler command line is never redefined here.
**
** NOTE(review): feature-test macros normally have to be defined before the
** first system header is read.  "sqliteInt.h" and "os.h" are included
** ahead of this point - confirm they do not pull in system headers first.
*/
#ifndef SQLITE_DISABLE_LFS
# ifndef _LARGE_FILE
#  define _LARGE_FILE 1
# endif
# ifndef _FILE_OFFSET_BITS
#  define _FILE_OFFSET_BITS 64
# endif
# ifndef _LARGEFILE_SOURCE
#  define _LARGEFILE_SOURCE 1
# endif
#endif
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	40
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	41	/*
				42	** standard include files.
				43	*/
				44	#include <sys/types.h>
				45	#include <sys/stat.h>
				46	#include <fcntl.h>
				47	#include <unistd.h>
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	48	#include <time.h>
drh	19e2d37	2005-08-29 23:00:03 +0000	[diff] [blame]	49	#include <sys/time.h>
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	50	#include <errno.h>
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	51
				52	/*
				53	** Macros used to determine whether or not to use threads. The
				54	** SQLITE_UNIX_THREADS macro is defined if we are synchronizing for
				55	** Posix threads and SQLITE_W32_THREADS is defined if we are
				56	** synchronizing using Win32 threads.
				57	*/
				58	#if defined(THREADSAFE) && THREADSAFE
				59	# include <pthread.h>
				60	# define SQLITE_UNIX_THREADS 1
				61	#endif
				62
				63	/*
				64	** Default permissions when creating a new file
				65	*/
				66	#ifndef SQLITE_DEFAULT_FILE_PERMISSIONS
				67	# define SQLITE_DEFAULT_FILE_PERMISSIONS 0644
				68	#endif
				69
				70
				71
				72	/*
				73	** The OsFile structure is a operating-system dependent representation
				74	** of an open file handle. It is defined differently for each architecture.
				75	**
				76	** This is the definition for Unix.
				77	**
				78	** OsFile.locktype takes one of the values SHARED_LOCK, RESERVED_LOCK,
				79	** PENDING_LOCK or EXCLUSIVE_LOCK.
				80	*/
				81	struct OsFile {
				82	struct openCnt pOpen; / Info about all open fd's on this inode */
				83	struct lockInfo pLock; / Info about locks on this inode */
				84	int h; /* The file descriptor */
				85	unsigned char locktype; /* The type of lock held on this fd */
				86	unsigned char isOpen; /* True if needs to be closed */
				87	unsigned char fullSync; /* Use F_FULLSYNC if available */
				88	int dirfd; /* File descriptor for the directory */
				89	#ifdef SQLITE_UNIX_THREADS
				90	pthread_t tid; /* The thread authorized to use this OsFile */
				91	#endif
				92	};
				93
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	94
				95	/*
				96	** Do not include any of the File I/O interface procedures if the
				97	** SQLITE_OMIT_DISKIO macro is defined (indicating that the database
				98	** will be in-memory only)
				99	*/
				100	#ifndef SQLITE_OMIT_DISKIO
				101
				102
				103	/*
				104	** Define various macros that are missing from some systems.
				105	*/
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	106	#ifndef O_LARGEFILE
				107	# define O_LARGEFILE 0
				108	#endif
				109	#ifdef SQLITE_DISABLE_LFS
				110	# undef O_LARGEFILE
				111	# define O_LARGEFILE 0
				112	#endif
				113	#ifndef O_NOFOLLOW
				114	# define O_NOFOLLOW 0
				115	#endif
				116	#ifndef O_BINARY
				117	# define O_BINARY 0
				118	#endif
				119
				120	/*
				121	** The DJGPP compiler environment looks mostly like Unix, but it
				122	** lacks the fcntl() system call. So redefine fcntl() to be something
				123	** that always succeeds. This means that locking does not occur under
danielk1977	26c5d79	2005-11-25 09:01:23 +0000	[diff] [blame]	124	** DJGPP. But it's DOS - what did you expect?
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	125	*/
				126	#ifdef __DJGPP__
				127	# define fcntl(A,B,C) 0
				128	#endif
				129
				130	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	131	** Include code that is common to all os_*.c files
				132	*/
				133	#include "os_common.h"
				134
/*
** "threadid" expands to the calling thread's id when Posix threads are
** in use and to 0 otherwise.  It is meant for testing and debugging
** output only.
*/
#if defined(SQLITE_UNIX_THREADS)
# define threadid pthread_self()
#else
# define threadid 0
#endif
				144
				145	/*
				146	** Set or check the OsFile.tid field. This field is set when an OsFile
				147	** is first opened. All subsequent uses of the OsFile verify that the
				148	** same thread is operating on the OsFile. Some operating systems do
				149	** not allow locks to be overridden by other threads and that restriction
				150	** means that sqlite3* database handles cannot be moved from one thread
				151	** to another. This logic makes sure a user does not try to do that
				152	** by mistake.
				153	*/
drh	91636d5	2005-11-24 23:14:00 +0000	[diff] [blame]	154	#if defined(SQLITE_UNIX_THREADS) && !defined(SQLITE_ALLOW_XTHREAD_CONNECTIONS)
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	155	# define SET_THREADID(X) (X)->tid = pthread_self()
				156	# define CHECK_THREADID(X) (!pthread_equal((X)->tid, pthread_self()))
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	157	#else
				158	# define SET_THREADID(X)
				159	# define CHECK_THREADID(X) 0
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	160	#endif
				161
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	162	/*
				163	** Here is the dirt on POSIX advisory locks: ANSI STD 1003.1 (1996)
				164	** section 6.5.2.2 lines 483 through 490 specify that when a process
				165	** sets or clears a lock, that operation overrides any prior locks set
				166	** by the same process. It does not explicitly say so, but this implies
				167	** that it overrides locks set by the same process using a different
				168	** file descriptor. Consider this test case:
				169	**
				170	** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
				171	** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
				172	**
				173	** Suppose ./file1 and ./file2 are really the same file (because
				174	** one is a hard or symbolic link to the other) then if you set
				175	** an exclusive lock on fd1, then try to get an exclusive lock
				176	** on fd2, it works. I would have expected the second lock to
				177	** fail since there was already a lock on the file due to fd1.
				178	** But not so. Since both locks came from the same process, the
				179	** second overrides the first, even though they were on different
				180	** file descriptors opened on different file names.
				181	**
				182	** Bummer. If you ask me, this is broken. Badly broken. It means
				183	** that we cannot use POSIX locks to synchronize file access among
				184	** competing threads of the same process. POSIX locks will work fine
				185	** to synchronize access for threads in separate processes, but not
				186	** threads within the same process.
				187	**
				188	** To work around the problem, SQLite has to manage file locks internally
				189	** on its own. Whenever a new database is opened, we have to find the
				190	** specific inode of the database file (the inode is determined by the
				191	** st_dev and st_ino fields of the stat structure that fstat() fills in)
				192	** and check for locks already existing on that inode. When locks are
				193	** created or removed, we have to look at our own internal record of the
				194	** locks to see if another thread has previously set a lock on that same
				195	** inode.
				196	**
				197	** The OsFile structure for POSIX is no longer just an integer file
				198	** descriptor. It is now a structure that holds the integer file
				199	** descriptor and a pointer to a structure that describes the internal
				200	** locks on the corresponding inode. There is one locking structure
				201	** per inode, so if the same inode is opened twice, both OsFile structures
				202	** point to the same locking structure. The locking structure keeps
				203	** a reference count (so we will know when to delete it) and a "cnt"
				204	** field that tells us its internal lock status. cnt==0 means the
				205	** file is unlocked. cnt==-1 means the file has an exclusive lock.
				206	** cnt>0 means there are cnt shared locks on the file.
				207	**
				208	** Any attempt to lock or unlock a file first checks the locking
				209	** structure. The fcntl() system call is only invoked to set a
				210	** POSIX lock if the internal lock structure transitions between
				211	** a locked and an unlocked state.
				212	**
				213	** 2004-Jan-11:
				214	** More recent discoveries about POSIX advisory locks. (The more
				215	** I discover, the more I realize that POSIX advisory locks are
				216	** an abomination.)
				217	**
				218	** If you close a file descriptor that points to a file that has locks,
				219	** all locks on that file that are owned by the current process are
				220	** released. To work around this problem, each OsFile structure contains
				221	** a pointer to an openCnt structure. There is one openCnt structure
				222	** per open inode, which means that multiple OsFiles can point to a single
				223	** openCnt. When an attempt is made to close an OsFile, if there are
				224	** other OsFiles open on the same inode that are holding locks, the call
				225	** to close() the file descriptor is deferred until all of the locks clear.
				226	** The openCnt structure keeps a list of file descriptors that need to
				227	** be closed and that list is walked (and cleared) when the last lock
				228	** clears.
				229	**
				230	** First, under Linux threads, because each thread has a separate
				231	** process ID, lock operations in one thread do not override locks
				232	** to the same file in other threads. Linux threads behave like
				233	** separate processes in this respect. But, if you close a file
				234	** descriptor in linux threads, all locks are cleared, even locks
				235	** on other threads and even though the other threads have different
				236	** process IDs. Linux threads is inconsistent in this respect.
				237	** (I'm beginning to think that linux threads is an abomination too.)
				238	** The consequence of this all is that the hash table for the lockInfo
				239	** structure has to include the process id as part of its key because
				240	** locks in different threads are treated as distinct. But the
				241	** openCnt structure should not include the process id in its
				242	** key because close() clears lock on all threads, not just the current
				243	** thread. Were it not for this goofiness in linux threads, we could
				244	** combine the lockInfo and openCnt structures into a single structure.
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	245	**
				246	** 2004-Jun-28:
				247	** On some versions of linux, threads can override each others locks.
				248	** On others not. Sometimes you can change the behavior on the same
				249	** system by setting the LD_ASSUME_KERNEL environment variable. The
				250	** POSIX standard is silent as to which behavior is correct, as far
				251	** as I can tell, so other versions of unix might show the same
				252	** inconsistency. There is little doubt in my mind that posix
				253	** advisory locks and linux threads are profoundly broken.
				254	**
				255	** To work around the inconsistencies, we have to test at runtime
				256	** whether or not threads can override each others locks. This test
				257	** is run once, the first time any lock is attempted. A static
				258	** variable is set to record the results of this test for future
				259	** use.
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	260	*/
				261
				262	/*
				263	** An instance of the following structure serves as the key used
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	264	** to locate a particular lockInfo structure given its inode.
				265	**
				266	** If threads cannot override each others locks, then we set the
				267	** lockKey.tid field to the thread ID. If threads can override
				268	** each others locks then tid is always set to zero. tid is also
				269	** set to zero if we compile without threading support.
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	270	*/
				271	struct lockKey {
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	272	dev_t dev; /* Device number */
				273	ino_t ino; /* Inode number */
				274	#ifdef SQLITE_UNIX_THREADS
drh	d9cb6ac	2005-10-20 07:28:17 +0000	[diff] [blame]	275	pthread_t tid; /* Thread ID or zero if threads can override each other */
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	276	#endif
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	277	};
				278
				279	/*
				280	** An instance of the following structure is allocated for each open
				281	** inode on each thread with a different process ID. (Threads have
				282	** different process IDs on linux, but not on most other unixes.)
				283	**
				284	** A single inode can have multiple file descriptors, so each OsFile
				285	** structure contains a pointer to an instance of this object and this
				286	** object keeps a count of the number of OsFiles pointing to it.
				287	*/
				288	struct lockInfo {
				289	struct lockKey key; /* The lookup key */
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	290	int cnt; /* Number of SHARED locks held */
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	291	int locktype; /* One of SHARED_LOCK, RESERVED_LOCK etc. */
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	292	int nRef; /* Number of pointers to this structure */
				293	};
				294
				295	/*
				296	** An instance of the following structure serves as the key used
				297	** to locate a particular openCnt structure given its inode. This
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	298	** is the same as the lockKey except that the thread ID is omitted.
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	299	*/
				300	struct openKey {
				301	dev_t dev; /* Device number */
				302	ino_t ino; /* Inode number */
				303	};
				304
				305	/*
				306	** An instance of the following structure is allocated for each open
				307	** inode. This structure keeps track of the number of locks on that
				308	** inode. If a close is attempted against an inode that is holding
				309	** locks, the close is deferred until all locks clear by adding the
				310	** file descriptor to be closed to the pending list.
				311	*/
				312	struct openCnt {
				313	struct openKey key; /* The lookup key */
				314	int nRef; /* Number of pointers to this structure */
				315	int nLock; /* Number of outstanding locks */
				316	int nPending; /* Number of pending close() operations */
				317	int aPending; / Malloced space holding fd's awaiting a close() */
				318	};
				319
				320	/*
				321	** These hash table maps inodes and process IDs into lockInfo and openCnt
				322	** structures. Access to these hash tables must be protected by a mutex.
				323	*/
				324	static Hash lockHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
				325	static Hash openHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
				326
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	327
				328	#ifdef SQLITE_UNIX_THREADS
				329	/*
				330	** This variable records whether or not threads can override each others
				331	** locks.
				332	**
				333	** 0: No. Threads cannot override each others locks.
				334	** 1: Yes. Threads can override each others locks.
				335	** -1: We don't know yet.
				336	*/
				337	static int threadsOverrideEachOthersLocks = -1;
				338
				339	/*
				340	** This structure holds information passed into individual test
				341	** threads by the testThreadLockingBehavior() routine.
				342	*/
				343	struct threadTestData {
				344	int fd; /* File to be locked */
				345	struct flock lock; /* The locking operation */
				346	int result; /* Result of the locking operation */
				347	};
				348
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	349	#ifdef SQLITE_LOCK_TRACE
				350	/*
				351	** Print out information about all locking operations.
				352	**
				353	** This routine is used for troubleshooting locks on multithreaded
				354	** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE
				355	** command-line option on the compiler. This code is normally
				356	** turnned off.
				357	*/
				358	static int lockTrace(int fd, int op, struct flock *p){
				359	char zOpName, zType;
				360	int s;
				361	int savedErrno;
				362	if( op==F_GETLK ){
				363	zOpName = "GETLK";
				364	}else if( op==F_SETLK ){
				365	zOpName = "SETLK";
				366	}else{
				367	s = fcntl(fd, op, p);
				368	sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
				369	return s;
				370	}
				371	if( p->l_type==F_RDLCK ){
				372	zType = "RDLCK";
				373	}else if( p->l_type==F_WRLCK ){
				374	zType = "WRLCK";
				375	}else if( p->l_type==F_UNLCK ){
				376	zType = "UNLCK";
				377	}else{
				378	assert( 0 );
				379	}
				380	assert( p->l_whence==SEEK_SET );
				381	s = fcntl(fd, op, p);
				382	savedErrno = errno;
				383	sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
				384	threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
				385	(int)p->l_pid, s);
				386	if( s && op==F_SETLK && (p->l_type==F_RDLCK \|\| p->l_type==F_WRLCK) ){
				387	struct flock l2;
				388	l2 = *p;
				389	fcntl(fd, F_GETLK, &l2);
				390	if( l2.l_type==F_RDLCK ){
				391	zType = "RDLCK";
				392	}else if( l2.l_type==F_WRLCK ){
				393	zType = "WRLCK";
				394	}else if( l2.l_type==F_UNLCK ){
				395	zType = "UNLCK";
				396	}else{
				397	assert( 0 );
				398	}
				399	sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
				400	zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
				401	}
				402	errno = savedErrno;
				403	return s;
				404	}
				405	#define fcntl lockTrace
				406	#endif /* SQLITE_LOCK_TRACE */
				407
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	408	/*
				409	** The testThreadLockingBehavior() routine launches two separate
				410	** threads on this routine. This routine attempts to lock a file
				411	** descriptor then returns. The success or failure of that attempt
				412	** allows the testThreadLockingBehavior() procedure to determine
				413	** whether or not threads can override each others locks.
				414	*/
				415	static void threadLockingTest(void pArg){
				416	struct threadTestData pData = (struct threadTestData)pArg;
				417	pData->result = fcntl(pData->fd, F_SETLK, &pData->lock);
				418	return pArg;
				419	}
				420
				421	/*
				422	** This procedure attempts to determine whether or not threads
				423	** can override each others locks then sets the
				424	** threadsOverrideEachOthersLocks variable appropriately.
				425	*/
				426	static void testThreadLockingBehavior(fd_orig){
				427	int fd;
				428	struct threadTestData d[2];
				429	pthread_t t[2];
				430
				431	fd = dup(fd_orig);
				432	if( fd<0 ) return;
				433	memset(d, 0, sizeof(d));
				434	d[0].fd = fd;
				435	d[0].lock.l_type = F_RDLCK;
				436	d[0].lock.l_len = 1;
				437	d[0].lock.l_start = 0;
				438	d[0].lock.l_whence = SEEK_SET;
				439	d[1] = d[0];
				440	d[1].lock.l_type = F_WRLCK;
				441	pthread_create(&t[0], 0, threadLockingTest, &d[0]);
				442	pthread_create(&t[1], 0, threadLockingTest, &d[1]);
				443	pthread_join(t[0], 0);
				444	pthread_join(t[1], 0);
				445	close(fd);
				446	threadsOverrideEachOthersLocks = d[0].result==0 && d[1].result==0;
				447	}
				448	#endif /* SQLITE_UNIX_THREADS */
				449
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	450	/*
				451	** Release a lockInfo structure previously allocated by findLockInfo().
				452	*/
				453	static void releaseLockInfo(struct lockInfo *pLock){
				454	pLock->nRef--;
				455	if( pLock->nRef==0 ){
				456	sqlite3HashInsert(&lockHash, &pLock->key, sizeof(pLock->key), 0);
				457	sqliteFree(pLock);
				458	}
				459	}
				460
				461	/*
				462	** Release a openCnt structure previously allocated by findLockInfo().
				463	*/
				464	static void releaseOpenCnt(struct openCnt *pOpen){
				465	pOpen->nRef--;
				466	if( pOpen->nRef==0 ){
				467	sqlite3HashInsert(&openHash, &pOpen->key, sizeof(pOpen->key), 0);
				468	sqliteFree(pOpen->aPending);
				469	sqliteFree(pOpen);
				470	}
				471	}
				472
				473	/*
				474	** Given a file descriptor, locate lockInfo and openCnt structures that
				475	** describes that file descriptor. Create a new ones if necessary. The
				476	** return values might be unset if an error occurs.
				477	**
				478	** Return the number of errors.
				479	*/
drh	38f8271	2004-06-18 17:10:16 +0000	[diff] [blame]	480	static int findLockInfo(
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	481	int fd, /* The file descriptor used in the key */
				482	struct lockInfo *ppLock, / Return the lockInfo structure here */
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	483	struct openCnt *ppOpen / Return the openCnt structure here */
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	484	){
				485	int rc;
				486	struct lockKey key1;
				487	struct openKey key2;
				488	struct stat statbuf;
				489	struct lockInfo *pLock;
				490	struct openCnt *pOpen;
				491	rc = fstat(fd, &statbuf);
				492	if( rc!=0 ) return 1;
				493	memset(&key1, 0, sizeof(key1));
				494	key1.dev = statbuf.st_dev;
				495	key1.ino = statbuf.st_ino;
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	496	#ifdef SQLITE_UNIX_THREADS
				497	if( threadsOverrideEachOthersLocks<0 ){
				498	testThreadLockingBehavior(fd);
				499	}
				500	key1.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();
				501	#endif
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	502	memset(&key2, 0, sizeof(key2));
				503	key2.dev = statbuf.st_dev;
				504	key2.ino = statbuf.st_ino;
				505	pLock = (struct lockInfo*)sqlite3HashFind(&lockHash, &key1, sizeof(key1));
				506	if( pLock==0 ){
				507	struct lockInfo *pOld;
				508	pLock = sqliteMallocRaw( sizeof(*pLock) );
				509	if( pLock==0 ) return 1;
				510	pLock->key = key1;
				511	pLock->nRef = 1;
				512	pLock->cnt = 0;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	513	pLock->locktype = 0;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	514	pOld = sqlite3HashInsert(&lockHash, &pLock->key, sizeof(key1), pLock);
				515	if( pOld!=0 ){
				516	assert( pOld==pLock );
				517	sqliteFree(pLock);
				518	return 1;
				519	}
				520	}else{
				521	pLock->nRef++;
				522	}
				523	*ppLock = pLock;
				524	pOpen = (struct openCnt*)sqlite3HashFind(&openHash, &key2, sizeof(key2));
				525	if( pOpen==0 ){
				526	struct openCnt *pOld;
				527	pOpen = sqliteMallocRaw( sizeof(*pOpen) );
				528	if( pOpen==0 ){
				529	releaseLockInfo(pLock);
				530	return 1;
				531	}
				532	pOpen->key = key2;
				533	pOpen->nRef = 1;
				534	pOpen->nLock = 0;
				535	pOpen->nPending = 0;
				536	pOpen->aPending = 0;
				537	pOld = sqlite3HashInsert(&openHash, &pOpen->key, sizeof(key2), pOpen);
				538	if( pOld!=0 ){
				539	assert( pOld==pOpen );
				540	sqliteFree(pOpen);
				541	releaseLockInfo(pLock);
				542	return 1;
				543	}
				544	}else{
				545	pOpen->nRef++;
				546	}
				547	*ppOpen = pOpen;
				548	return 0;
				549	}
				550
				551	/*
				552	** Delete the named file
				553	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	554	static int unixDelete(const char *zFilename){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	555	unlink(zFilename);
				556	return SQLITE_OK;
				557	}
				558
				559	/*
				560	** Return TRUE if the named file exists.
				561	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	562	static int unixFileExists(const char *zFilename){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	563	return access(zFilename, 0)==0;
				564	}
				565
				566	/*
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	567	** Allocate memory for an OsFile. Initialize the new OsFile
				568	** to the value given in pInit and return a pointer to the new
				569	** OsFile. If we run out of memory, close the file and return NULL.
				570	*/
				571	static OsFile allocateOsFile(OsFile pInit){
				572	OsFile *pNew;
				573	pNew = sqliteMalloc( sizeof(OsFile) );
				574	if( pNew==0 ){
				575	close(pInit->h);
				576	}else{
				577	pNew = pInit;
				578	}
				579	return pNew;
				580	}
				581
				582	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	583	** Attempt to open a file for both reading and writing. If that
				584	** fails, try opening it read-only. If the file does not exist,
				585	** try to create it.
				586	**
				587	** On success, a handle for the open file is written to *id
				588	** and *pReadonly is set to 0 if the file was opened for reading and
				589	** writing or 1 if the file was opened read-only. The function returns
				590	** SQLITE_OK.
				591	**
				592	** On failure, the function returns SQLITE_CANTOPEN and leaves
				593	** id and pReadonly unchanged.
				594	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	595	static int unixOpenReadWrite(
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	596	const char *zFilename,
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	597	OsFile **pId,
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	598	int *pReadonly
				599	){
				600	int rc;
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	601	OsFile f;
				602
				603	assert( 0==*pId );
				604	f.dirfd = -1;
				605	SET_THREADID(&f);
				606	f.h = open(zFilename, O_RDWR\|O_CREAT\|O_LARGEFILE\|O_BINARY,
drh	8e85577	2005-05-17 11:25:31 +0000	[diff] [blame]	607	SQLITE_DEFAULT_FILE_PERMISSIONS);
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	608	if( f.h<0 ){
drh	6458e39	2004-07-20 01:14:13 +0000	[diff] [blame]	609	#ifdef EISDIR
				610	if( errno==EISDIR ){
				611	return SQLITE_CANTOPEN;
				612	}
				613	#endif
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	614	f.h = open(zFilename, O_RDONLY\|O_LARGEFILE\|O_BINARY);
				615	if( f.h<0 ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	616	return SQLITE_CANTOPEN;
				617	}
				618	*pReadonly = 1;
				619	}else{
				620	*pReadonly = 0;
				621	}
				622	sqlite3OsEnterMutex();
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	623	rc = findLockInfo(f.h, &f.pLock, &f.pOpen);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	624	sqlite3OsLeaveMutex();
				625	if( rc ){
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	626	close(f.h);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	627	return SQLITE_NOMEM;
				628	}
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	629	f.locktype = 0;
				630	TRACE3("OPEN %-3d %s\n", f.h, zFilename);
				631	*pId = allocateOsFile(&f);
				632	if( *pId==0 ){
				633	return SQLITE_NOMEM;
				634	}else{
				635	OpenCounter(+1);
				636	return SQLITE_OK;
				637	}
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	638	}
				639
				640
				641	/*
				642	** Attempt to open a new file for exclusive access by this process.
				643	** The file will be opened for both reading and writing. To avoid
				644	** a potential security problem, we do not allow the file to have
				645	** previously existed. Nor do we allow the file to be a symbolic
				646	** link.
				647	**
				648	** If delFlag is true, then make arrangements to automatically delete
				649	** the file when it is closed.
				650	**
				651	** On success, write the file handle into *id and return SQLITE_OK.
				652	**
				653	** On failure, return SQLITE_CANTOPEN.
				654	*/
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	655	static int unixOpenExclusive(const char zFilename, OsFile *pId, int delFlag){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	656	int rc;
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	657	OsFile f;
				658
				659	assert( 0==*pId );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	660	if( access(zFilename, 0)==0 ){
				661	return SQLITE_CANTOPEN;
				662	}
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	663	SET_THREADID(&f);
				664	f.dirfd = -1;
				665	f.h = open(zFilename,
drh	d645967	2005-08-13 17:17:01 +0000	[diff] [blame]	666	O_RDWR\|O_CREAT\|O_EXCL\|O_NOFOLLOW\|O_LARGEFILE\|O_BINARY,
				667	SQLITE_DEFAULT_FILE_PERMISSIONS);
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	668	if( f.h<0 ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	669	return SQLITE_CANTOPEN;
				670	}
				671	sqlite3OsEnterMutex();
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	672	rc = findLockInfo(f.h, &f.pLock, &f.pOpen);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	673	sqlite3OsLeaveMutex();
				674	if( rc ){
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	675	close(f.h);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	676	unlink(zFilename);
				677	return SQLITE_NOMEM;
				678	}
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	679	f.locktype = 0;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	680	if( delFlag ){
				681	unlink(zFilename);
				682	}
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	683	TRACE3("OPEN-EX %-3d %s\n", f.h, zFilename);
				684	*pId = allocateOsFile(&f);
				685	if( *pId==0 ){
				686	return SQLITE_NOMEM;
				687	}else{
				688	OpenCounter(+1);
				689	return SQLITE_OK;
				690	}
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	691	}
				692
				693	/*
				694	** Attempt to open a new file for read-only access.
				695	**
				696	** On success, write the file handle into *id and return SQLITE_OK.
				697	**
				698	** On failure, return SQLITE_CANTOPEN.
				699	*/
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	700	static int unixOpenReadOnly(const char zFilename, OsFile *pId){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	701	int rc;
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	702	OsFile f;
				703
				704	assert( 0==*pId );
				705	SET_THREADID(&f);
				706	f.dirfd = -1;
				707	f.h = open(zFilename, O_RDONLY\|O_LARGEFILE\|O_BINARY);
				708	if( f.h<0 ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	709	return SQLITE_CANTOPEN;
				710	}
				711	sqlite3OsEnterMutex();
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	712	rc = findLockInfo(f.h, &f.pLock, &f.pOpen);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	713	sqlite3OsLeaveMutex();
				714	if( rc ){
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	715	close(f.h);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	716	return SQLITE_NOMEM;
				717	}
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	718	f.locktype = 0;
				719	TRACE3("OPEN-RO %-3d %s\n", f.h, zFilename);
				720	*pId = allocateOsFile(&f);
				721	if( *pId==0 ){
				722	return SQLITE_NOMEM;
				723	}else{
				724	OpenCounter(+1);
				725	return SQLITE_OK;
				726	}
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	727	}
				728
				729	/*
				730	** Attempt to open a file descriptor for the directory that contains a
				731	** file. This file descriptor can be used to fsync() the directory
				732	** in order to make sure the creation of a new file is actually written
				733	** to disk.
				734	**
				735	** This routine is only meaningful for Unix. It is a no-op under
				736	** windows since windows does not support hard links.
				737	**
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	738	** On success, a handle for a previously open file at *id is
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	739	** updated with the new directory file descriptor and SQLITE_OK is
				740	** returned.
				741	**
				742	** On failure, the function returns SQLITE_CANTOPEN and leaves
				743	** *id unchanged.
				744	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	745	static int unixOpenDirectory(
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	746	const char *zDirname,
				747	OsFile *id
				748	){
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	749	if( id==0 ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	750	/* Do not open the directory if the corresponding file is not already
				751	** open. */
				752	return SQLITE_CANTOPEN;
				753	}
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	754	SET_THREADID(id);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	755	assert( id->dirfd<0 );
drh	8e85577	2005-05-17 11:25:31 +0000	[diff] [blame]	756	id->dirfd = open(zDirname, O_RDONLY\|O_BINARY, 0);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	757	if( id->dirfd<0 ){
				758	return SQLITE_CANTOPEN;
				759	}
				760	TRACE3("OPENDIR %-3d %s\n", id->dirfd, zDirname);
				761	return SQLITE_OK;
				762	}
				763
				764	/*
drh	ab3f9fe	2004-08-14 17:10:10 +0000	[diff] [blame]	765	** If the following global variable points to a string which is the
				766	** name of a directory, then that directory will be used to store
				767	** temporary files.
				768	*/
tpoindex	9a09a3c	2004-12-20 19:01:32 +0000	[diff] [blame]	769	char *sqlite3_temp_directory = 0;
drh	ab3f9fe	2004-08-14 17:10:10 +0000	[diff] [blame]	770
				771	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	772	** Create a temporary file name in zBuf. zBuf must be big enough to
				773	** hold at least SQLITE_TEMPNAME_SIZE characters.
				774	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	775	static int unixTempFileName(char *zBuf){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	776	static const char *azDirs[] = {
drh	ab3f9fe	2004-08-14 17:10:10 +0000	[diff] [blame]	777	0,
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	778	"/var/tmp",
				779	"/usr/tmp",
				780	"/tmp",
				781	".",
				782	};
drh	5719628	2004-10-06 15:41:16 +0000	[diff] [blame]	783	static const unsigned char zChars[] =
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	784	"abcdefghijklmnopqrstuvwxyz"
				785	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
				786	"0123456789";
				787	int i, j;
				788	struct stat buf;
				789	const char *zDir = ".";
drh	effd02b	2004-08-29 23:42:13 +0000	[diff] [blame]	790	azDirs[0] = sqlite3_temp_directory;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	791	for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
drh	ab3f9fe	2004-08-14 17:10:10 +0000	[diff] [blame]	792	if( azDirs[i]==0 ) continue;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	793	if( stat(azDirs[i], &buf) ) continue;
				794	if( !S_ISDIR(buf.st_mode) ) continue;
				795	if( access(azDirs[i], 07) ) continue;
				796	zDir = azDirs[i];
				797	break;
				798	}
				799	do{
				800	sprintf(zBuf, "%s/"TEMP_FILE_PREFIX, zDir);
				801	j = strlen(zBuf);
				802	sqlite3Randomness(15, &zBuf[j]);
				803	for(i=0; i<15; i++, j++){
				804	zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
				805	}
				806	zBuf[j] = 0;
				807	}while( access(zBuf,0)==0 );
				808	return SQLITE_OK;
				809	}
				810
				811	/*
tpoindex	9a09a3c	2004-12-20 19:01:32 +0000	[diff] [blame]	812	** Check that a given pathname is a directory and is writable
				813	**
				814	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	815	static int unixIsDirWritable(char *zBuf){
				816	#ifndef SQLITE_OMIT_PAGER_PRAGMAS
tpoindex	9a09a3c	2004-12-20 19:01:32 +0000	[diff] [blame]	817	struct stat buf;
				818	if( zBuf==0 ) return 0;
drh	268283b	2005-01-08 15:44:25 +0000	[diff] [blame]	819	if( zBuf[0]==0 ) return 0;
tpoindex	9a09a3c	2004-12-20 19:01:32 +0000	[diff] [blame]	820	if( stat(zBuf, &buf) ) return 0;
				821	if( !S_ISDIR(buf.st_mode) ) return 0;
				822	if( access(zBuf, 07) ) return 0;
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	823	#endif /* SQLITE_OMIT_PAGER_PRAGMAS */
tpoindex	9a09a3c	2004-12-20 19:01:32 +0000	[diff] [blame]	824	return 1;
				825	}
				826
				827	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	828	** Read data from a file into a buffer. Return SQLITE_OK if all
				829	** bytes were read successfully and SQLITE_IOERR if anything goes
				830	** wrong.
				831	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	832	static int unixRead(OsFile id, void pBuf, int amt){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	833	int got;
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	834	assert( id );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	835	SimulateIOError(SQLITE_IOERR);
				836	TIMER_START;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	837	got = read(id->h, pBuf, amt);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	838	TIMER_END;
drh	e29b915	2005-03-18 14:03:15 +0000	[diff] [blame]	839	TRACE5("READ %-3d %5d %7d %d\n", id->h, got, last_page, TIMER_ELAPSED);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	840	SEEK(0);
				841	/* if( got<0 ) got = 0; */
				842	if( got==amt ){
				843	return SQLITE_OK;
				844	}else{
				845	return SQLITE_IOERR;
				846	}
				847	}
				848
				849	/*
				850	** Write data from a buffer into a file. Return SQLITE_OK on success
				851	** or some other error code on failure.
				852	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	853	static int unixWrite(OsFile id, const void pBuf, int amt){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	854	int wrote = 0;
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	855	assert( id );
drh	4c7f941	2005-02-03 00:29:47 +0000	[diff] [blame]	856	assert( amt>0 );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	857	SimulateIOError(SQLITE_IOERR);
drh	047d483	2004-10-01 14:38:02 +0000	[diff] [blame]	858	SimulateDiskfullError;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	859	TIMER_START;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	860	while( amt>0 && (wrote = write(id->h, pBuf, amt))>0 ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	861	amt -= wrote;
				862	pBuf = &((char*)pBuf)[wrote];
				863	}
				864	TIMER_END;
drh	e29b915	2005-03-18 14:03:15 +0000	[diff] [blame]	865	TRACE5("WRITE %-3d %5d %7d %d\n", id->h, wrote, last_page, TIMER_ELAPSED);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	866	SEEK(0);
				867	if( amt>0 ){
				868	return SQLITE_FULL;
				869	}
				870	return SQLITE_OK;
				871	}
				872
				873	/*
				874	** Move the read/write pointer in a file.
				875	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	876	static int unixSeek(OsFile *id, i64 offset){
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	877	assert( id );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	878	SEEK(offset/1024 + 1);
drh	b4746b9	2005-09-09 01:32:06 +0000	[diff] [blame]	879	#ifdef SQLITE_TEST
				880	if( offset ) SimulateDiskfullError
				881	#endif
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	882	lseek(id->h, offset, SEEK_SET);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	883	return SQLITE_OK;
				884	}
				885
drh	b851b2c	2005-03-10 14:11:12 +0000	[diff] [blame]	886	#ifdef SQLITE_TEST
				887	/*
				888	** Count the number of fullsyncs and normal syncs. This is used to test
				889	** that syncs and fullsyncs are occurring at the right times.
				890	*/
				891	int sqlite3_sync_count = 0;
				892	int sqlite3_fullsync_count = 0;
				893	#endif
				894
drh	f2f2391	2005-10-05 10:29:36 +0000	[diff] [blame]	895	/*
				896	** Use the fdatasync() API only if the HAVE_FDATASYNC macro is defined.
				897	** Otherwise use fsync() in its place.
				898	*/
				899	#ifndef HAVE_FDATASYNC
				900	# define fdatasync fsync
				901	#endif
				902
drh	b851b2c	2005-03-10 14:11:12 +0000	[diff] [blame]	903
/*
** The fsync() system call does not work as advertised on many
** unix systems.  The following procedure is an attempt to make
** it work better.
**
** fd is the descriptor to sync.  Where F_FULLFSYNC is available and
** fullSync is true, fcntl(fd, F_FULLFSYNC, 0) is tried first, falling
** back to fsync() if that fails; dataOnly is not consulted on such
** systems.  Without F_FULLFSYNC, dataOnly selects fdatasync() over
** fsync().  The return value is that of the last system call made.
**
** The SQLITE_NO_SYNC macro disables all fsync()s.  This is useful
** for testing when we want to run through the test suite quickly.
** You are strongly advised not to deploy with SQLITE_NO_SYNC
** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash
** or power failure will likely corrupt the database file.
*/
static int full_fsync(int fd, int fullSync, int dataOnly){
  int rc;

  /* During testing, count normal syncs and full syncs so that the test
  ** scripts can verify this procedure is invoked with the expected
  ** arguments.
  */
#ifdef SQLITE_TEST
  if( fullSync ) sqlite3_fullsync_count++;
  sqlite3_sync_count++;
#endif

#if defined(SQLITE_NO_SYNC)
  /* Syncing is compiled out entirely */
  rc = SQLITE_OK;
#elif defined(F_FULLFSYNC)
  /* A non-zero rc here means "full sync not requested or it failed",
  ** in which case fall back to a normal fsync(). */
  rc = fullSync ? fcntl(fd, F_FULLFSYNC, 0) : 1;
  if( rc ) rc = fsync(fd);
#else /* if !defined(F_FULLFSYNC) */
  rc = dataOnly ? fdatasync(fd) : fsync(fd);
#endif

  return rc;
}
				954
				955	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	956	** Make sure all writes to a particular file are committed to disk.
				957	**
drh	eb796a7	2005-09-08 12:38:41 +0000	[diff] [blame]	958	** If dataOnly==0 then both the file itself and its metadata (file
				959	** size, access time, etc) are synced. If dataOnly!=0 then only the
				960	** file data is synced.
				961	**
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	962	** Under Unix, also make sure that the directory entry for the file
				963	** has been created by fsync-ing the directory that contains the file.
				964	** If we do not do this and we encounter a power failure, the directory
				965	** entry for the journal might not exist after we reboot. The next
				966	** SQLite to access the file will not know that the journal exists (because
				967	** the directory entry for the journal was never created) and the transaction
				968	** will not roll back - possibly leading to database corruption.
				969	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	970	static int unixSync(OsFile *id, int dataOnly){
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	971	assert( id );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	972	SimulateIOError(SQLITE_IOERR);
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	973	TRACE2("SYNC %-3d\n", id->h);
drh	eb796a7	2005-09-08 12:38:41 +0000	[diff] [blame]	974	if( full_fsync(id->h, id->fullSync, dataOnly) ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	975	return SQLITE_IOERR;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	976	}
drh	a285422	2004-06-17 19:04:17 +0000	[diff] [blame]	977	if( id->dirfd>=0 ){
				978	TRACE2("DIRSYNC %-3d\n", id->dirfd);
danielk1977	d7c03f7	2005-11-25 10:38:22 +0000	[diff] [blame]	979	#ifndef SQLITE_DISABLE_DIRSYNC
danielk1977	0964b23	2005-11-25 08:47:57 +0000	[diff] [blame]	980	if( full_fsync(id->dirfd, id->fullSync, 0) ){
				981	return SQLITE_IOERR;
				982	}
danielk1977	d7c03f7	2005-11-25 10:38:22 +0000	[diff] [blame]	983	#endif
drh	a285422	2004-06-17 19:04:17 +0000	[diff] [blame]	984	close(id->dirfd); /* Only need to sync once, so close the directory */
				985	id->dirfd = -1; /* when we are done. */
				986	}
drh	a285422	2004-06-17 19:04:17 +0000	[diff] [blame]	987	return SQLITE_OK;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	988	}
				989
				990	/*
danielk1977	962398d	2004-06-14 09:35:16 +0000	[diff] [blame]	991	** Sync the directory zDirname. This is a no-op on operating systems other
				992	** than UNIX.
drh	b851b2c	2005-03-10 14:11:12 +0000	[diff] [blame]	993	**
				994	** This is used to make sure the master journal file has truely been deleted
				995	** before making changes to individual journals on a multi-database commit.
drh	f30cc94	2005-03-11 17:52:34 +0000	[diff] [blame]	996	** The F_FULLFSYNC option is not needed here.
danielk1977	962398d	2004-06-14 09:35:16 +0000	[diff] [blame]	997	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	998	static int unixSyncDirectory(const char *zDirname){
danielk1977	d7c03f7	2005-11-25 10:38:22 +0000	[diff] [blame]	999	#ifdef SQLITE_DISABLE_DIRSYNC
				1000	return SQLITE_OK;
				1001	#else
danielk1977	962398d	2004-06-14 09:35:16 +0000	[diff] [blame]	1002	int fd;
				1003	int r;
danielk1977	369f27e	2004-06-15 11:40:04 +0000	[diff] [blame]	1004	SimulateIOError(SQLITE_IOERR);
drh	8e85577	2005-05-17 11:25:31 +0000	[diff] [blame]	1005	fd = open(zDirname, O_RDONLY\|O_BINARY, 0);
danielk1977	369f27e	2004-06-15 11:40:04 +0000	[diff] [blame]	1006	TRACE3("DIRSYNC %-3d (%s)\n", fd, zDirname);
danielk1977	962398d	2004-06-14 09:35:16 +0000	[diff] [blame]	1007	if( fd<0 ){
				1008	return SQLITE_CANTOPEN;
				1009	}
				1010	r = fsync(fd);
				1011	close(fd);
				1012	return ((r==0)?SQLITE_OK:SQLITE_IOERR);
danielk1977	d7c03f7	2005-11-25 10:38:22 +0000	[diff] [blame]	1013	#endif
danielk1977	962398d	2004-06-14 09:35:16 +0000	[diff] [blame]	1014	}
				1015
				1016	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1017	** Truncate an open file to a specified size
				1018	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1019	static int unixTruncate(OsFile *id, i64 nByte){
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	1020	assert( id );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1021	SimulateIOError(SQLITE_IOERR);
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1022	return ftruncate(id->h, nByte)==0 ? SQLITE_OK : SQLITE_IOERR;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1023	}
				1024
				1025	/*
				1026	** Determine the current size of a file in bytes
				1027	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1028	static int unixFileSize(OsFile id, i64 pSize){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1029	struct stat buf;
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	1030	assert( id );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1031	SimulateIOError(SQLITE_IOERR);
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1032	if( fstat(id->h, &buf)!=0 ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1033	return SQLITE_IOERR;
				1034	}
				1035	*pSize = buf.st_size;
				1036	return SQLITE_OK;
				1037	}
				1038
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1039	/*
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1040	** This routine checks if there is a RESERVED lock held on the specified
				1041	** file by this or any other process. If such a lock is held, return
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1042	** non-zero. If the file is unlocked or holds only SHARED locks, then
				1043	** return zero.
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1044	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1045	static int unixCheckReservedLock(OsFile *id){
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1046	int r = 0;
				1047
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	1048	assert( id );
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1049	if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1050	sqlite3OsEnterMutex(); /* Needed because id->pLock is shared across threads */
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1051
				1052	/* Check if a thread in this process holds such a lock */
				1053	if( id->pLock->locktype>SHARED_LOCK ){
				1054	r = 1;
				1055	}
				1056
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1057	/* Otherwise see if some other process holds it.
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1058	*/
				1059	if( !r ){
				1060	struct flock lock;
				1061	lock.l_whence = SEEK_SET;
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1062	lock.l_start = RESERVED_BYTE;
				1063	lock.l_len = 1;
				1064	lock.l_type = F_WRLCK;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1065	fcntl(id->h, F_GETLK, &lock);
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1066	if( lock.l_type!=F_UNLCK ){
				1067	r = 1;
				1068	}
				1069	}
				1070
				1071	sqlite3OsLeaveMutex();
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1072	TRACE3("TEST WR-LOCK %d %d\n", id->h, r);
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1073
				1074	return r;
				1075	}
				1076
danielk1977	2b44485	2004-06-29 07:45:33 +0000	[diff] [blame]	1077	#ifdef SQLITE_DEBUG
				1078	/*
				1079	** Helper function for printing out trace information from debugging
				1080	** binaries. This returns the string represetation of the supplied
				1081	** integer lock-type.
				1082	*/
				1083	static const char * locktypeName(int locktype){
				1084	switch( locktype ){
				1085	case NO_LOCK: return "NONE";
				1086	case SHARED_LOCK: return "SHARED";
				1087	case RESERVED_LOCK: return "RESERVED";
				1088	case PENDING_LOCK: return "PENDING";
				1089	case EXCLUSIVE_LOCK: return "EXCLUSIVE";
				1090	}
				1091	return "ERROR";
				1092	}
				1093	#endif
				1094
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1095	/*
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1096	** Lock the file with the lock specified by parameter locktype - one
				1097	** of the following:
				1098	**
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1099	** (1) SHARED_LOCK
				1100	** (2) RESERVED_LOCK
				1101	** (3) PENDING_LOCK
				1102	** (4) EXCLUSIVE_LOCK
				1103	**
drh	b3e0434	2004-06-08 00:47:47 +0000	[diff] [blame]	1104	** Sometimes when requesting one lock state, additional lock states
				1105	** are inserted in between. The locking might fail on one of the later
				1106	** transitions leaving the lock state different from what it started but
				1107	** still short of its goal. The following chart shows the allowed
				1108	** transitions and the inserted intermediate states:
				1109	**
				1110	** UNLOCKED -> SHARED
				1111	** SHARED -> RESERVED
				1112	** SHARED -> (PENDING) -> EXCLUSIVE
				1113	** RESERVED -> (PENDING) -> EXCLUSIVE
				1114	** PENDING -> EXCLUSIVE
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1115	**
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1116	** This routine will only increase a lock. Use the sqlite3OsUnlock()
				1117	** routine to lower a locking level.
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1118	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1119	static int unixLock(OsFile *id, int locktype){
danielk1977	f42f25c	2004-06-25 07:21:28 +0000	[diff] [blame]	1120	/* The following describes the implementation of the various locks and
				1121	** lock transitions in terms of the POSIX advisory shared and exclusive
				1122	** lock primitives (called read-locks and write-locks below, to avoid
				1123	** confusion with SQLite lock names). The algorithms are complicated
				1124	** slightly in order to be compatible with windows systems simultaneously
				1125	** accessing the same database file, in case that is ever required.
				1126	**
				1127	** Symbols defined in os.h indentify the 'pending byte' and the 'reserved
				1128	** byte', each single bytes at well known offsets, and the 'shared byte
				1129	** range', a range of 510 bytes at a well known offset.
				1130	**
				1131	** To obtain a SHARED lock, a read-lock is obtained on the 'pending
				1132	** byte'. If this is successful, a random byte from the 'shared byte
				1133	** range' is read-locked and the lock on the 'pending byte' released.
				1134	**
danielk1977	90ba3bd	2004-06-25 08:32:25 +0000	[diff] [blame]	1135	** A process may only obtain a RESERVED lock after it has a SHARED lock.
				1136	** A RESERVED lock is implemented by grabbing a write-lock on the
				1137	** 'reserved byte'.
danielk1977	f42f25c	2004-06-25 07:21:28 +0000	[diff] [blame]	1138	**
				1139	** A process may only obtain a PENDING lock after it has obtained a
danielk1977	90ba3bd	2004-06-25 08:32:25 +0000	[diff] [blame]	1140	** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
				1141	** on the 'pending byte'. This ensures that no new SHARED locks can be
				1142	** obtained, but existing SHARED locks are allowed to persist. A process
				1143	** does not have to obtain a RESERVED lock on the way to a PENDING lock.
				1144	** This property is used by the algorithm for rolling back a journal file
				1145	** after a crash.
danielk1977	f42f25c	2004-06-25 07:21:28 +0000	[diff] [blame]	1146	**
danielk1977	90ba3bd	2004-06-25 08:32:25 +0000	[diff] [blame]	1147	** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
				1148	** implemented by obtaining a write-lock on the entire 'shared byte
				1149	** range'. Since all other locks require a read-lock on one of the bytes
				1150	** within this range, this ensures that no other locks are held on the
				1151	** database.
danielk1977	f42f25c	2004-06-25 07:21:28 +0000	[diff] [blame]	1152	**
				1153	** The reason a single byte cannot be used instead of the 'shared byte
				1154	** range' is that some versions of windows do not support read-locks. By
				1155	** locking a random byte from a range, concurrent SHARED locks may exist
				1156	** even if the locking primitive used is always a write-lock.
				1157	*/
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1158	int rc = SQLITE_OK;
				1159	struct lockInfo *pLock = id->pLock;
				1160	struct flock lock;
				1161	int s;
				1162
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	1163	assert( id );
drh	e29b915	2005-03-18 14:03:15 +0000	[diff] [blame]	1164	TRACE7("LOCK %d %s was %s(%s,%d) pid=%d\n", id->h, locktypeName(locktype),
danielk1977	2b44485	2004-06-29 07:45:33 +0000	[diff] [blame]	1165	locktypeName(id->locktype), locktypeName(pLock->locktype), pLock->cnt
				1166	,getpid() );
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1167	if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1168
				1169	/* If there is already a lock of this type or more restrictive on the
				1170	** OsFile, do nothing. Don't use the end_lock: exit path, as
				1171	** sqlite3OsEnterMutex() hasn't been called yet.
				1172	*/
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1173	if( id->locktype>=locktype ){
drh	e29b915	2005-03-18 14:03:15 +0000	[diff] [blame]	1174	TRACE3("LOCK %d %s ok (already held)\n", id->h, locktypeName(locktype));
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1175	return SQLITE_OK;
				1176	}
				1177
drh	b3e0434	2004-06-08 00:47:47 +0000	[diff] [blame]	1178	/* Make sure the locking sequence is correct
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1179	*/
drh	b3e0434	2004-06-08 00:47:47 +0000	[diff] [blame]	1180	assert( id->locktype!=NO_LOCK \|\| locktype==SHARED_LOCK );
				1181	assert( locktype!=PENDING_LOCK );
				1182	assert( locktype!=RESERVED_LOCK \|\| id->locktype==SHARED_LOCK );
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1183
drh	b3e0434	2004-06-08 00:47:47 +0000	[diff] [blame]	1184	/* This mutex is needed because id->pLock is shared across threads
				1185	*/
				1186	sqlite3OsEnterMutex();
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1187
				1188	/* If some thread using this PID has a lock via a different OsFile*
				1189	** handle that precludes the requested lock, return BUSY.
				1190	*/
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1191	if( (id->locktype!=pLock->locktype &&
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1192	(pLock->locktype>=PENDING_LOCK \|\| locktype>SHARED_LOCK))
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1193	){
				1194	rc = SQLITE_BUSY;
				1195	goto end_lock;
				1196	}
				1197
				1198	/* If a SHARED lock is requested, and some thread using this PID already
				1199	** has a SHARED or RESERVED lock, then increment reference counts and
				1200	** return SQLITE_OK.
				1201	*/
				1202	if( locktype==SHARED_LOCK &&
				1203	(pLock->locktype==SHARED_LOCK \|\| pLock->locktype==RESERVED_LOCK) ){
				1204	assert( locktype==SHARED_LOCK );
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1205	assert( id->locktype==0 );
danielk1977	ecb2a96	2004-06-02 06:30:16 +0000	[diff] [blame]	1206	assert( pLock->cnt>0 );
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1207	id->locktype = SHARED_LOCK;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1208	pLock->cnt++;
				1209	id->pOpen->nLock++;
				1210	goto end_lock;
				1211	}
				1212
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1213	lock.l_len = 1L;
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1214
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1215	lock.l_whence = SEEK_SET;
				1216
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1217	/* A PENDING lock is needed before acquiring a SHARED lock and before
				1218	** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
				1219	** be released.
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1220	*/
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1221	if( locktype==SHARED_LOCK
				1222	\|\| (locktype==EXCLUSIVE_LOCK && id->locktype<PENDING_LOCK)
				1223	){
danielk1977	489468c	2004-06-28 08:25:47 +0000	[diff] [blame]	1224	lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1225	lock.l_start = PENDING_BYTE;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1226	s = fcntl(id->h, F_SETLK, &lock);
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1227	if( s ){
				1228	rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
				1229	goto end_lock;
				1230	}
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1231	}
				1232
				1233
				1234	/* If control gets to this point, then actually go ahead and make
				1235	** operating system calls for the specified lock.
				1236	*/
				1237	if( locktype==SHARED_LOCK ){
				1238	assert( pLock->cnt==0 );
				1239	assert( pLock->locktype==0 );
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1240
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1241	/* Now get the read-lock */
				1242	lock.l_start = SHARED_FIRST;
				1243	lock.l_len = SHARED_SIZE;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1244	s = fcntl(id->h, F_SETLK, &lock);
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1245
				1246	/* Drop the temporary PENDING lock */
				1247	lock.l_start = PENDING_BYTE;
				1248	lock.l_len = 1L;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1249	lock.l_type = F_UNLCK;
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1250	if( fcntl(id->h, F_SETLK, &lock)!=0 ){
				1251	rc = SQLITE_IOERR; /* This should never happen */
				1252	goto end_lock;
				1253	}
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1254	if( s ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1255	rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
				1256	}else{
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1257	id->locktype = SHARED_LOCK;
danielk1977	ecb2a96	2004-06-02 06:30:16 +0000	[diff] [blame]	1258	id->pOpen->nLock++;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1259	pLock->cnt = 1;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1260	}
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1261	}else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
				1262	/* We are trying for an exclusive lock but another thread in this
				1263	** same process is still holding a shared lock. */
				1264	rc = SQLITE_BUSY;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1265	}else{
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1266	/* The request was for a RESERVED or EXCLUSIVE lock. It is
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1267	** assumed that there is a SHARED or greater lock on the file
				1268	** already.
				1269	*/
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1270	assert( 0!=id->locktype );
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1271	lock.l_type = F_WRLCK;
				1272	switch( locktype ){
				1273	case RESERVED_LOCK:
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1274	lock.l_start = RESERVED_BYTE;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1275	break;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1276	case EXCLUSIVE_LOCK:
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1277	lock.l_start = SHARED_FIRST;
				1278	lock.l_len = SHARED_SIZE;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1279	break;
				1280	default:
				1281	assert(0);
				1282	}
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1283	s = fcntl(id->h, F_SETLK, &lock);
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1284	if( s ){
				1285	rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
				1286	}
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1287	}
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1288
danielk1977	ecb2a96	2004-06-02 06:30:16 +0000	[diff] [blame]	1289	if( rc==SQLITE_OK ){
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1290	id->locktype = locktype;
danielk1977	ecb2a96	2004-06-02 06:30:16 +0000	[diff] [blame]	1291	pLock->locktype = locktype;
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1292	}else if( locktype==EXCLUSIVE_LOCK ){
				1293	id->locktype = PENDING_LOCK;
				1294	pLock->locktype = PENDING_LOCK;
danielk1977	ecb2a96	2004-06-02 06:30:16 +0000	[diff] [blame]	1295	}
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1296
				1297	end_lock:
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1298	sqlite3OsLeaveMutex();
drh	e29b915	2005-03-18 14:03:15 +0000	[diff] [blame]	1299	TRACE4("LOCK %d %s %s\n", id->h, locktypeName(locktype),
danielk1977	2b44485	2004-06-29 07:45:33 +0000	[diff] [blame]	1300	rc==SQLITE_OK ? "ok" : "failed");
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1301	return rc;
				1302	}
				1303
				1304	/*
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1305	** Lower the locking level on file descriptor id to locktype. locktype
				1306	** must be either NO_LOCK or SHARED_LOCK.
				1307	**
				1308	** If the locking level of the file descriptor is already at or below
				1309	** the requested locking level, this routine is a no-op.
				1310	**
drh	9c105bb	2004-10-02 20:38:28 +0000	[diff] [blame]	1311	** It is not possible for this routine to fail if the second argument
				1312	** is NO_LOCK. If the second argument is SHARED_LOCK, this routine
				1313	** might return SQLITE_IOERR instead of SQLITE_OK.
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1314	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1315	static int unixUnlock(OsFile *id, int locktype){
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1316	struct lockInfo *pLock;
				1317	struct flock lock;
drh	9c105bb	2004-10-02 20:38:28 +0000	[diff] [blame]	1318	int rc = SQLITE_OK;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1319
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	1320	assert( id );
drh	e29b915	2005-03-18 14:03:15 +0000	[diff] [blame]	1321	TRACE7("UNLOCK %d %d was %d(%d,%d) pid=%d\n", id->h, locktype, id->locktype,
danielk1977	2b44485	2004-06-29 07:45:33 +0000	[diff] [blame]	1322	id->pLock->locktype, id->pLock->cnt, getpid());
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1323	if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1324
				1325	assert( locktype<=SHARED_LOCK );
				1326	if( id->locktype<=locktype ){
				1327	return SQLITE_OK;
				1328	}
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1329	sqlite3OsEnterMutex();
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1330	pLock = id->pLock;
				1331	assert( pLock->cnt!=0 );
				1332	if( id->locktype>SHARED_LOCK ){
				1333	assert( pLock->locktype==id->locktype );
drh	9c105bb	2004-10-02 20:38:28 +0000	[diff] [blame]	1334	if( locktype==SHARED_LOCK ){
				1335	lock.l_type = F_RDLCK;
				1336	lock.l_whence = SEEK_SET;
				1337	lock.l_start = SHARED_FIRST;
				1338	lock.l_len = SHARED_SIZE;
				1339	if( fcntl(id->h, F_SETLK, &lock)!=0 ){
				1340	/* This should never happen */
				1341	rc = SQLITE_IOERR;
				1342	}
				1343	}
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1344	lock.l_type = F_UNLCK;
				1345	lock.l_whence = SEEK_SET;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1346	lock.l_start = PENDING_BYTE;
				1347	lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE );
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1348	if( fcntl(id->h, F_SETLK, &lock)==0 ){
				1349	pLock->locktype = SHARED_LOCK;
				1350	}else{
				1351	rc = SQLITE_IOERR; /* This should never happen */
				1352	}
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1353	}
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1354	if( locktype==NO_LOCK ){
				1355	struct openCnt *pOpen;
danielk1977	ecb2a96	2004-06-02 06:30:16 +0000	[diff] [blame]	1356
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1357	/* Decrement the shared lock counter. Release the lock using an
				1358	** OS call only when all threads in this same process have released
				1359	** the lock.
				1360	*/
				1361	pLock->cnt--;
				1362	if( pLock->cnt==0 ){
				1363	lock.l_type = F_UNLCK;
				1364	lock.l_whence = SEEK_SET;
				1365	lock.l_start = lock.l_len = 0L;
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1366	if( fcntl(id->h, F_SETLK, &lock)==0 ){
				1367	pLock->locktype = NO_LOCK;
				1368	}else{
				1369	rc = SQLITE_IOERR; /* This should never happen */
				1370	}
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1371	}
				1372
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1373	/* Decrement the count of locks against this same file. When the
				1374	** count reaches zero, close any other file descriptors whose close
				1375	** was deferred because of outstanding locks.
				1376	*/
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1377	pOpen = id->pOpen;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1378	pOpen->nLock--;
				1379	assert( pOpen->nLock>=0 );
				1380	if( pOpen->nLock==0 && pOpen->nPending>0 ){
				1381	int i;
				1382	for(i=0; i<pOpen->nPending; i++){
				1383	close(pOpen->aPending[i]);
				1384	}
				1385	sqliteFree(pOpen->aPending);
				1386	pOpen->nPending = 0;
				1387	pOpen->aPending = 0;
				1388	}
				1389	}
				1390	sqlite3OsLeaveMutex();
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1391	id->locktype = locktype;
drh	9c105bb	2004-10-02 20:38:28 +0000	[diff] [blame]	1392	return rc;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1393	}
				1394
				1395	/*
danielk1977	e302663	2004-06-22 11:29:02 +0000	[diff] [blame]	1396	** Close a file.
				1397	*/
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	1398	static int unixClose(OsFile **pId){
				1399	OsFile id = pId;
				1400	if( !id ) return SQLITE_OK;
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1401	if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1402	sqlite3Io.xUnlock(id, NO_LOCK);
danielk1977	e302663	2004-06-22 11:29:02 +0000	[diff] [blame]	1403	if( id->dirfd>=0 ) close(id->dirfd);
				1404	id->dirfd = -1;
				1405	sqlite3OsEnterMutex();
				1406	if( id->pOpen->nLock ){
				1407	/* If there are outstanding locks, do not actually close the file just
				1408	** yet because that would clear those locks. Instead, add the file
				1409	** descriptor to pOpen->aPending. It will be automatically closed when
				1410	** the last lock is cleared.
				1411	*/
				1412	int *aNew;
				1413	struct openCnt *pOpen = id->pOpen;
drh	ad81e87	2005-08-21 21:45:01 +0000	[diff] [blame]	1414	aNew = sqliteRealloc( pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
danielk1977	e302663	2004-06-22 11:29:02 +0000	[diff] [blame]	1415	if( aNew==0 ){
				1416	/* If a malloc fails, just leak the file descriptor */
				1417	}else{
				1418	pOpen->aPending = aNew;
drh	ad81e87	2005-08-21 21:45:01 +0000	[diff] [blame]	1419	pOpen->aPending[pOpen->nPending] = id->h;
				1420	pOpen->nPending++;
danielk1977	e302663	2004-06-22 11:29:02 +0000	[diff] [blame]	1421	}
				1422	}else{
				1423	/* There are no outstanding locks so we can close the file immediately */
				1424	close(id->h);
				1425	}
				1426	releaseLockInfo(id->pLock);
				1427	releaseOpenCnt(id->pOpen);
				1428	sqlite3OsLeaveMutex();
				1429	id->isOpen = 0;
				1430	TRACE2("CLOSE %-3d\n", id->h);
				1431	OpenCounter(-1);
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	1432	sqliteFree(id);
				1433	*pId = 0;
danielk1977	e302663	2004-06-22 11:29:02 +0000	[diff] [blame]	1434	return SQLITE_OK;
				1435	}
				1436
				1437	/*
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1438	** Turn a relative pathname into a full pathname. Return a pointer
				1439	** to the full pathname stored in space obtained from sqliteMalloc().
				1440	** The calling function is responsible for freeing this space once it
				1441	** is no longer needed.
				1442	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1443	static char unixFullPathname(const char zRelative){
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1444	char *zFull = 0;
				1445	if( zRelative[0]=='/' ){
				1446	sqlite3SetString(&zFull, zRelative, (char*)0);
				1447	}else{
drh	79158e1	2005-09-06 21:40:45 +0000	[diff] [blame]	1448	char *zBuf = sqliteMalloc(5000);
				1449	if( zBuf==0 ){
				1450	return 0;
				1451	}
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1452	zBuf[0] = 0;
drh	79158e1	2005-09-06 21:40:45 +0000	[diff] [blame]	1453	sqlite3SetString(&zFull, getcwd(zBuf, 5000), "/", zRelative,
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1454	(char*)0);
drh	79158e1	2005-09-06 21:40:45 +0000	[diff] [blame]	1455	sqliteFree(zBuf);
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1456	}
				1457	return zFull;
				1458	}
				1459
drh	1883921	2005-11-26 03:43:23 +0000	[diff] [blame]	1460	/*
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	1461	** Change the value of the fullsync flag in the given file descriptor.
drh	1883921	2005-11-26 03:43:23 +0000	[diff] [blame]	1462	*/
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	1463	static void unixSetFullSync(OsFile *id, int v){
				1464	id->fullSync = v;
				1465	}
				1466
				1467	/*
				1468	** Return the underlying file handle for an OsFile
				1469	*/
				1470	static int unixFileHandle(OsFile *id){
				1471	return id->h;
				1472	}
				1473
				1474	/*
				1475	** Return an integer that indices the type of lock currently held
				1476	** by this handle. (Used for testing and analysis only.)
				1477	*/
				1478	static int unixLockState(OsFile *id){
				1479	return id->locktype;
drh	1883921	2005-11-26 03:43:23 +0000	[diff] [blame]	1480	}
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1481
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1482	/*
				1483	** This is the structure that defines all of the I/O routines.
				1484	*/
				1485	struct sqlite3IoVtbl sqlite3Io = {
				1486	unixDelete,
				1487	unixFileExists,
				1488	unixOpenReadWrite,
				1489	unixOpenExclusive,
				1490	unixOpenReadOnly,
				1491	unixOpenDirectory,
				1492	unixSyncDirectory,
				1493	unixTempFileName,
				1494	unixIsDirWritable,
				1495	unixClose,
				1496	unixRead,
				1497	unixWrite,
				1498	unixSeek,
				1499	unixSync,
				1500	unixTruncate,
				1501	unixFileSize,
				1502	unixFullPathname,
				1503	unixLock,
				1504	unixUnlock,
				1505	unixCheckReservedLock,
drh	9cbe635	2005-11-29 03:13:21 +0000	[diff] [blame^]	1506	unixSetFullSync,
				1507	unixFileHandle,
				1508	unixLockState,
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame]	1509	};
				1510
				1511
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1512	#endif /* SQLITE_OMIT_DISKIO */
				1513	/***************************************************************************
				1514	** Everything above deals with file I/O. Everything that follows deals
				1515	** with other miscellaneous aspects of the operating system interface
				1516	****************************************************************************/
				1517
				1518
				1519	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1520	** Get information to seed the random number generator. The seed
				1521	** is written into the buffer zBuf[256]. The calling function must
				1522	** supply a sufficiently large buffer.
				1523	*/
				1524	int sqlite3OsRandomSeed(char *zBuf){
				1525	/* We have to initialize zBuf to prevent valgrind from reporting
				1526	** errors. The reports issued by valgrind are incorrect - we would
				1527	** prefer that the randomness be increased by making use of the
				1528	** uninitialized space in zBuf - but valgrind errors tend to worry
				1529	** some users. Rather than argue, it seems easier just to initialize
				1530	** the whole array and silence valgrind, even if that means less randomness
				1531	** in the random seed.
				1532	**
				1533	** When testing, initializing zBuf[] to zero is all we do. That means
				1534	** that we always use the same random number sequence.* This makes the
				1535	** tests repeatable.
				1536	*/
				1537	memset(zBuf, 0, 256);
				1538	#if !defined(SQLITE_TEST)
				1539	{
drh	842b864	2005-01-21 17:53:17 +0000	[diff] [blame]	1540	int pid, fd;
				1541	fd = open("/dev/urandom", O_RDONLY);
				1542	if( fd<0 ){
				1543	time((time_t*)zBuf);
				1544	pid = getpid();
				1545	memcpy(&zBuf[sizeof(time_t)], &pid, sizeof(pid));
				1546	}else{
				1547	read(fd, zBuf, 256);
				1548	close(fd);
				1549	}
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1550	}
				1551	#endif
				1552	return SQLITE_OK;
				1553	}
				1554
				1555	/*
				1556	** Sleep for a little while. Return the amount of time slept.
				1557	*/
				1558	int sqlite3OsSleep(int ms){
				1559	#if defined(HAVE_USLEEP) && HAVE_USLEEP
				1560	usleep(ms*1000);
				1561	return ms;
				1562	#else
				1563	sleep((ms+999)/1000);
				1564	return 1000*((ms+999)/1000);
				1565	#endif
				1566	}
				1567
				1568	/*
				1569	** Static variables used for thread synchronization
				1570	*/
				1571	static int inMutex = 0;
drh	7906975	2004-05-22 21:30:40 +0000	[diff] [blame]	1572	#ifdef SQLITE_UNIX_THREADS
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1573	static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
drh	7906975	2004-05-22 21:30:40 +0000	[diff] [blame]	1574	#endif
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1575
				1576	/*
				1577	** The following pair of routine implement mutual exclusion for
				1578	** multi-threaded processes. Only a single thread is allowed to
				1579	** executed code that is surrounded by EnterMutex() and LeaveMutex().
				1580	**
				1581	** SQLite uses only a single Mutex. There is not much critical
				1582	** code and what little there is executes quickly and without blocking.
				1583	*/
				1584	void sqlite3OsEnterMutex(){
				1585	#ifdef SQLITE_UNIX_THREADS
				1586	pthread_mutex_lock(&mutex);
				1587	#endif
				1588	assert( !inMutex );
				1589	inMutex = 1;
				1590	}
				1591	void sqlite3OsLeaveMutex(){
				1592	assert( inMutex );
				1593	inMutex = 0;
				1594	#ifdef SQLITE_UNIX_THREADS
				1595	pthread_mutex_unlock(&mutex);
				1596	#endif
				1597	}
				1598
				1599	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1600	** The following variable, if set to a non-zero value, becomes the result
				1601	** returned from sqlite3OsCurrentTime(). This is used for testing.
				1602	*/
				1603	#ifdef SQLITE_TEST
				1604	int sqlite3_current_time = 0;
				1605	#endif
				1606
				1607	/*
				1608	** Find the current time (in Universal Coordinated Time). Write the
				1609	** current time and date as a Julian Day number into *prNow and
				1610	** return 0. Return 1 if the time and date cannot be found.
				1611	*/
				1612	int sqlite3OsCurrentTime(double *prNow){
drh	19e2d37	2005-08-29 23:00:03 +0000	[diff] [blame]	1613	#ifdef NO_GETTOD
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1614	time_t t;
				1615	time(&t);
				1616	*prNow = t/86400.0 + 2440587.5;
drh	19e2d37	2005-08-29 23:00:03 +0000	[diff] [blame]	1617	#else
				1618	struct timeval sNow;
				1619	struct timezone sTz; /* Not used */
				1620	gettimeofday(&sNow, &sTz);
				1621	*prNow = 2440587.5 + sNow.tv_sec/86400.0 + sNow.tv_usec/86400000000.0;
				1622	#endif
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1623	#ifdef SQLITE_TEST
				1624	if( sqlite3_current_time ){
				1625	*prNow = sqlite3_current_time/86400.0 + 2440587.5;
				1626	}
				1627	#endif
				1628	return 0;
				1629	}
				1630
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1631	#endif /* OS_UNIX */