Blame - src/os_unix.c - chromium.googlesource.com/chromium/deps/sqlite

blob: 33185ed82c0508c18f81bf400f5faa91f8b09b30 [file] [log] [blame]

drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1	/*
				2	** 2004 May 22
				3	**
				4	** The author disclaims copyright to this source code. In place of
				5	** a legal notice, here is a blessing:
				6	**
				7	** May you do good and not evil.
				8	** May you find forgiveness for yourself and forgive others.
				9	** May you share freely, never taking more than you give.
				10	**
				11	******************************************************************************
				12	**
				13	** This file contains code that is specific to Unix systems.
				14	*/
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	15	#include "sqliteInt.h"
drh	eb20625	2004-10-01 02:00:31 +0000	[diff] [blame]	16	#include "os.h"
				17	#if OS_UNIX /* This file is used on unix only */
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	18
				19
				20	#include <time.h>
drh	19e2d37	2005-08-29 23:00:03 +0000	[diff] [blame]	21	#include <sys/time.h>
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	22	#include <errno.h>
				23	#include <unistd.h>
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	24
				25	/*
				26	** Do not include any of the File I/O interface procedures if the
				27	** SQLITE_OMIT_DISKIO macro is defined (indicating that the database
				28	** will be in-memory only)
				29	*/
				30	#ifndef SQLITE_OMIT_DISKIO
				31
				32
				33	/*
				34	** Define various macros that are missing from some systems.
				35	*/
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	36	#ifndef O_LARGEFILE
				37	# define O_LARGEFILE 0
				38	#endif
				39	#ifdef SQLITE_DISABLE_LFS
				40	# undef O_LARGEFILE
				41	# define O_LARGEFILE 0
				42	#endif
				43	#ifndef O_NOFOLLOW
				44	# define O_NOFOLLOW 0
				45	#endif
				46	#ifndef O_BINARY
				47	# define O_BINARY 0
				48	#endif
				49
				50	/*
				51	** The DJGPP compiler environment looks mostly like Unix, but it
				52	** lacks the fcntl() system call. So redefine fcntl() to be something
				53	** that always succeeds. This means that locking does not occur under
danielk1977	26c5d79	2005-11-25 09:01:23 +0000	[diff] [blame]	54	** DJGPP. But it's DOS - what did you expect?
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	55	*/
				56	#ifdef __DJGPP__
				57	# define fcntl(A,B,C) 0
				58	#endif
				59
				60	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	61	** Include code that is common to all os_*.c files
				62	*/
				63	#include "os_common.h"
				64
/*
** The threadid macro resolves to the thread-id or to 0.  Used for
** testing and debugging only.
**
** With SQLITE_UNIX_THREADS it expands to a pthread_self() call, so the
** value has type pthread_t; otherwise it is the integer constant 0.
*/
#ifdef SQLITE_UNIX_THREADS
# define threadid pthread_self()
#else
# define threadid 0
#endif
				74
				75	/*
				76	** Set or check the OsFile.tid field. This field is set when an OsFile
				77	** is first opened. All subsequent uses of the OsFile verify that the
				78	** same thread is operating on the OsFile. Some operating systems do
				79	** not allow locks to be overridden by other threads and that restriction
				80	** means that sqlite3* database handles cannot be moved from one thread
				81	** to another. This logic makes sure a user does not try to do that
				82	** by mistake.
				83	*/
drh	91636d5	2005-11-24 23:14:00 +0000	[diff] [blame]	84	#if defined(SQLITE_UNIX_THREADS) && !defined(SQLITE_ALLOW_XTHREAD_CONNECTIONS)
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	85	# define SET_THREADID(X) X->tid = pthread_self()
				86	# define CHECK_THREADID(X) (!pthread_equal(X->tid, pthread_self()))
				87	#else
				88	# define SET_THREADID(X)
				89	# define CHECK_THREADID(X) 0
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	90	#endif
				91
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	92	/*
				93	** Here is the dirt on POSIX advisory locks: ANSI STD 1003.1 (1996)
				94	** section 6.5.2.2 lines 483 through 490 specify that when a process
				95	** sets or clears a lock, that operation overrides any prior locks set
				96	** by the same process. It does not explicitly say so, but this implies
				97	** that it overrides locks set by the same process using a different
				98	** file descriptor. Consider this test case:
				99	**
				100	** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
				101	** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
				102	**
				103	** Suppose ./file1 and ./file2 are really the same file (because
				104	** one is a hard or symbolic link to the other) then if you set
				105	** an exclusive lock on fd1, then try to get an exclusive lock
				106	** on fd2, it works. I would have expected the second lock to
				107	** fail since there was already a lock on the file due to fd1.
				108	** But not so. Since both locks came from the same process, the
				109	** second overrides the first, even though they were on different
				110	** file descriptors opened on different file names.
				111	**
				112	** Bummer. If you ask me, this is broken. Badly broken. It means
				113	** that we cannot use POSIX locks to synchronize file access among
				114	** competing threads of the same process. POSIX locks will work fine
				115	** to synchronize access for threads in separate processes, but not
				116	** threads within the same process.
				117	**
				118	** To work around the problem, SQLite has to manage file locks internally
				119	** on its own. Whenever a new database is opened, we have to find the
				120	** specific inode of the database file (the inode is determined by the
				121	** st_dev and st_ino fields of the stat structure that fstat() fills in)
				122	** and check for locks already existing on that inode. When locks are
				123	** created or removed, we have to look at our own internal record of the
				124	** locks to see if another thread has previously set a lock on that same
				125	** inode.
				126	**
				127	** The OsFile structure for POSIX is no longer just an integer file
				128	** descriptor. It is now a structure that holds the integer file
				129	** descriptor and a pointer to a structure that describes the internal
				130	** locks on the corresponding inode. There is one locking structure
				131	** per inode, so if the same inode is opened twice, both OsFile structures
				132	** point to the same locking structure. The locking structure keeps
				133	** a reference count (so we will know when to delete it) and a "cnt"
				134	** field that tells us its internal lock status. cnt==0 means the
				135	** file is unlocked. cnt==-1 means the file has an exclusive lock.
				136	** cnt>0 means there are cnt shared locks on the file.
				137	**
				138	** Any attempt to lock or unlock a file first checks the locking
				139	** structure. The fcntl() system call is only invoked to set a
				140	** POSIX lock if the internal lock structure transitions between
				141	** a locked and an unlocked state.
				142	**
				143	** 2004-Jan-11:
				144	** More recent discoveries about POSIX advisory locks. (The more
				145	** I discover, the more I realize that POSIX advisory locks are
				146	** an abomination.)
				147	**
				148	** If you close a file descriptor that points to a file that has locks,
				149	** all locks on that file that are owned by the current process are
				150	** released. To work around this problem, each OsFile structure contains
				151	** a pointer to an openCnt structure. There is one openCnt structure
				152	** per open inode, which means that multiple OsFiles can point to a single
				153	** openCnt. When an attempt is made to close an OsFile, if there are
				154	** other OsFiles open on the same inode that are holding locks, the call
				155	** to close() the file descriptor is deferred until all of the locks clear.
				156	** The openCnt structure keeps a list of file descriptors that need to
				157	** be closed and that list is walked (and cleared) when the last lock
				158	** clears.
				159	**
				160	** First, under Linux threads, because each thread has a separate
				161	** process ID, lock operations in one thread do not override locks
				162	** to the same file in other threads. Linux threads behave like
				163	** separate processes in this respect. But, if you close a file
				164	** descriptor in linux threads, all locks are cleared, even locks
				165	** on other threads and even though the other threads have different
				166	** process IDs. Linux threads is inconsistent in this respect.
				167	** (I'm beginning to think that linux threads is an abomination too.)
				168	** The consequence of this all is that the hash table for the lockInfo
				169	** structure has to include the process id as part of its key because
				170	** locks in different threads are treated as distinct. But the
				171	** openCnt structure should not include the process id in its
				172	** key because close() clears lock on all threads, not just the current
				173	** thread. Were it not for this goofiness in linux threads, we could
				174	** combine the lockInfo and openCnt structures into a single structure.
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	175	**
				176	** 2004-Jun-28:
				177	** On some versions of linux, threads can override each others locks.
				178	** On others not. Sometimes you can change the behavior on the same
				179	** system by setting the LD_ASSUME_KERNEL environment variable. The
				180	** POSIX standard is silent as to which behavior is correct, as far
				181	** as I can tell, so other versions of unix might show the same
				182	** inconsistency. There is little doubt in my mind that posix
				183	** advisory locks and linux threads are profoundly broken.
				184	**
				185	** To work around the inconsistencies, we have to test at runtime
				186	** whether or not threads can override each others locks. This test
				187	** is run once, the first time any lock is attempted. A static
				188	** variable is set to record the results of this test for future
				189	** use.
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	190	*/
				191
				192	/*
				193	** An instance of the following structure serves as the key used
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	194	** to locate a particular lockInfo structure given its inode.
				195	**
				196	** If threads cannot override each others locks, then we set the
				197	** lockKey.tid field to the thread ID. If threads can override
				198	** each others locks then tid is always set to zero. tid is also
				199	** set to zero if we compile without threading support.
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	200	*/
				201	struct lockKey {
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	202	dev_t dev; /* Device number */
				203	ino_t ino; /* Inode number */
				204	#ifdef SQLITE_UNIX_THREADS
drh	d9cb6ac	2005-10-20 07:28:17 +0000	[diff] [blame]	205	pthread_t tid; /* Thread ID or zero if threads can override each other */
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	206	#endif
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	207	};
				208
				209	/*
				210	** An instance of the following structure is allocated for each open
				211	** inode on each thread with a different process ID. (Threads have
				212	** different process IDs on linux, but not on most other unixes.)
				213	**
				214	** A single inode can have multiple file descriptors, so each OsFile
				215	** structure contains a pointer to an instance of this object and this
				216	** object keeps a count of the number of OsFiles pointing to it.
				217	*/
				218	struct lockInfo {
				219	struct lockKey key; /* The lookup key */
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	220	int cnt; /* Number of SHARED locks held */
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	221	int locktype; /* One of SHARED_LOCK, RESERVED_LOCK etc. */
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	222	int nRef; /* Number of pointers to this structure */
				223	};
				224
				225	/*
				226	** An instance of the following structure serves as the key used
				227	** to locate a particular openCnt structure given its inode. This
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	228	** is the same as the lockKey except that the thread ID is omitted.
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	229	*/
				230	struct openKey {
				231	dev_t dev; /* Device number */
				232	ino_t ino; /* Inode number */
				233	};
				234
				235	/*
				236	** An instance of the following structure is allocated for each open
				237	** inode. This structure keeps track of the number of locks on that
				238	** inode. If a close is attempted against an inode that is holding
				239	** locks, the close is deferred until all locks clear by adding the
				240	** file descriptor to be closed to the pending list.
				241	*/
				242	struct openCnt {
				243	struct openKey key; /* The lookup key */
				244	int nRef; /* Number of pointers to this structure */
				245	int nLock; /* Number of outstanding locks */
				246	int nPending; /* Number of pending close() operations */
				247	int aPending; / Malloced space holding fd's awaiting a close() */
				248	};
				249
				250	/*
				251	** These hash table maps inodes and process IDs into lockInfo and openCnt
				252	** structures. Access to these hash tables must be protected by a mutex.
				253	*/
				254	static Hash lockHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
				255	static Hash openHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 };
				256
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	257
				258	#ifdef SQLITE_UNIX_THREADS
				259	/*
				260	** This variable records whether or not threads can override each others
				261	** locks.
				262	**
				263	** 0: No. Threads cannot override each others locks.
				264	** 1: Yes. Threads can override each others locks.
				265	** -1: We don't know yet.
				266	*/
				267	static int threadsOverrideEachOthersLocks = -1;
				268
				269	/*
				270	** This structure holds information passed into individual test
				271	** threads by the testThreadLockingBehavior() routine.
				272	*/
				273	struct threadTestData {
				274	int fd; /* File to be locked */
				275	struct flock lock; /* The locking operation */
				276	int result; /* Result of the locking operation */
				277	};
				278
#ifdef SQLITE_LOCK_TRACE
/*
** Print out information about all locking operations.
**
** This routine is used for troubleshooting locks on multithreaded
** platforms.  Enable by compiling with the -DSQLITE_LOCK_TRACE
** command-line option on the compiler.  This code is normally
** turned off.
**
** The request (fd, op, p) is forwarded to the real fcntl() and its
** return value is handed back unchanged.  A trace line describing the
** request is written with sqlite3DebugPrintf().  If an F_SETLK request
** for a read or write lock fails, a second line reports the conflicting
** lock as found by F_GETLK.  The errno value left by the forwarded
** fcntl() is restored before returning so that tracing does not disturb
** the caller's error handling.
**
** The "#define fcntl lockTrace" that follows the function reroutes every
** later fcntl() call in this file through the tracer.
**
** NOTE(review): threadid is printed with %d; when it expands to
** pthread_self() the argument has type pthread_t, which may not match
** that conversion on every platform - confirm before relying on the
** thread column of the trace.
*/
static int lockTrace(int fd, int op, struct flock *p){
  const char *zOpName;
  const char *zType = "?";   /* Defined even if assert() is compiled out */
  int s;
  int savedErrno;
  if( op==F_GETLK ){
    zOpName = "GETLK";
  }else if( op==F_SETLK ){
    zOpName = "SETLK";
  }else{
    /* Not a locking request we know how to describe: pass it through. */
    s = fcntl(fd, op, p);
    savedErrno = errno;
    sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
    errno = savedErrno;
    return s;
  }
  if( p->l_type==F_RDLCK ){
    zType = "RDLCK";
  }else if( p->l_type==F_WRLCK ){
    zType = "WRLCK";
  }else if( p->l_type==F_UNLCK ){
    zType = "UNLCK";
  }else{
    assert( 0 );
  }
  assert( p->l_whence==SEEK_SET );
  s = fcntl(fd, op, p);
  savedErrno = errno;
  sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
     threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
     (int)p->l_pid, s);
  if( s && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
    /* The lock was refused.  Ask the system which lock is in the way. */
    struct flock l2;
    l2 = *p;
    fcntl(fd, F_GETLK, &l2);
    if( l2.l_type==F_RDLCK ){
      zType = "RDLCK";
    }else if( l2.l_type==F_WRLCK ){
      zType = "WRLCK";
    }else if( l2.l_type==F_UNLCK ){
      zType = "UNLCK";
    }else{
      assert( 0 );
    }
    sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
       zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
  }
  errno = savedErrno;
  return s;
}
#define fcntl lockTrace
#endif /* SQLITE_LOCK_TRACE */
				337
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	338	/*
				339	** The testThreadLockingBehavior() routine launches two separate
				340	** threads on this routine. This routine attempts to lock a file
				341	** descriptor then returns. The success or failure of that attempt
				342	** allows the testThreadLockingBehavior() procedure to determine
				343	** whether or not threads can override each others locks.
				344	*/
				345	static void threadLockingTest(void pArg){
				346	struct threadTestData pData = (struct threadTestData)pArg;
				347	pData->result = fcntl(pData->fd, F_SETLK, &pData->lock);
				348	return pArg;
				349	}
				350
				351	/*
				352	** This procedure attempts to determine whether or not threads
				353	** can override each others locks then sets the
				354	** threadsOverrideEachOthersLocks variable appropriately.
				355	*/
				356	static void testThreadLockingBehavior(fd_orig){
				357	int fd;
				358	struct threadTestData d[2];
				359	pthread_t t[2];
				360
				361	fd = dup(fd_orig);
				362	if( fd<0 ) return;
				363	memset(d, 0, sizeof(d));
				364	d[0].fd = fd;
				365	d[0].lock.l_type = F_RDLCK;
				366	d[0].lock.l_len = 1;
				367	d[0].lock.l_start = 0;
				368	d[0].lock.l_whence = SEEK_SET;
				369	d[1] = d[0];
				370	d[1].lock.l_type = F_WRLCK;
				371	pthread_create(&t[0], 0, threadLockingTest, &d[0]);
				372	pthread_create(&t[1], 0, threadLockingTest, &d[1]);
				373	pthread_join(t[0], 0);
				374	pthread_join(t[1], 0);
				375	close(fd);
				376	threadsOverrideEachOthersLocks = d[0].result==0 && d[1].result==0;
				377	}
				378	#endif /* SQLITE_UNIX_THREADS */
				379
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	380	/*
				381	** Release a lockInfo structure previously allocated by findLockInfo().
				382	*/
				383	static void releaseLockInfo(struct lockInfo *pLock){
				384	pLock->nRef--;
				385	if( pLock->nRef==0 ){
				386	sqlite3HashInsert(&lockHash, &pLock->key, sizeof(pLock->key), 0);
				387	sqliteFree(pLock);
				388	}
				389	}
				390
				391	/*
				392	** Release a openCnt structure previously allocated by findLockInfo().
				393	*/
				394	static void releaseOpenCnt(struct openCnt *pOpen){
				395	pOpen->nRef--;
				396	if( pOpen->nRef==0 ){
				397	sqlite3HashInsert(&openHash, &pOpen->key, sizeof(pOpen->key), 0);
				398	sqliteFree(pOpen->aPending);
				399	sqliteFree(pOpen);
				400	}
				401	}
				402
				403	/*
				404	** Given a file descriptor, locate lockInfo and openCnt structures that
				405	** describes that file descriptor. Create a new ones if necessary. The
				406	** return values might be unset if an error occurs.
				407	**
				408	** Return the number of errors.
				409	*/
drh	38f8271	2004-06-18 17:10:16 +0000	[diff] [blame]	410	static int findLockInfo(
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	411	int fd, /* The file descriptor used in the key */
				412	struct lockInfo *ppLock, / Return the lockInfo structure here */
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	413	struct openCnt *ppOpen / Return the openCnt structure here */
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	414	){
				415	int rc;
				416	struct lockKey key1;
				417	struct openKey key2;
				418	struct stat statbuf;
				419	struct lockInfo *pLock;
				420	struct openCnt *pOpen;
				421	rc = fstat(fd, &statbuf);
				422	if( rc!=0 ) return 1;
				423	memset(&key1, 0, sizeof(key1));
				424	key1.dev = statbuf.st_dev;
				425	key1.ino = statbuf.st_ino;
drh	5fdae77	2004-06-29 03:29:00 +0000	[diff] [blame]	426	#ifdef SQLITE_UNIX_THREADS
				427	if( threadsOverrideEachOthersLocks<0 ){
				428	testThreadLockingBehavior(fd);
				429	}
				430	key1.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();
				431	#endif
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	432	memset(&key2, 0, sizeof(key2));
				433	key2.dev = statbuf.st_dev;
				434	key2.ino = statbuf.st_ino;
				435	pLock = (struct lockInfo*)sqlite3HashFind(&lockHash, &key1, sizeof(key1));
				436	if( pLock==0 ){
				437	struct lockInfo *pOld;
				438	pLock = sqliteMallocRaw( sizeof(*pLock) );
				439	if( pLock==0 ) return 1;
				440	pLock->key = key1;
				441	pLock->nRef = 1;
				442	pLock->cnt = 0;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	443	pLock->locktype = 0;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	444	pOld = sqlite3HashInsert(&lockHash, &pLock->key, sizeof(key1), pLock);
				445	if( pOld!=0 ){
				446	assert( pOld==pLock );
				447	sqliteFree(pLock);
				448	return 1;
				449	}
				450	}else{
				451	pLock->nRef++;
				452	}
				453	*ppLock = pLock;
				454	pOpen = (struct openCnt*)sqlite3HashFind(&openHash, &key2, sizeof(key2));
				455	if( pOpen==0 ){
				456	struct openCnt *pOld;
				457	pOpen = sqliteMallocRaw( sizeof(*pOpen) );
				458	if( pOpen==0 ){
				459	releaseLockInfo(pLock);
				460	return 1;
				461	}
				462	pOpen->key = key2;
				463	pOpen->nRef = 1;
				464	pOpen->nLock = 0;
				465	pOpen->nPending = 0;
				466	pOpen->aPending = 0;
				467	pOld = sqlite3HashInsert(&openHash, &pOpen->key, sizeof(key2), pOpen);
				468	if( pOld!=0 ){
				469	assert( pOld==pOpen );
				470	sqliteFree(pOpen);
				471	releaseLockInfo(pLock);
				472	return 1;
				473	}
				474	}else{
				475	pOpen->nRef++;
				476	}
				477	*ppOpen = pOpen;
				478	return 0;
				479	}
				480
				481	/*
				482	** Delete the named file
				483	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	484	static int unixDelete(const char *zFilename){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	485	unlink(zFilename);
				486	return SQLITE_OK;
				487	}
				488
				489	/*
				490	** Return TRUE if the named file exists.
				491	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	492	static int unixFileExists(const char *zFilename){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	493	return access(zFilename, 0)==0;
				494	}
				495
				496	/*
				497	** Attempt to open a file for both reading and writing. If that
				498	** fails, try opening it read-only. If the file does not exist,
				499	** try to create it.
				500	**
				501	** On success, a handle for the open file is written to *id
				502	** and *pReadonly is set to 0 if the file was opened for reading and
				503	** writing or 1 if the file was opened read-only. The function returns
				504	** SQLITE_OK.
				505	**
				506	** On failure, the function returns SQLITE_CANTOPEN and leaves
				507	** id and pReadonly unchanged.
				508	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	509	static int unixOpenReadWrite(
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	510	const char *zFilename,
				511	OsFile *id,
				512	int *pReadonly
				513	){
				514	int rc;
drh	da71ce1	2004-06-21 18:14:45 +0000	[diff] [blame]	515	assert( !id->isOpen );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	516	id->dirfd = -1;
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	517	SET_THREADID(id);
drh	8e85577	2005-05-17 11:25:31 +0000	[diff] [blame]	518	id->h = open(zFilename, O_RDWR\|O_CREAT\|O_LARGEFILE\|O_BINARY,
				519	SQLITE_DEFAULT_FILE_PERMISSIONS);
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	520	if( id->h<0 ){
drh	6458e39	2004-07-20 01:14:13 +0000	[diff] [blame]	521	#ifdef EISDIR
				522	if( errno==EISDIR ){
				523	return SQLITE_CANTOPEN;
				524	}
				525	#endif
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	526	id->h = open(zFilename, O_RDONLY\|O_LARGEFILE\|O_BINARY);
				527	if( id->h<0 ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	528	return SQLITE_CANTOPEN;
				529	}
				530	*pReadonly = 1;
				531	}else{
				532	*pReadonly = 0;
				533	}
				534	sqlite3OsEnterMutex();
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	535	rc = findLockInfo(id->h, &id->pLock, &id->pOpen);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	536	sqlite3OsLeaveMutex();
				537	if( rc ){
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	538	close(id->h);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	539	return SQLITE_NOMEM;
				540	}
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	541	id->locktype = 0;
drh	da71ce1	2004-06-21 18:14:45 +0000	[diff] [blame]	542	id->isOpen = 1;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	543	TRACE3("OPEN %-3d %s\n", id->h, zFilename);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	544	OpenCounter(+1);
				545	return SQLITE_OK;
				546	}
				547
				548
				549	/*
				550	** Attempt to open a new file for exclusive access by this process.
				551	** The file will be opened for both reading and writing. To avoid
				552	** a potential security problem, we do not allow the file to have
				553	** previously existed. Nor do we allow the file to be a symbolic
				554	** link.
				555	**
				556	** If delFlag is true, then make arrangements to automatically delete
				557	** the file when it is closed.
				558	**
				559	** On success, write the file handle into *id and return SQLITE_OK.
				560	**
				561	** On failure, return SQLITE_CANTOPEN.
				562	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	563	static int unixOpenExclusive(const char zFilename, OsFile id, int delFlag){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	564	int rc;
drh	da71ce1	2004-06-21 18:14:45 +0000	[diff] [blame]	565	assert( !id->isOpen );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	566	if( access(zFilename, 0)==0 ){
				567	return SQLITE_CANTOPEN;
				568	}
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	569	SET_THREADID(id);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	570	id->dirfd = -1;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	571	id->h = open(zFilename,
drh	d645967	2005-08-13 17:17:01 +0000	[diff] [blame]	572	O_RDWR\|O_CREAT\|O_EXCL\|O_NOFOLLOW\|O_LARGEFILE\|O_BINARY,
				573	SQLITE_DEFAULT_FILE_PERMISSIONS);
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	574	if( id->h<0 ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	575	return SQLITE_CANTOPEN;
				576	}
				577	sqlite3OsEnterMutex();
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	578	rc = findLockInfo(id->h, &id->pLock, &id->pOpen);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	579	sqlite3OsLeaveMutex();
				580	if( rc ){
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	581	close(id->h);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	582	unlink(zFilename);
				583	return SQLITE_NOMEM;
				584	}
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	585	id->locktype = 0;
drh	da71ce1	2004-06-21 18:14:45 +0000	[diff] [blame]	586	id->isOpen = 1;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	587	if( delFlag ){
				588	unlink(zFilename);
				589	}
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	590	TRACE3("OPEN-EX %-3d %s\n", id->h, zFilename);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	591	OpenCounter(+1);
				592	return SQLITE_OK;
				593	}
				594
				595	/*
				596	** Attempt to open a new file for read-only access.
				597	**
				598	** On success, write the file handle into *id and return SQLITE_OK.
				599	**
				600	** On failure, return SQLITE_CANTOPEN.
				601	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	602	static int unixOpenReadOnly(const char zFilename, OsFile id){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	603	int rc;
drh	da71ce1	2004-06-21 18:14:45 +0000	[diff] [blame]	604	assert( !id->isOpen );
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	605	SET_THREADID(id);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	606	id->dirfd = -1;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	607	id->h = open(zFilename, O_RDONLY\|O_LARGEFILE\|O_BINARY);
				608	if( id->h<0 ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	609	return SQLITE_CANTOPEN;
				610	}
				611	sqlite3OsEnterMutex();
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	612	rc = findLockInfo(id->h, &id->pLock, &id->pOpen);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	613	sqlite3OsLeaveMutex();
				614	if( rc ){
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	615	close(id->h);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	616	return SQLITE_NOMEM;
				617	}
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	618	id->locktype = 0;
drh	da71ce1	2004-06-21 18:14:45 +0000	[diff] [blame]	619	id->isOpen = 1;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	620	TRACE3("OPEN-RO %-3d %s\n", id->h, zFilename);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	621	OpenCounter(+1);
				622	return SQLITE_OK;
				623	}
				624
				625	/*
				626	** Attempt to open a file descriptor for the directory that contains a
				627	** file. This file descriptor can be used to fsync() the directory
				628	** in order to make sure the creation of a new file is actually written
				629	** to disk.
				630	**
				631	** This routine is only meaningful for Unix. It is a no-op under
				632	** windows since windows does not support hard links.
				633	**
				634	** On success, a handle for a previously open file is at *id is
				635	** updated with the new directory file descriptor and SQLITE_OK is
				636	** returned.
				637	**
				638	** On failure, the function returns SQLITE_CANTOPEN and leaves
				639	** *id unchanged.
				640	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	641	static int unixOpenDirectory(
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	642	const char *zDirname,
				643	OsFile *id
				644	){
drh	da71ce1	2004-06-21 18:14:45 +0000	[diff] [blame]	645	if( !id->isOpen ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	646	/* Do not open the directory if the corresponding file is not already
				647	** open. */
				648	return SQLITE_CANTOPEN;
				649	}
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	650	SET_THREADID(id);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	651	assert( id->dirfd<0 );
drh	8e85577	2005-05-17 11:25:31 +0000	[diff] [blame]	652	id->dirfd = open(zDirname, O_RDONLY\|O_BINARY, 0);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	653	if( id->dirfd<0 ){
				654	return SQLITE_CANTOPEN;
				655	}
				656	TRACE3("OPENDIR %-3d %s\n", id->dirfd, zDirname);
				657	return SQLITE_OK;
				658	}
				659
				660	/*
drh	ab3f9fe	2004-08-14 17:10:10 +0000	[diff] [blame]	661	** If the following global variable points to a string which is the
				662	** name of a directory, then that directory will be used to store
				663	** temporary files.
				664	*/
tpoindex	9a09a3c	2004-12-20 19:01:32 +0000	[diff] [blame]	665	char *sqlite3_temp_directory = 0;
drh	ab3f9fe	2004-08-14 17:10:10 +0000	[diff] [blame]	666
				667	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	668	** Create a temporary file name in zBuf. zBuf must be big enough to
				669	** hold at least SQLITE_TEMPNAME_SIZE characters.
				670	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	671	static int unixTempFileName(char *zBuf){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	672	static const char *azDirs[] = {
drh	ab3f9fe	2004-08-14 17:10:10 +0000	[diff] [blame]	673	0,
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	674	"/var/tmp",
				675	"/usr/tmp",
				676	"/tmp",
				677	".",
				678	};
drh	5719628	2004-10-06 15:41:16 +0000	[diff] [blame]	679	static const unsigned char zChars[] =
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	680	"abcdefghijklmnopqrstuvwxyz"
				681	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
				682	"0123456789";
				683	int i, j;
				684	struct stat buf;
				685	const char *zDir = ".";
drh	effd02b	2004-08-29 23:42:13 +0000	[diff] [blame]	686	azDirs[0] = sqlite3_temp_directory;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	687	for(i=0; i<sizeof(azDirs)/sizeof(azDirs[0]); i++){
drh	ab3f9fe	2004-08-14 17:10:10 +0000	[diff] [blame]	688	if( azDirs[i]==0 ) continue;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	689	if( stat(azDirs[i], &buf) ) continue;
				690	if( !S_ISDIR(buf.st_mode) ) continue;
				691	if( access(azDirs[i], 07) ) continue;
				692	zDir = azDirs[i];
				693	break;
				694	}
				695	do{
				696	sprintf(zBuf, "%s/"TEMP_FILE_PREFIX, zDir);
				697	j = strlen(zBuf);
				698	sqlite3Randomness(15, &zBuf[j]);
				699	for(i=0; i<15; i++, j++){
				700	zBuf[j] = (char)zChars[ ((unsigned char)zBuf[j])%(sizeof(zChars)-1) ];
				701	}
				702	zBuf[j] = 0;
				703	}while( access(zBuf,0)==0 );
				704	return SQLITE_OK;
				705	}
				706
				707	/*
tpoindex	9a09a3c	2004-12-20 19:01:32 +0000	[diff] [blame]	708	** Check that a given pathname is a directory and is writable
				709	**
				710	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	711	static int unixIsDirWritable(char *zBuf){
				712	#ifndef SQLITE_OMIT_PAGER_PRAGMAS
tpoindex	9a09a3c	2004-12-20 19:01:32 +0000	[diff] [blame]	713	struct stat buf;
				714	if( zBuf==0 ) return 0;
drh	268283b	2005-01-08 15:44:25 +0000	[diff] [blame]	715	if( zBuf[0]==0 ) return 0;
tpoindex	9a09a3c	2004-12-20 19:01:32 +0000	[diff] [blame]	716	if( stat(zBuf, &buf) ) return 0;
				717	if( !S_ISDIR(buf.st_mode) ) return 0;
				718	if( access(zBuf, 07) ) return 0;
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	719	#endif /* SQLITE_OMIT_PAGER_PRAGMAS */
tpoindex	9a09a3c	2004-12-20 19:01:32 +0000	[diff] [blame]	720	return 1;
				721	}
				722
				723	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	724	** Read data from a file into a buffer. Return SQLITE_OK if all
				725	** bytes were read successfully and SQLITE_IOERR if anything goes
				726	** wrong.
				727	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	728	static int unixRead(OsFile *id, void *pBuf, int amt){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	729	int got;
drh	da71ce1	2004-06-21 18:14:45 +0000	[diff] [blame]	730	assert( id->isOpen );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	731	SimulateIOError(SQLITE_IOERR);
				732	TIMER_START;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	733	got = read(id->h, pBuf, amt);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	734	TIMER_END;
drh	e29b915	2005-03-18 14:03:15 +0000	[diff] [blame]	735	TRACE5("READ %-3d %5d %7d %d\n", id->h, got, last_page, TIMER_ELAPSED);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	736	SEEK(0);
				737	/* if( got<0 ) got = 0; */
				738	if( got==amt ){
				739	return SQLITE_OK;
				740	}else{
				741	return SQLITE_IOERR;
				742	}
				743	}
				744
				745	/*
				746	** Write data from a buffer into a file. Return SQLITE_OK on success
				747	** or some other error code on failure.
				748	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	749	static int unixWrite(OsFile *id, const void *pBuf, int amt){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	750	int wrote = 0;
drh	da71ce1	2004-06-21 18:14:45 +0000	[diff] [blame]	751	assert( id->isOpen );
drh	4c7f941	2005-02-03 00:29:47 +0000	[diff] [blame]	752	assert( amt>0 );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	753	SimulateIOError(SQLITE_IOERR);
drh	047d483	2004-10-01 14:38:02 +0000	[diff] [blame]	754	SimulateDiskfullError;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	755	TIMER_START;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	756	while( amt>0 && (wrote = write(id->h, pBuf, amt))>0 ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	757	amt -= wrote;
				758	pBuf = &((char*)pBuf)[wrote];
				759	}
				760	TIMER_END;
drh	e29b915	2005-03-18 14:03:15 +0000	[diff] [blame]	761	TRACE5("WRITE %-3d %5d %7d %d\n", id->h, wrote, last_page, TIMER_ELAPSED);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	762	SEEK(0);
				763	if( amt>0 ){
				764	return SQLITE_FULL;
				765	}
				766	return SQLITE_OK;
				767	}
				768
				769	/*
				770	** Move the read/write pointer in a file.
				771	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	772	static int unixSeek(OsFile *id, i64 offset){
drh	da71ce1	2004-06-21 18:14:45 +0000	[diff] [blame]	773	assert( id->isOpen );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	774	SEEK(offset/1024 + 1);
drh	b4746b9	2005-09-09 01:32:06 +0000	[diff] [blame]	775	#ifdef SQLITE_TEST
				776	if( offset ) SimulateDiskfullError
				777	#endif
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	778	lseek(id->h, offset, SEEK_SET);
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	779	return SQLITE_OK;
				780	}
				781
drh	b851b2c	2005-03-10 14:11:12 +0000	[diff] [blame]	782	#ifdef SQLITE_TEST
				783	/*
				784	** Count the number of fullsyncs and normal syncs. This is used to test
				785	** that syncs and fullsyncs are occurring at the right times.
				786	*/
				787	int sqlite3_sync_count = 0;
				788	int sqlite3_fullsync_count = 0;
				789	#endif
				790
drh	f2f2391	2005-10-05 10:29:36 +0000	[diff] [blame]	791	/*
				792	** Use the fdatasync() API only if the HAVE_FDATASYNC macro is defined.
				793	** Otherwise use fsync() in its place.
				794	*/
				795	#ifndef HAVE_FDATASYNC
				796	# define fdatasync fsync
				797	#endif
				798
drh	b851b2c	2005-03-10 14:11:12 +0000	[diff] [blame]	799
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	800	/*
drh	dd809b0	2004-07-17 21:44:57 +0000	[diff] [blame]	801	** The fsync() system call does not work as advertised on many
				802	** unix systems. The following procedure is an attempt to make
				803	** it work better.
drh	1398ad3	2005-01-19 23:24:50 +0000	[diff] [blame]	804	**
				805	** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful
				806	** for testing when we want to run through the test suite quickly.
				807	** You are strongly advised not to deploy with SQLITE_NO_SYNC
				808	** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash
				809	** or power failure will likely corrupt the database file.
drh	dd809b0	2004-07-17 21:44:57 +0000	[diff] [blame]	810	*/
drh	eb796a7	2005-09-08 12:38:41 +0000	[diff] [blame]	811	static int full_fsync(int fd, int fullSync, int dataOnly){
drh	dd809b0	2004-07-17 21:44:57 +0000	[diff] [blame]	812	int rc;
drh	b851b2c	2005-03-10 14:11:12 +0000	[diff] [blame]	813
				814	/* Record the number of times that we do a normal fsync() and
				815	** FULLSYNC. This is used during testing to verify that this procedure
				816	** gets called with the correct arguments.
				817	*/
				818	#ifdef SQLITE_TEST
				819	if( fullSync ) sqlite3_fullsync_count++;
				820	sqlite3_sync_count++;
				821	#endif
				822
				823	/* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a
				824	** no-op
				825	*/
				826	#ifdef SQLITE_NO_SYNC
				827	rc = SQLITE_OK;
				828	#else
				829
drh	dd809b0	2004-07-17 21:44:57 +0000	[diff] [blame]	830	#ifdef F_FULLFSYNC
drh	b851b2c	2005-03-10 14:11:12 +0000	[diff] [blame]	831	if( fullSync ){
drh	f30cc94	2005-03-11 17:52:34 +0000	[diff] [blame]	832	rc = fcntl(fd, F_FULLFSYNC, 0);
drh	b851b2c	2005-03-10 14:11:12 +0000	[diff] [blame]	833	}else{
				834	rc = 1;
				835	}
				836	/* If the FULLSYNC failed, try to do a normal fsync() */
drh	dd809b0	2004-07-17 21:44:57 +0000	[diff] [blame]	837	if( rc ) rc = fsync(fd);
drh	b851b2c	2005-03-10 14:11:12 +0000	[diff] [blame]	838
drh	c035e6e	2005-09-22 15:45:04 +0000	[diff] [blame]	839	#else /* if !defined(F_FULLFSYNC) */
drh	eb796a7	2005-09-08 12:38:41 +0000	[diff] [blame]	840	if( dataOnly ){
				841	rc = fdatasync(fd);
drh	f2f2391	2005-10-05 10:29:36 +0000	[diff] [blame]	842	}else{
drh	eb796a7	2005-09-08 12:38:41 +0000	[diff] [blame]	843	rc = fsync(fd);
				844	}
drh	f30cc94	2005-03-11 17:52:34 +0000	[diff] [blame]	845	#endif /* defined(F_FULLFSYNC) */
drh	b851b2c	2005-03-10 14:11:12 +0000	[diff] [blame]	846	#endif /* defined(SQLITE_NO_SYNC) */
				847
drh	dd809b0	2004-07-17 21:44:57 +0000	[diff] [blame]	848	return rc;
				849	}
				850
				851	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	852	** Make sure all writes to a particular file are committed to disk.
				853	**
drh	eb796a7	2005-09-08 12:38:41 +0000	[diff] [blame]	854	** If dataOnly==0 then both the file itself and its metadata (file
				855	** size, access time, etc) are synced. If dataOnly!=0 then only the
				856	** file data is synced.
				857	**
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	858	** Under Unix, also make sure that the directory entry for the file
				859	** has been created by fsync-ing the directory that contains the file.
				860	** If we do not do this and we encounter a power failure, the directory
				861	** entry for the journal might not exist after we reboot. The next
				862	** SQLite to access the file will not know that the journal exists (because
				863	** the directory entry for the journal was never created) and the transaction
				864	** will not roll back - possibly leading to database corruption.
				865	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	866	static int unixSync(OsFile *id, int dataOnly){
drh	da71ce1	2004-06-21 18:14:45 +0000	[diff] [blame]	867	assert( id->isOpen );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	868	SimulateIOError(SQLITE_IOERR);
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	869	TRACE2("SYNC %-3d\n", id->h);
drh	eb796a7	2005-09-08 12:38:41 +0000	[diff] [blame]	870	if( full_fsync(id->h, id->fullSync, dataOnly) ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	871	return SQLITE_IOERR;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	872	}
drh	a285422	2004-06-17 19:04:17 +0000	[diff] [blame]	873	if( id->dirfd>=0 ){
				874	TRACE2("DIRSYNC %-3d\n", id->dirfd);
danielk1977	d7c03f7	2005-11-25 10:38:22 +0000	[diff] [blame]	875	#ifndef SQLITE_DISABLE_DIRSYNC
danielk1977	0964b23	2005-11-25 08:47:57 +0000	[diff] [blame]	876	if( full_fsync(id->dirfd, id->fullSync, 0) ){
				877	return SQLITE_IOERR;
				878	}
danielk1977	d7c03f7	2005-11-25 10:38:22 +0000	[diff] [blame]	879	#endif
drh	a285422	2004-06-17 19:04:17 +0000	[diff] [blame]	880	close(id->dirfd); /* Only need to sync once, so close the directory */
				881	id->dirfd = -1; /* when we are done. */
				882	}
drh	a285422	2004-06-17 19:04:17 +0000	[diff] [blame]	883	return SQLITE_OK;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	884	}
				885
				886	/*
danielk1977	962398d	2004-06-14 09:35:16 +0000	[diff] [blame]	887	** Sync the directory zDirname. This is a no-op on operating systems other
				888	** than UNIX.
drh	b851b2c	2005-03-10 14:11:12 +0000	[diff] [blame]	889	**
				890	** This is used to make sure the master journal file has truly been deleted
				891	** before making changes to individual journals on a multi-database commit.
drh	f30cc94	2005-03-11 17:52:34 +0000	[diff] [blame]	892	** The F_FULLFSYNC option is not needed here.
danielk1977	962398d	2004-06-14 09:35:16 +0000	[diff] [blame]	893	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	894	static int unixSyncDirectory(const char *zDirname){
danielk1977	d7c03f7	2005-11-25 10:38:22 +0000	[diff] [blame]	895	#ifdef SQLITE_DISABLE_DIRSYNC
				896	return SQLITE_OK;
				897	#else
danielk1977	962398d	2004-06-14 09:35:16 +0000	[diff] [blame]	898	int fd;
				899	int r;
danielk1977	369f27e	2004-06-15 11:40:04 +0000	[diff] [blame]	900	SimulateIOError(SQLITE_IOERR);
drh	8e85577	2005-05-17 11:25:31 +0000	[diff] [blame]	901	fd = open(zDirname, O_RDONLY|O_BINARY, 0);
danielk1977	369f27e	2004-06-15 11:40:04 +0000	[diff] [blame]	902	TRACE3("DIRSYNC %-3d (%s)\n", fd, zDirname);
danielk1977	962398d	2004-06-14 09:35:16 +0000	[diff] [blame]	903	if( fd<0 ){
				904	return SQLITE_CANTOPEN;
				905	}
				906	r = fsync(fd);
				907	close(fd);
				908	return ((r==0)?SQLITE_OK:SQLITE_IOERR);
danielk1977	d7c03f7	2005-11-25 10:38:22 +0000	[diff] [blame]	909	#endif
danielk1977	962398d	2004-06-14 09:35:16 +0000	[diff] [blame]	910	}
				911
				912	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	913	** Truncate an open file to a specified size
				914	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	915	static int unixTruncate(OsFile *id, i64 nByte){
drh	da71ce1	2004-06-21 18:14:45 +0000	[diff] [blame]	916	assert( id->isOpen );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	917	SimulateIOError(SQLITE_IOERR);
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	918	return ftruncate(id->h, nByte)==0 ? SQLITE_OK : SQLITE_IOERR;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	919	}
				920
				921	/*
				922	** Determine the current size of a file in bytes
				923	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	924	static int unixFileSize(OsFile *id, i64 *pSize){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	925	struct stat buf;
drh	da71ce1	2004-06-21 18:14:45 +0000	[diff] [blame]	926	assert( id->isOpen );
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	927	SimulateIOError(SQLITE_IOERR);
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	928	if( fstat(id->h, &buf)!=0 ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	929	return SQLITE_IOERR;
				930	}
				931	*pSize = buf.st_size;
				932	return SQLITE_OK;
				933	}
				934
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	935	/*
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	936	** This routine checks if there is a RESERVED lock held on the specified
				937	** file by this or any other process. If such a lock is held, return
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	938	** non-zero. If the file is unlocked or holds only SHARED locks, then
				939	** return zero.
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	940	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	941	static int unixCheckReservedLock(OsFile *id){
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	942	int r = 0;
				943
drh	da71ce1	2004-06-21 18:14:45 +0000	[diff] [blame]	944	assert( id->isOpen );
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	945	if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	946	sqlite3OsEnterMutex(); /* Needed because id->pLock is shared across threads */
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	947
				948	/* Check if a thread in this process holds such a lock */
				949	if( id->pLock->locktype>SHARED_LOCK ){
				950	r = 1;
				951	}
				952
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	953	/* Otherwise see if some other process holds it.
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	954	*/
				955	if( !r ){
				956	struct flock lock;
				957	lock.l_whence = SEEK_SET;
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	958	lock.l_start = RESERVED_BYTE;
				959	lock.l_len = 1;
				960	lock.l_type = F_WRLCK;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	961	fcntl(id->h, F_GETLK, &lock);
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	962	if( lock.l_type!=F_UNLCK ){
				963	r = 1;
				964	}
				965	}
				966
				967	sqlite3OsLeaveMutex();
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	968	TRACE3("TEST WR-LOCK %d %d\n", id->h, r);
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	969
				970	return r;
				971	}
				972
danielk1977	2b44485	2004-06-29 07:45:33 +0000	[diff] [blame]	973	#ifdef SQLITE_DEBUG
				974	/*
				975	** Helper function for printing out trace information from debugging
				976	** binaries. This returns the string representation of the supplied
				977	** integer lock-type.
				978	*/
				979	static const char * locktypeName(int locktype){
				980	switch( locktype ){
				981	case NO_LOCK: return "NONE";
				982	case SHARED_LOCK: return "SHARED";
				983	case RESERVED_LOCK: return "RESERVED";
				984	case PENDING_LOCK: return "PENDING";
				985	case EXCLUSIVE_LOCK: return "EXCLUSIVE";
				986	}
				987	return "ERROR";
				988	}
				989	#endif
				990
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	991	/*
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	992	** Lock the file with the lock specified by parameter locktype - one
				993	** of the following:
				994	**
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	995	** (1) SHARED_LOCK
				996	** (2) RESERVED_LOCK
				997	** (3) PENDING_LOCK
				998	** (4) EXCLUSIVE_LOCK
				999	**
drh	b3e0434	2004-06-08 00:47:47 +0000	[diff] [blame]	1000	** Sometimes when requesting one lock state, additional lock states
				1001	** are inserted in between. The locking might fail on one of the later
				1002	** transitions leaving the lock state different from what it started but
				1003	** still short of its goal. The following chart shows the allowed
				1004	** transitions and the inserted intermediate states:
				1005	**
				1006	** UNLOCKED -> SHARED
				1007	** SHARED -> RESERVED
				1008	** SHARED -> (PENDING) -> EXCLUSIVE
				1009	** RESERVED -> (PENDING) -> EXCLUSIVE
				1010	** PENDING -> EXCLUSIVE
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1011	**
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1012	** This routine will only increase a lock. Use the sqlite3OsUnlock()
				1013	** routine to lower a locking level.
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1014	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	1015	static int unixLock(OsFile *id, int locktype){
danielk1977	f42f25c	2004-06-25 07:21:28 +0000	[diff] [blame]	1016	/* The following describes the implementation of the various locks and
				1017	** lock transitions in terms of the POSIX advisory shared and exclusive
				1018	** lock primitives (called read-locks and write-locks below, to avoid
				1019	** confusion with SQLite lock names). The algorithms are complicated
				1020	** slightly in order to be compatible with windows systems simultaneously
				1021	** accessing the same database file, in case that is ever required.
				1022	**
				1023	** Symbols defined in os.h identify the 'pending byte' and the 'reserved
				1024	** byte', each a single byte at a well known offset, and the 'shared byte
				1025	** range', a range of 510 bytes at a well known offset.
				1026	**
				1027	** To obtain a SHARED lock, a read-lock is obtained on the 'pending
				1028	** byte'. If this is successful, a random byte from the 'shared byte
				1029	** range' is read-locked and the lock on the 'pending byte' released.
				1030	**
danielk1977	90ba3bd	2004-06-25 08:32:25 +0000	[diff] [blame]	1031	** A process may only obtain a RESERVED lock after it has a SHARED lock.
				1032	** A RESERVED lock is implemented by grabbing a write-lock on the
				1033	** 'reserved byte'.
danielk1977	f42f25c	2004-06-25 07:21:28 +0000	[diff] [blame]	1034	**
				1035	** A process may only obtain a PENDING lock after it has obtained a
danielk1977	90ba3bd	2004-06-25 08:32:25 +0000	[diff] [blame]	1036	** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
				1037	** on the 'pending byte'. This ensures that no new SHARED locks can be
				1038	** obtained, but existing SHARED locks are allowed to persist. A process
				1039	** does not have to obtain a RESERVED lock on the way to a PENDING lock.
				1040	** This property is used by the algorithm for rolling back a journal file
				1041	** after a crash.
danielk1977	f42f25c	2004-06-25 07:21:28 +0000	[diff] [blame]	1042	**
danielk1977	90ba3bd	2004-06-25 08:32:25 +0000	[diff] [blame]	1043	** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
				1044	** implemented by obtaining a write-lock on the entire 'shared byte
				1045	** range'. Since all other locks require a read-lock on one of the bytes
				1046	** within this range, this ensures that no other locks are held on the
				1047	** database.
danielk1977	f42f25c	2004-06-25 07:21:28 +0000	[diff] [blame]	1048	**
				1049	** The reason a single byte cannot be used instead of the 'shared byte
				1050	** range' is that some versions of windows do not support read-locks. By
				1051	** locking a random byte from a range, concurrent SHARED locks may exist
				1052	** even if the locking primitive used is always a write-lock.
				1053	*/
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1054	int rc = SQLITE_OK;
				1055	struct lockInfo *pLock = id->pLock;
				1056	struct flock lock;
				1057	int s;
				1058
drh	da71ce1	2004-06-21 18:14:45 +0000	[diff] [blame]	1059	assert( id->isOpen );
drh	e29b915	2005-03-18 14:03:15 +0000	[diff] [blame]	1060	TRACE7("LOCK %d %s was %s(%s,%d) pid=%d\n", id->h, locktypeName(locktype),
danielk1977	2b44485	2004-06-29 07:45:33 +0000	[diff] [blame]	1061	locktypeName(id->locktype), locktypeName(pLock->locktype), pLock->cnt
				1062	,getpid() );
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1063	if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1064
				1065	/* If there is already a lock of this type or more restrictive on the
				1066	** OsFile, do nothing. Don't use the end_lock: exit path, as
				1067	** sqlite3OsEnterMutex() hasn't been called yet.
				1068	*/
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1069	if( id->locktype>=locktype ){
drh	e29b915	2005-03-18 14:03:15 +0000	[diff] [blame]	1070	TRACE3("LOCK %d %s ok (already held)\n", id->h, locktypeName(locktype));
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1071	return SQLITE_OK;
				1072	}
				1073
drh	b3e0434	2004-06-08 00:47:47 +0000	[diff] [blame]	1074	/* Make sure the locking sequence is correct
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1075	*/
drh	b3e0434	2004-06-08 00:47:47 +0000	[diff] [blame]	1076	assert( id->locktype!=NO_LOCK || locktype==SHARED_LOCK );
				1077	assert( locktype!=PENDING_LOCK );
				1078	assert( locktype!=RESERVED_LOCK || id->locktype==SHARED_LOCK );
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1079
drh	b3e0434	2004-06-08 00:47:47 +0000	[diff] [blame]	1080	/* This mutex is needed because id->pLock is shared across threads
				1081	*/
				1082	sqlite3OsEnterMutex();
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1083
				1084	/* If some thread using this PID has a lock via a different OsFile*
				1085	** handle that precludes the requested lock, return BUSY.
				1086	*/
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1087	if( (id->locktype!=pLock->locktype &&
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1088	(pLock->locktype>=PENDING_LOCK || locktype>SHARED_LOCK))
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1089	){
				1090	rc = SQLITE_BUSY;
				1091	goto end_lock;
				1092	}
				1093
				1094	/* If a SHARED lock is requested, and some thread using this PID already
				1095	** has a SHARED or RESERVED lock, then increment reference counts and
				1096	** return SQLITE_OK.
				1097	*/
				1098	if( locktype==SHARED_LOCK &&
				1099	(pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
				1100	assert( locktype==SHARED_LOCK );
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1101	assert( id->locktype==0 );
danielk1977	ecb2a96	2004-06-02 06:30:16 +0000	[diff] [blame]	1102	assert( pLock->cnt>0 );
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1103	id->locktype = SHARED_LOCK;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1104	pLock->cnt++;
				1105	id->pOpen->nLock++;
				1106	goto end_lock;
				1107	}
				1108
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1109	lock.l_len = 1L;
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1110
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1111	lock.l_whence = SEEK_SET;
				1112
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1113	/* A PENDING lock is needed before acquiring a SHARED lock and before
				1114	** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
				1115	** be released.
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1116	*/
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1117	if( locktype==SHARED_LOCK
				1118	|| (locktype==EXCLUSIVE_LOCK && id->locktype<PENDING_LOCK)
				1119	){
danielk1977	489468c	2004-06-28 08:25:47 +0000	[diff] [blame]	1120	lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1121	lock.l_start = PENDING_BYTE;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1122	s = fcntl(id->h, F_SETLK, &lock);
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1123	if( s ){
				1124	rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
				1125	goto end_lock;
				1126	}
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1127	}
				1128
				1129
				1130	/* If control gets to this point, then actually go ahead and make
				1131	** operating system calls for the specified lock.
				1132	*/
				1133	if( locktype==SHARED_LOCK ){
				1134	assert( pLock->cnt==0 );
				1135	assert( pLock->locktype==0 );
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1136
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1137	/* Now get the read-lock */
				1138	lock.l_start = SHARED_FIRST;
				1139	lock.l_len = SHARED_SIZE;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1140	s = fcntl(id->h, F_SETLK, &lock);
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1141
				1142	/* Drop the temporary PENDING lock */
				1143	lock.l_start = PENDING_BYTE;
				1144	lock.l_len = 1L;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1145	lock.l_type = F_UNLCK;
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1146	if( fcntl(id->h, F_SETLK, &lock)!=0 ){
				1147	rc = SQLITE_IOERR; /* This should never happen */
				1148	goto end_lock;
				1149	}
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1150	if( s ){
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1151	rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
				1152	}else{
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1153	id->locktype = SHARED_LOCK;
danielk1977	ecb2a96	2004-06-02 06:30:16 +0000	[diff] [blame]	1154	id->pOpen->nLock++;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1155	pLock->cnt = 1;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1156	}
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1157	}else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
				1158	/* We are trying for an exclusive lock but another thread in this
				1159	** same process is still holding a shared lock. */
				1160	rc = SQLITE_BUSY;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1161	}else{
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1162	/* The request was for a RESERVED or EXCLUSIVE lock. It is
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1163	** assumed that there is a SHARED or greater lock on the file
				1164	** already.
				1165	*/
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1166	assert( 0!=id->locktype );
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1167	lock.l_type = F_WRLCK;
				1168	switch( locktype ){
				1169	case RESERVED_LOCK:
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1170	lock.l_start = RESERVED_BYTE;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1171	break;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1172	case EXCLUSIVE_LOCK:
drh	2ac3ee9	2004-06-07 16:27:46 +0000	[diff] [blame]	1173	lock.l_start = SHARED_FIRST;
				1174	lock.l_len = SHARED_SIZE;
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1175	break;
				1176	default:
				1177	assert(0);
				1178	}
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1179	s = fcntl(id->h, F_SETLK, &lock);
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1180	if( s ){
				1181	rc = (errno==EINVAL) ? SQLITE_NOLFS : SQLITE_BUSY;
				1182	}
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1183	}
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1184
danielk1977	ecb2a96	2004-06-02 06:30:16 +0000	[diff] [blame]	1185	if( rc==SQLITE_OK ){
danielk1977	13adf8a	2004-06-03 16:08:41 +0000	[diff] [blame]	1186	id->locktype = locktype;
danielk1977	ecb2a96	2004-06-02 06:30:16 +0000	[diff] [blame]	1187	pLock->locktype = locktype;
drh	3cde3bb	2004-06-12 02:17:14 +0000	[diff] [blame]	1188	}else if( locktype==EXCLUSIVE_LOCK ){
				1189	id->locktype = PENDING_LOCK;
				1190	pLock->locktype = PENDING_LOCK;
danielk1977	ecb2a96	2004-06-02 06:30:16 +0000	[diff] [blame]	1191	}
danielk1977	9a1d0ab	2004-06-01 14:09:28 +0000	[diff] [blame]	1192
				1193	end_lock:
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1194	sqlite3OsLeaveMutex();
drh	e29b915	2005-03-18 14:03:15 +0000	[diff] [blame]	1195	TRACE4("LOCK %d %s %s\n", id->h, locktypeName(locktype),
danielk1977	2b44485	2004-06-29 07:45:33 +0000	[diff] [blame]	1196	rc==SQLITE_OK ? "ok" : "failed");
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1197	return rc;
				1198	}
				1199
				1200	/*
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1201	** Lower the locking level on file descriptor id to locktype. locktype
				1202	** must be either NO_LOCK or SHARED_LOCK.
				1203	**
				1204	** If the locking level of the file descriptor is already at or below
				1205	** the requested locking level, this routine is a no-op.
				1206	**
drh	9c105bb	2004-10-02 20:38:28 +0000	[diff] [blame]	1207	** It is not possible for this routine to fail if the second argument
				1208	** is NO_LOCK. If the second argument is SHARED_LOCK, this routine
				1209	** might return SQLITE_IOERR instead of SQLITE_OK.
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1210	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	1211	static int unixUnlock(OsFile *id, int locktype){
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1212	struct lockInfo *pLock;
				1213	struct flock lock;
drh	9c105bb	2004-10-02 20:38:28 +0000	[diff] [blame]	1214	int rc = SQLITE_OK;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1215
drh	da71ce1	2004-06-21 18:14:45 +0000	[diff] [blame]	1216	assert( id->isOpen );
drh	e29b915	2005-03-18 14:03:15 +0000	[diff] [blame]	1217	TRACE7("UNLOCK %d %d was %d(%d,%d) pid=%d\n", id->h, locktype, id->locktype,
danielk1977	2b44485	2004-06-29 07:45:33 +0000	[diff] [blame]	1218	id->pLock->locktype, id->pLock->cnt, getpid());
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1219	if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1220
				1221	assert( locktype<=SHARED_LOCK );
				1222	if( id->locktype<=locktype ){
				1223	return SQLITE_OK;
				1224	}
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1225	sqlite3OsEnterMutex();
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1226	pLock = id->pLock;
				1227	assert( pLock->cnt!=0 );
				1228	if( id->locktype>SHARED_LOCK ){
				1229	assert( pLock->locktype==id->locktype );
drh	9c105bb	2004-10-02 20:38:28 +0000	[diff] [blame]	1230	if( locktype==SHARED_LOCK ){
				1231	lock.l_type = F_RDLCK;
				1232	lock.l_whence = SEEK_SET;
				1233	lock.l_start = SHARED_FIRST;
				1234	lock.l_len = SHARED_SIZE;
				1235	if( fcntl(id->h, F_SETLK, &lock)!=0 ){
				1236	/* This should never happen */
				1237	rc = SQLITE_IOERR;
				1238	}
				1239	}
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1240	lock.l_type = F_UNLCK;
				1241	lock.l_whence = SEEK_SET;
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1242	lock.l_start = PENDING_BYTE;
				1243	lock.l_len = 2L; assert( PENDING_BYTE+1==RESERVED_BYTE );
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1244	if( fcntl(id->h, F_SETLK, &lock)==0 ){
				1245	pLock->locktype = SHARED_LOCK;
				1246	}else{
				1247	rc = SQLITE_IOERR; /* This should never happen */
				1248	}
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1249	}
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1250	if( locktype==NO_LOCK ){
				1251	struct openCnt *pOpen;
danielk1977	ecb2a96	2004-06-02 06:30:16 +0000	[diff] [blame]	1252
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1253	/* Decrement the shared lock counter. Release the lock using an
				1254	** OS call only when all threads in this same process have released
				1255	** the lock.
				1256	*/
				1257	pLock->cnt--;
				1258	if( pLock->cnt==0 ){
				1259	lock.l_type = F_UNLCK;
				1260	lock.l_whence = SEEK_SET;
				1261	lock.l_start = lock.l_len = 0L;
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1262	if( fcntl(id->h, F_SETLK, &lock)==0 ){
				1263	pLock->locktype = NO_LOCK;
				1264	}else{
				1265	rc = SQLITE_IOERR; /* This should never happen */
				1266	}
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1267	}
				1268
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1269	/* Decrement the count of locks against this same file. When the
				1270	** count reaches zero, close any other file descriptors whose close
				1271	** was deferred because of outstanding locks.
				1272	*/
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1273	pOpen = id->pOpen;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1274	pOpen->nLock--;
				1275	assert( pOpen->nLock>=0 );
				1276	if( pOpen->nLock==0 && pOpen->nPending>0 ){
				1277	int i;
				1278	for(i=0; i<pOpen->nPending; i++){
				1279	close(pOpen->aPending[i]);
				1280	}
				1281	sqliteFree(pOpen->aPending);
				1282	pOpen->nPending = 0;
				1283	pOpen->aPending = 0;
				1284	}
				1285	}
				1286	sqlite3OsLeaveMutex();
drh	a6abd04	2004-06-09 17:37:22 +0000	[diff] [blame]	1287	id->locktype = locktype;
drh	9c105bb	2004-10-02 20:38:28 +0000	[diff] [blame]	1288	return rc;
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1289	}
				1290
				1291	/*
danielk1977	e302663	2004-06-22 11:29:02 +0000	[diff] [blame]	1292	** Close a file.
				1293	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	1294	static int unixClose(OsFile *id){
danielk1977	e302663	2004-06-22 11:29:02 +0000	[diff] [blame]	1295	if( !id->isOpen ) return SQLITE_OK;
drh	2b4b596	2005-06-15 17:47:55 +0000	[diff] [blame]	1296	if( CHECK_THREADID(id) ) return SQLITE_MISUSE;
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	1297	sqlite3Io.xUnlock(id, NO_LOCK);
danielk1977	e302663	2004-06-22 11:29:02 +0000	[diff] [blame]	1298	if( id->dirfd>=0 ) close(id->dirfd);
				1299	id->dirfd = -1;
				1300	sqlite3OsEnterMutex();
				1301	if( id->pOpen->nLock ){
				1302	/* If there are outstanding locks, do not actually close the file just
				1303	** yet because that would clear those locks. Instead, add the file
				1304	** descriptor to pOpen->aPending. It will be automatically closed when
				1305	** the last lock is cleared.
				1306	*/
				1307	int *aNew;
				1308	struct openCnt *pOpen = id->pOpen;
drh	ad81e87	2005-08-21 21:45:01 +0000	[diff] [blame]	1309	aNew = sqliteRealloc( pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
danielk1977	e302663	2004-06-22 11:29:02 +0000	[diff] [blame]	1310	if( aNew==0 ){
				1311	/* If a malloc fails, just leak the file descriptor */
				1312	}else{
				1313	pOpen->aPending = aNew;
drh	ad81e87	2005-08-21 21:45:01 +0000	[diff] [blame]	1314	pOpen->aPending[pOpen->nPending] = id->h;
				1315	pOpen->nPending++;
danielk1977	e302663	2004-06-22 11:29:02 +0000	[diff] [blame]	1316	}
				1317	}else{
				1318	/* There are no outstanding locks so we can close the file immediately */
				1319	close(id->h);
				1320	}
				1321	releaseLockInfo(id->pLock);
				1322	releaseOpenCnt(id->pOpen);
				1323	sqlite3OsLeaveMutex();
				1324	id->isOpen = 0;
				1325	TRACE2("CLOSE %-3d\n", id->h);
				1326	OpenCounter(-1);
				1327	return SQLITE_OK;
				1328	}
				1329
				1330	/*
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1331	** Turn a relative pathname into a full pathname. Return a pointer
				1332	** to the full pathname stored in space obtained from sqliteMalloc().
				1333	** The calling function is responsible for freeing this space once it
				1334	** is no longer needed.
				1335	*/
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	1336	static char unixFullPathname(const char zRelative){
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1337	char *zFull = 0;
				1338	if( zRelative[0]=='/' ){
				1339	sqlite3SetString(&zFull, zRelative, (char*)0);
				1340	}else{
drh	79158e1	2005-09-06 21:40:45 +0000	[diff] [blame]	1341	char *zBuf = sqliteMalloc(5000);
				1342	if( zBuf==0 ){
				1343	return 0;
				1344	}
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1345	zBuf[0] = 0;
drh	79158e1	2005-09-06 21:40:45 +0000	[diff] [blame]	1346	sqlite3SetString(&zFull, getcwd(zBuf, 5000), "/", zRelative,
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1347	(char*)0);
drh	79158e1	2005-09-06 21:40:45 +0000	[diff] [blame]	1348	sqliteFree(zBuf);
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1349	}
				1350	return zFull;
				1351	}
				1352
				1353
drh	9c06c95	2005-11-26 00:25:00 +0000	[diff] [blame^]	1354	/*
				1355	** This is the structure that defines all of the I/O routines.
				1356	*/
				1357	struct sqlite3IoVtbl sqlite3Io = {
				1358	unixDelete,
				1359	unixFileExists,
				1360	unixOpenReadWrite,
				1361	unixOpenExclusive,
				1362	unixOpenReadOnly,
				1363	unixOpenDirectory,
				1364	unixSyncDirectory,
				1365	unixTempFileName,
				1366	unixIsDirWritable,
				1367	unixClose,
				1368	unixRead,
				1369	unixWrite,
				1370	unixSeek,
				1371	unixSync,
				1372	unixTruncate,
				1373	unixFileSize,
				1374	unixFullPathname,
				1375	unixLock,
				1376	unixUnlock,
				1377	unixCheckReservedLock,
				1378	};
				1379
				1380
drh	0ccebe7	2005-06-07 22:22:50 +0000	[diff] [blame]	1381	#endif /* SQLITE_OMIT_DISKIO */
				1382	/***************************************************************************
				1383	** Everything above deals with file I/O. Everything that follows deals
				1384	** with other miscellaneous aspects of the operating system interface
				1385	****************************************************************************/
				1386
				1387
				1388	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1389	** Get information to seed the random number generator. The seed
				1390	** is written into the buffer zBuf[256]. The calling function must
				1391	** supply a sufficiently large buffer.
				1392	*/
				1393	int sqlite3OsRandomSeed(char *zBuf){
				1394	/* We have to initialize zBuf to prevent valgrind from reporting
				1395	** errors. The reports issued by valgrind are incorrect - we would
				1396	** prefer that the randomness be increased by making use of the
				1397	** uninitialized space in zBuf - but valgrind errors tend to worry
				1398	** some users. Rather than argue, it seems easier just to initialize
				1399	** the whole array and silence valgrind, even if that means less randomness
				1400	** in the random seed.
				1401	**
				1402	** When testing, initializing zBuf[] to zero is all we do. That means
				1403	** that we always use the same random number sequence.* This makes the
				1404	** tests repeatable.
				1405	*/
				1406	memset(zBuf, 0, 256);
				1407	#if !defined(SQLITE_TEST)
				1408	{
drh	842b864	2005-01-21 17:53:17 +0000	[diff] [blame]	1409	int pid, fd;
				1410	fd = open("/dev/urandom", O_RDONLY);
				1411	if( fd<0 ){
				1412	time((time_t*)zBuf);
				1413	pid = getpid();
				1414	memcpy(&zBuf[sizeof(time_t)], &pid, sizeof(pid));
				1415	}else{
				1416	read(fd, zBuf, 256);
				1417	close(fd);
				1418	}
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1419	}
				1420	#endif
				1421	return SQLITE_OK;
				1422	}
				1423
				1424	/*
				1425	** Sleep for a little while. Return the amount of time slept.
				1426	*/
				1427	int sqlite3OsSleep(int ms){
				1428	#if defined(HAVE_USLEEP) && HAVE_USLEEP
				1429	usleep(ms*1000);
				1430	return ms;
				1431	#else
				1432	sleep((ms+999)/1000);
				1433	return 1000*((ms+999)/1000);
				1434	#endif
				1435	}
				1436
				1437	/*
				1438	** Static variables used for thread synchronization
				1439	*/
				1440	static int inMutex = 0;
drh	7906975	2004-05-22 21:30:40 +0000	[diff] [blame]	1441	#ifdef SQLITE_UNIX_THREADS
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1442	static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
drh	7906975	2004-05-22 21:30:40 +0000	[diff] [blame]	1443	#endif
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1444
				1445	/*
				1446	** The following pair of routine implement mutual exclusion for
				1447	** multi-threaded processes. Only a single thread is allowed to
				1448	** executed code that is surrounded by EnterMutex() and LeaveMutex().
				1449	**
				1450	** SQLite uses only a single Mutex. There is not much critical
				1451	** code and what little there is executes quickly and without blocking.
				1452	*/
				1453	void sqlite3OsEnterMutex(){
				1454	#ifdef SQLITE_UNIX_THREADS
				1455	pthread_mutex_lock(&mutex);
				1456	#endif
				1457	assert( !inMutex );
				1458	inMutex = 1;
				1459	}
				1460	void sqlite3OsLeaveMutex(){
				1461	assert( inMutex );
				1462	inMutex = 0;
				1463	#ifdef SQLITE_UNIX_THREADS
				1464	pthread_mutex_unlock(&mutex);
				1465	#endif
				1466	}
				1467
				1468	/*
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1469	** The following variable, if set to a non-zero value, becomes the result
				1470	** returned from sqlite3OsCurrentTime(). This is used for testing.
				1471	*/
				1472	#ifdef SQLITE_TEST
				1473	int sqlite3_current_time = 0;
				1474	#endif
				1475
				1476	/*
				1477	** Find the current time (in Universal Coordinated Time). Write the
				1478	** current time and date as a Julian Day number into *prNow and
				1479	** return 0. Return 1 if the time and date cannot be found.
				1480	*/
				1481	int sqlite3OsCurrentTime(double *prNow){
drh	19e2d37	2005-08-29 23:00:03 +0000	[diff] [blame]	1482	#ifdef NO_GETTOD
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1483	time_t t;
				1484	time(&t);
				1485	*prNow = t/86400.0 + 2440587.5;
drh	19e2d37	2005-08-29 23:00:03 +0000	[diff] [blame]	1486	#else
				1487	struct timeval sNow;
				1488	struct timezone sTz; /* Not used */
				1489	gettimeofday(&sNow, &sTz);
				1490	*prNow = 2440587.5 + sNow.tv_sec/86400.0 + sNow.tv_usec/86400000000.0;
				1491	#endif
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1492	#ifdef SQLITE_TEST
				1493	if( sqlite3_current_time ){
				1494	*prNow = sqlite3_current_time/86400.0 + 2440587.5;
				1495	}
				1496	#endif
				1497	return 0;
				1498	}
				1499
drh	bbd42a6	2004-05-22 17:41:58 +0000	[diff] [blame]	1500	#endif /* OS_UNIX */