// XXX ToDo // in extendFile(), if allocBRun() fails, ask for small chunks instead of one large one // ability to add attributes to the attr dir // extension of attributes unit UbixFS; interface {$DEFINE GRAPHICS} uses device, lists {$IFDEF GRAPHICS}, ObjGfx40, ogFont, objImage, DOS, CRT {$ENDIF}; // open/fcntl - O_SYNC is only implemented on blocks devices and on files // located on an ext2 file system const O_ACCMODE = 00000003; const O_RDONLY = 00000000; const O_WRONLY = 00000001; const O_RDWR = 00000002; const O_APPEND = 00000010; const O_CREAT = 00000400; // not fcntl const O_TRUNC = 00001000; // not fcntl const O_EXCL = 00002000; // not fcntl const O_LARGEFILE = 00004000; const O_SYNC = 00100000; // O_NONBLOCK = 00200004; // HPUX has separate NDELAY & NONBLOCK */ //const O_NDELAY = O_NONBLOCK; const O_NOCTTY = 00400000; // not fcntl */ const FASYNC = 00020000; // fcntl, for BSD compatibility */ const O_DIRECT = 00040000; // direct disk access hint - currently ignored */ const O_DIRECTORY = 00010000; // must be a directory */ const O_NOFOLLOW = 00000200; // don't follow links */ const O_INVISIBLE = 04000000; // invisible I/O, for DMAPI/XDSM const F_DUPFD = 0; // dup const F_GETFD = 1; // get f_flags const F_SETFD = 2; // set f_flags const F_GETFL = 3; // more flags (cloexec) const F_SETFL = 4; const F_GETLK = 5; const F_SETLK = 6; const F_SETLKW = 7; const F_GETLK64 = 8; const F_SETLK64 = 9; const F_SETLKW64 = 10; // for F_[GET|SET]FL const FD_CLOEXEC = 1; // actually anything with low bit set goes // for posix fcntl() and lockf() const F_RDLCK = 01; const F_WRLCK = 02; const F_UNLCK = 03; // for old implementation of bsd flock () const F_EXLCK = 4; // or 3 const F_SHLCK = 8; // or 4 // for leases const F_INPROGRESS= 16; // operations for bsd flock(), also used by the kernel implementation const LOCK_SH = 1; // shared lock const LOCK_EX = 2; // exclusive lock const LOCK_NB = 4; // or'd with one of the above to prevent blocking const LOCK_UN = 8; // remove lock const LOCK_MAND = 32; // This is a mandatory flock const LOCK_READ = 64; // ... Which allows concurrent read operations const LOCK_WRITE = 128; // ... Which allows concurrent write operations const LOCK_RW = 192; // ... Which allows concurrent read & write ops const INODE_NO_FLAG = $00000000; const INODE_IN_USE = $00000001; const ATTR_INODE = $00000004; const INODE_LOGGED = $00000008; const INODE_DELETED = $00000010; const PERMANENT_FLAGS = $0000ffff; const INODE_NO_CACHE = $00010000; const INODE_WAS_WRITTEN = $00020000; const S_IFDIR = $00040000; // POSIX const NO_TRANSACTION = $00040000; const DATASTREAM_DIRECT_BLOCKS = 16; // number of direct blocks in the dataStream const UBIXFS_MAGIC_1 = $A0A0A0A; const UBIXFS_MAGIC_2 = $B0B0B0B; const UBIXFS_MAGIC_3 = $C0C0C0C; const UBIXFS_INODE_MAGIC = $3BBE0AD9; const UBIXFS_CLEAN = $434C454E; const UBIXFS_DIRTY = $44495254; const SECTOR_SHIFT = 9; const SECTOR_SIZE = 1 shl SECTOR_SHIFT; type int8 = shortInt; int16 = smallInt; int32 = longInt; uInt8 = byte; uInt16 = word; uInt32 = dWord; { Common Integer array definitions } type int8ArrayPtr = ^int8Array; int8Array = array[ 0..0 ] of int8; int16ArrayPtr = ^int16Array; int16Array = array[ 0..0 ] of int16; int32ArrayPtr = ^int32Array; int32Array = array[ 0..0 ] of int32; { Common Unsigned Integer array definitions } type uInt8ArrayPtr = ^uInt8Array; uInt8Array = array[ 0..0 ] of uInt8; uInt16ArrayPtr = ^uInt16Array; uInt16Array = array[ 0..0 ] of uInt16; uInt32ArrayPtr = ^uInt32Array; uInt32Array = array[ 0..0 ] of uInt32; type flock = object lType : smallint; lWhence : smallint; lStart : integer; lLen : integer; lPID : integer; end; // flock type flock64 = object lType : smallint; lWhence : smallint; lStart : int64; lLen : int64; lPID : integer; end; // flock64 (* TBlockRun = packed record AG : int32; start : uInt16; len : uInt16; end; inodeAddr = TblockRun; uPtr = packed record case integer of 0: (i : int32); 1: (u : uInt32); 2: (s : single); 3: (d : double); 4: (p : pointer); 5: (offset : int64); 6: (iAddr : inodeAddr); end; *) PDiskSuperBlock = ^TDiskSuperBlock; TDiskSuperBlock = packed record name : array[ 0..31 ] of char; magic1 : int32; fsByteOrder : int32; // blockSize on disk blockSize : uInt32; // number of bits needed to shift a block number to get a byte address blockShift : uInt32; numBlocks : int64; usedBlocks : int64; inodeSize : uInt32; magic2 : uInt32; blocksPerAG : uInt32; AGShift : uInt32; numAGs : uInt32; flags : uInt32; logBlocks : TBlockRun; logStart : int64; logEnd : int64; magic3 : int32; BAT : inodeAddr; rootDir : inodeAddr; indicies : inodeAddr; pad : array[0..371] of byte; end; // TDiskSuperBlock PDataStream = ^TDataStream; TDataStream = packed record direct : array[0..DATASTREAM_DIRECT_BLOCKS-1] of TBlockRun; maxDirect : int64; maxIndirect : int64; maxDIndirect : int64; size : int64; indirect : TBlockRun; doubleIndirect: TBlockrun; blockCount : uInt32; end; // TDataStream PSmallDataNode = ^TSmallDataNode; TSmallDataNode = packed object public nameSize : uInt16; dataSize : uInt16; dataType : treeTypes; reserved : array[ 0..2 ] of uInt8; name : array[ 0..0 ] of char; function align(size:uInt32):uInt32; function dataPtr:pointer; function isEmpty:boolean; function next:PSmallDataNode; function size:uInt32; end; // TSmallDataNode PUbixfsInode = ^TUbixfsInode; TUbixfsInode = packed object public magic : int32; inodeNum : inodeAddr; uid : uInt32; gid : uInt32; mode : int32; flags : int32; createTime : int64; modifiedTime : int64; parent : inodeAddr; attributes : inodeAddr; iType : uInt32; inodeSize : uInt32; data : uPtr; {this was the etc field in bfs} blocks : TDataStream; smallData : TSmallDataNode; function addAttr(attribute:PChar; attrType:treeTypes; buffer:pointer; count:uInt32):PSmallDataNode; function delAttr(attribute:PChar; whichNode:PSmallDataNode):boolean; function findAttr(attribute:PChar):PSmallDataNode; end; // TUbixfsInode PCacheNode = ^TCacheNode; TCacheNode = object public address : int64; lastUsed : int64; data : pointer; dataSize : uInt32; dirty : boolean; locked : boolean; constructor init(_address, _lastUsed:int64; _dataSize:uInt32); destructor done; end; // TCacheNode PVNode = ^TVNode; TVNode = object TID : uInt32; // Thread ID iAddr : inodeAddr; position : int64; flags : uInt32; end; // TVNode PVnid = ^TVnid; TVnid = object refCount : uInt32; iAddr : inodeAddr; mode : uInt32; end; // TVnid PUbixFS = ^TUbixFS; TUbixFS = object private dev : PDevice; superBlock : PDiskSuperBlock; blockCache : PbTree; blockTime : PbTree; vnids : PbTree; vNodeQueue : PQueue; fileDescriptors : ^array[0..0] of TVNode; maxCacheEntry : uInt32; maxFDEntry : uInt32; procedure assert(x:boolean); function allocBRun(blockRun:TBlockRun):TBlockRun; function allocInode(parentIAddr:inodeAddr):PUbixfsInode; function cacheBlock(blockAddr:TBlockRun):pointer; function extendFile(inode:PUbixfsInode; newSize:int64):boolean; function findBRun(dataStream:PDataStream; blockNum:uInt32):TBlockRun; function findBRunDirect(dataStream:PDataStream; blockNum:uInt32):TBlockRun; function findBRunIndirect(dataStream:PDataStream; blockNum:uInt32):TBlockRun; function findBRunDIndirect(dataStream:PDataStream; blockNum:uInt32):TBlockRun; function freeUsedBlocks(blockRun:TBlockRun):integer; function getFileSize(inode:PUbixfsInode):int64; function getFileSpace(inode:PUbixfsInode):int64; function iAddrToStr(iAddr:inodeAddr):string; function isBAT(iAddr:inodeAddr):boolean; function isValidIAddr(iAddr:inodeAddr):boolean; function loadBlock(blockAddr:TBlockRun):pointer; function nextAG(AG:uInt32):uInt32; function pathToIAddr(path:PChar):inodeAddr; function readAttr(iAddr:inodeAddr; attribute:PChar; attrType:treeTypes; position:uInt32; buffer:pointer; count:uInt32):integer; function readDataStream(iAddr:inodeAddr; position:int64; buf:pointer; length:uInt32):integer; function saveBlock(blockAddr:TBlockRun; buf:pointer):boolean; procedure setBlockRun(var blockRun:TBlockRun; AG:uInt32; start, len:uInt16); procedure setIAddr(var inode:inodeAddr; AG:uInt32; start, len:uInt16); function shrinkFile(inode:PUbixfsInode):boolean; function strToIAddr(s:string):inodeAddr; function writeAttr(iAddr:inodeAddr; attribute:PChar; attrType:treeTypes; position:uInt32; buffer:pointer; count:uInt32):integer; function writeDataStream(iAddr:inodeAddr; position:int64; buf:pointer; length:uInt32):integer; public constructor init(device:Pdevice); procedure printSuperBlock; procedure sync; function syncBlock(cacheNode:PCacheNode):boolean; {$IFDEF GRAPHICS} procedure visualize; {$ENDIF} function vfs_fCreate(fileName:PChar):integer; function vfs_fExist(path:PChar):integer; function vfs_close(handle:integer):integer; function vfs_format(device:PDevice; fsBlockSize:uInt32; quick:boolean):integer; function vfs_init:integer; function vfs_mkdir(path:PChar):integer; function vfs_read(handle:integer; buffer:pointer; size:uInt32):integer; function vfs_readAttr(handle:integer; attribute:PChar; attrType:treeTypes; position:uInt32; buffer:pointer; count:uInt32):integer; function vfs_seek(handle:integer; newPosition:int64):integer; function vfs_unlink(fileName:PChar):integer; function vfs_write(handle:integer; buffer:pointer; size:uInt32):integer; function vfs_writeAttr(handle:integer; attribute:PChar; attrType:treeTypes; position:uInt32; buffer:pointer; count:uInt32):integer; procedure examine; destructor done; virtual; end; // TUbixFS implementation uses strings, dos, math, lists; // IMPORTANT NOTE // You cannot under any circumstance change NUM_EXTENTION_BLOCKS. This // constant defines the multiple of blocks that a file will be extended // by, *and* it determines how many blocks are used in each blockRun in // the Double Indirect dataStream. // NUM_EXTENSION_BLOCKS determines how much slack space is allocated for // a file when it is extended. For example, a file with 0 bytes in it // will take up 0 blocks for the data and 1 block for the inode (ignoring // space required to hold the directory entry). When the first byte is // written, extendFile() will allocate a multiple of NUM_EXTENSION_BLOCKS // to accomodate the amount of data being written out. So, a 1 byte file will // actually take up NUM_EXTENSION_BLOCKS (plus one for the inode) until the // file is closed. Then the file will be trimmed and the slack space reclaimed. // This value is also used in the double-indirect region of the dataStream // to determine how many blocks each blockRun uses. // vv DO NOT CHANGE vv // const NUM_EXTENSION_BLOCKS = 64; // <--- don't change this // ^^ DO NOT CHANGE ^^ // const NUM_CACHE_ENTRIES = 8192; // default max entries in block cache const NUM_FD_ENTRIES = 1024; // default max entries in the File Descriptor Table const BAT_PAGE_SIZE = 256; // page size for the Block Allocation Tree type PUbixBTreeVFS = ^TUbixBTreeVFS; TUbixBTreeVFS = object(TBTreeVFS) private fs : PUbixFS; iAddr : inodeAddr; position : int64; openFile : boolean; public constructor init(_fs:PUbixFS); function fClose:boolean; virtual; function fCreate(const filename:string):boolean; virtual; function fExist(const filename:string):boolean; virtual; function fOpen(const filename:string):boolean; virtual; function fRead(var buf; size:uInt32):boolean; virtual; function fSeek(offset:uInt32):boolean; virtual; function fWrite(var buf; size:uInt32):boolean; virtual; destructor done; virtual; end; // TUbixBTreeVFS function getRDTSCTime:int64; var x:int64; begin asm rdtsc mov dword ptr x, eax mov dword ptr x+4, edx end; result := x; end; // B^Tree compareKey BlockRun function function compareKeyBlockRun(key1, key2:pointer):integer; begin result := 0; if ((key1 = NIL) or (key2 = NIL)) then exit; if (TBlockRun(key1^).AG < TBlockRun(key2^).AG) then result := -1 else if (TBlockRun(key1^).AG > TBlockRun(key2^).AG) then result := 1 else // AGs are the same if (TBlockRun(key1^).start < TBlockRun(key2^).start) then result := -1 else if (TBlockRun(key1^).start > TBlockRun(key2^).start) then result := 1; // else they are identical end; // compareKeyInt64 // B^Tree copyKey BlockRun function procedure copyKeyBlockRun(srcKey, destKey:pointer); begin if ((srcKey = NIL) or (destKey = NIL)) then exit; TBlockRun(destKey^) := TBlockRun(srcKey^); end; // copyKeyBlockRun; // B^Tree keySize BlockRun function function keySizeBlockRun(key:pointer):integer; begin if (key = NIL) then result := 0 else result := sizeof(TBlockRun); end; // keySizeBlockRun // TSmallDataNode methods function TSmallDataNode.align; result := (size+3) and not 3; // round up to nearest dword address function TSmallDataNode.dataPtr; result := @self + align(nameSize) + (sizeof(TSmallDataNode)-sizeof(name)); function TSmallDataNode.isEmpty; result := ((nameSize or dataSize) = 0); function TSmallDataNode.next; result := @self + size(); function TSmallDataNode.size; result := align(nameSize) + align(dataSize) + (sizeof(TSmallDataNode)-sizeof(name)); // TUbixfsInode methods function TUbixfsInode.addAttr; var smallDataNode:PSmallDataNode; attrSize:uInt32; ptr:pointer; begin result := NIL; // failure by default; if (attribute = NIL) or (buffer = NIL) or (count = 0) then exit; // point a smallDataNode object at the first piece of the smallData section smallDataNode := @smallData; // Now we scan through looking for the first free slot while not smallDataNode^.isEmpty() do smallDataNode := smallDataNode^.next(); // figure out how big the attribute name will be (includes null) attrSize := strlen(attribute)+1; // double-check that there's enough room to the end of the inode if (dword(smallDataNode - @self) + smallDataNode^.align(attrSize) + smallDataNode^.align(count) + (sizeof(TSmallDataNode)-1) > inodeSize) then exit; // Okay, it'll fit with smallDataNode^ do begin nameSize := attrSize; // include null dataSize := count; dataType := attrType; fillchar(reserved, sizeof(reserved), 0); fillchar(name, align(nameSize), 0); // better safe than sorry? move(attribute^, name, nameSize); fillchar(dataPtr^, align(count), 0); // better safe than sorry move(buffer^, dataPtr^, count); end; // with result := smallDataNode; end; // TUbixfsInode.addAttr function TUbixfsInode.delAttr; var nextNode:PSmallDataNode; chunkSize:uInt32; begin result := FALSE; // failure by default // First check to see if we have been given a hint on what to delete. // If whichNode is NIL, then we should scan through the small data area // for the named attribute. if (whichNode = NIL) then begin assert(attribute <> NIL); // I don't think this should ever happen if (attribute = NIL) then exit; // problem whichNode := findAttr(attribute); end; // if whichNode is NIL here, then it doesn't exist in the small data area if (whichNode = NIL) then exit; // whichNode points to the attribute we need to delete. // First, get the nextNode nextNode := whichNode^.next(); if nextNode^.isEmpty() then begin // if we're on the last node, just clear it out fillchar(whichNode^, whichNode^.size(), 0); end else begin // calculate space from nextNode to the end chunkSize := inodeSize - (nextNode - @self); // move the next node over the one we're deleting move(nextNode^, whichNode^, chunkSize); // clear out the slack fillchar(pointer(whichNode + chunkSize)^, (nextNode - whichNode), 0); end; result := TRUE // success end; // TUbixfsInode.delAttr function TUbixfsInode.findAttr; var smallDataNode:PSmallDataNode; found:boolean; begin result := NIL; // failure by default if (attribute = NIL) then exit; smallDataNode := @smallData; found := FALSE; while not found and not smallDataNode^.isEmpty() do with smallDataNode^ do begin if (strcomp(attribute, name) = 0) then found := TRUE else smallDataNode := next(); end; // while/with if found then result := smallDataNode; end; // TUbixfsInode.findAttr // TCacheNode methods constructor TCacheNode.init; begin address := _address; lastUsed := _lastUsed; dataSize := _dataSize; data := NIL; if (dataSize <> 0) then GetMem(data, dataSize); dirty := FALSE; locked := FALSE; end; // TCacheNode.init destructor TCacheNode.done; begin if (data <> NIL) and (dataSize <> 0) then FreeMem(data, dataSize); address := 0; lastUsed := 0; data := NIL; dataSize := 0; dirty := FALSE; locked := FALSE; end; // TCacheNode.done constructor TUbixBTreeVFS.init; begin // writeln('TUbixBTreeVFS.init()'); fs := _fs; fillchar(iAddr, sizeof(iAddr), 0); position := 0; openFile := FALSE; end; // TUbixBTreeVFS.init; function TUbixBTreeVFS.fClose; var tmpIAddr:inodeAddr; tmpInode:PUbixfsInode; begin result := FALSE; if (fs = NIL) then exit; if (openFile) then begin if not fs^.isValidIAddr(IAddr) then exit; if not fs^.isBAT(IAddr) then // Don't shrink the BAT begin tmpInode := fs^.loadBlock(IAddr); if (tmpInode = NIL) then exit; fs^.shrinkFile(tmpInode); // shrink the directory end; position := 0; openFile := FALSE; end; result := TRUE; end; // TUbixBTreeVFS.fClose function TUbixBTreeVFS.fCreate; begin // I *think* that this code is only called for directories (and BAT). // Therefore, the file technically exists, but just has nothing in it when we call fCreate. result := fOpen(filename); end; // TUbixBTreeVFS.fCreate function TUbixBTreeVFS.fExist; var tmpIAddr:inodeAddr; tmpInode:PUbixfsInode; begin result := FALSE; assert(fs <> NIL); if (fs = NIL) then exit; assert(fs^.superBlock <> NIL); if (fs^.superBlock = NIL) then exit; tmpIAddr := fs^.strToIAddr(filename); if not fs^.isValidIAddr(tmpIAddr) then exit; // If we've created an inode, but haven't written any data to it, then // the B^Tree hasn't written out a superblock, so technically the file doesn't // exist, and this is a really long run-on sentence. tmpInode := fs^.loadBlock(tmpIAddr); // I'm not sure why, but the compiler cannot properly evaluate (tmpInode^.blocks.size <> 0), // so we check both the high and low portions of the size to make sure it's not 0. // result := (tmpInode <> NIL) and ((longLongRec(tmpInode^.blocks.size).lo <> 0) or (longLongRec(tmpInode^.blocks.size).hi <> 0)); // However, checking for 0 and then taking the complement of that works .. so we go that way. result := ((tmpInode <> NIL) and (not (tmpInode^.blocks.size = 0))); end; // TUbixBTreeVFS.fExist function TUbixBTreeVFS.fOpen; begin result := FALSE; if (fs = NIL) then exit; if (fs^.superBlock = NIL) then exit; iAddr := fs^.strToIAddr(filename); position := 0; openFile := fs^.isValidIAddr(iAddr); result := openFile; /// fs^.isValidIAddr(iAddr); end; // TUbixBTreeVFS.fOpen function TUbixBTreeVFS.fRead; begin result := FALSE; if (fs = NIL) then exit; // with inode^.inodeNum do // write('fRead(', AG, ':', start, ':', len, ', '); // write(position, ', '); // write('@buf, '); result := (fs^.readDataStream(iAddr, position, @buf, size) = 0); position := position + int64(size); end; // TUbixBTreeVFS.fRead function TUbixBTreeVFS.fSeek; begin // uhm.. if the desired offset is beyond the end of the file, // should we not extend out the file? Or does that occur in writeDataStream ... position := offset; result := TRUE; end; // TUbixBTreeVFS.fSeek function TUbixBTreeVFS.fWrite; begin result := FALSE; if (fs = NIL) then exit; // with inode^.inodeNum do // write('fWrite(', AG, ':', start, ':', len, ', '); // write(position, ', '); // write('@buf, '); result := (fs^.writeDataStream(iAddr, position, @buf, size) = 0); position := position + int64(size); //inc(position, size); end; // TUbixBTreeVFS.fWrite; destructor TUbixBTreeVFS.done; begin // XXX: It's possible that we need to save the inode here since // we possibly changed a directory fClose(); // "Close" the file if it hasn't been already. fillchar(iAddr, sizeof(iAddr), 0); fs := NIL; // don't destruct it since we didn't allocate it end; // TUbixBTreeVFS.done constructor TUbixFS.init; begin dev := device; superBlock := NIL; blockCache := NIL; blockTime := NIL; vnids := NIL; vnodeQueue := NIL; fileDescriptors := NIL; maxCacheEntry := NUM_CACHE_ENTRIES; maxFDEntry := NUM_FD_ENTRIES; end; // TUbixFS.init procedure TUbixFS.assert(x:boolean); begin if not x then runerror(1000); end; // TUbixFS.assert function TUbixFS.allocBRun; var FBL:PBTree; // free block list u:uPtr; tmpBlockRun:TBlockRun; searchRec:BTreeSearchRec; found:boolean; begin setBlockRun(result, 0, 0, 0); // basic sanity checks with blockRun do writeln('allocBRun(', AG, ':', start, ':', len, ');'); if (superBlock = NIL) or (blockRun.len = 0) then exit; if not isValidIAddr(superBlock^.BAT) then exit; if (blockRun.AG > superBlock^.numAGs) then exit; if (superBlock^.usedBlocks + int64(blockRun.len) > superBlock^.numBlocks) then exit; // should probably give a louder warning new(FBL, open(iAddrToStr(superBlock^.BAT), new(PUbixBTreeVFS, init(@self)))); //new(FBL, open('BAT.DAT', NIL)); if (FBL = NIL) then exit; FBL^.InstallUserFunctions(compareKeyBlockRun, copyKeyBlockRun, keySizeBlockRun); setBlockRun(tmpBlockRun, blockRun.AG, 0, 0); fillchar(searchRec, sizeof(searchRec), 0); // tmpBlockRun points to the Allocation Group marker in the tree if not FBL^.FindFirst(@tmpBlockRun, searchRec) then begin writeln('Could not find AG marker inside FBL'); dispose(FBL, done); exit; end; found := FALSE; // have we found a free range large enough to accomodate our request? repeat if not FBL^.findNext(searchRec) then assert(FBL^.GetFirstKey(searchRec)); // assert(searchRec.key <> NIL); // I don't think this can ever happen with TBlockRun(searchRec.key^) do if (len = 0) then if (AG = blockRun.AG) then break // We've wrapped completely around else continue; // look for another one found := (TBlockRun(searchRec.key^).len >= blockRun.len); until found; (* found = false; do { if (!FBL->FindNext(searchRec)) assert(FBL->GetFirstKey(searchRec)); if (searchRec.key->len == 0) if (searchRec.key->AG == blockRun.AG) break; else continue; found = (searchRec.key->len >= blockRun.len); } while (!found) *) // It's possible we didn't find a run of the size requested.. if found then begin // No matter what, we're going to have to delete the old entry and insert a new one tmpBlockRun := TBlockRun(searchRec.key^); // make a copy u := searchRec.value; FBL^.Delete(@tmpBlockRun, u); // delete the old key with tmpBlockRun do begin setBlockRun(result, AG, start, blockRun.len); // set the result to the associated blockrun inc(start, blockRun.len); dec(len, blockRun.len); // I'm not sure what to put in the value field yet if (len <> 0) then assert(FBL^.Insert(@tmpBlockRun, u)); // if there's any blocks left, re-insert them into the FBL superBlock^.usedBlocks := superBlock^.usedBlocks + int64(blockRun.len); end; // with end; // if found FBL^.FindClose(searchRec); // de-allocate the searchRec dispose(FBL, done); end; // TUbixFS.allocBRun function TUbixFS.allocInode; var inode:PUbixfsInode; blockRun:TBlockRun; begin // XXX ToDo: // need to fill in the rest of the fields in the inode record // Notes: // 2005-04-27 mji // Inodes take up exactly one logical block on the device. They // are usually in the same AG as their parent directory. Allocating // one involves getting some memory for it and finding a free block // to hold it on the device. result := NIL; if (superBlock = NIL) then exit; assert(parentIAddr.AG < superBlock^.numAGs); if (parentIAddr.AG >= superBlock^.numAGs) then exit; // parent addr is outside out block count? // ask for a single block somewhere around the beginning of the requested AG setBlockRun(blockRun, parentIAddr.AG, 0, 1); blockRun := allocBRun(blockRun); assert(blockRun.len <> 0); if (blockRun.len = 0) then exit; // this would be bad // Normally we'd allocate memory for the inode. Instead we let loadBlock() do that // for us. //inode := loadBlock(blockRun); // candidate for cacheBlock() inode := cacheBlock(blockRun); assert(inode <> NIL); if (inode = NIL) then exit; fillchar(inode^, superBlock^.inodeSize, 0); with inode^ do begin magic := UBIXFS_INODE_MAGIC; // inodes point to themselves inodeNum := blockRun; uid := 0; // this doesn't mean much here gid := 0; // mode := ?? flags := INODE_NO_FLAG; // createTime := // modifiedTime := parent := parentIAddr; setIAddr(attributes, 0, 0, 0); inodeSize := superBlock^.inodeSize; // data := 0; // there's no data yet in the file, so don't // set blocks to anything (cleared above) blocks.blockCount := 0; end; // with result := inode; end; // TUbixFS.allocInode function TUbixFS.cacheBlock; var blockAddress:int64; cacheNode:PCacheNode; u:uPtr; searchRec:BTreeSearchRec; begin result := NIL; // failure by default if (dev = NIL) or (superBLock = NIL) or (blockcache = NIL) then exit; with blockAddr, superBlock^ do blockAddress := (AG shl AGShift) + start; fillchar(u, sizeof(u), 0); // jut to be safe cacheNode := NIL; if (blockCache^.Find(@blockAddress, u)) then begin // block was in cache assert(u.p <> NIL); cacheNode := u.p; assert(cacheNode^.data <> NIL); fillchar(cacheNode^.data^, superBlock^.blockSize, 0); cacheNode^.dirty := TRUE; // Now we need to update the blockTime. We do this by first deleting // the cacheNode out of the blockTime cache. blockTime^.Delete(@cacheNode^.lastUsed, u); // Delete old one cacheNode^.lastUsed := getRDTSCTime(); // Get the new timestamp blockTime^.Insert(@cacheNode^.lastUsed, u); // Insert with new time end else begin // Requested block isn't in cache // Check to make sure cache isn't full if (blockCache^.getKeyCount() = maxCacheEntry) then begin blockTime^.getFirstKey(searchRec); // least recently used block cacheNode := searchRec.value.p; // get the pointer from the value assert(cacheNode <> NIL); assert(syncBlock(cacheNode)); u.p := cacheNode; blockCache^.Delete(@cacheNode^.address, u); blockTime^.Delete(@cacheNode^.lastUsed, u); // We've deleted the cacheNode out of both the blockCache and // the blockTime trees. We're going to re-use the cacheNode. cacheNode^.address := blockAddress; cacheNode^.lastUsed := getRDTSCTime(); // No need to update data, dataSize, or dirty. We re-use the first two, // and dirty will be set to FALSE above if necessary. Might need to // reset locked if we're using that. end else new(cacheNode, init(blockAddress, getRDTSCTime(), superBlock^.blockSize)); assert(cacheNode <> NIL); assert(cacheNode^.data <> NIL); u.p := cacheNode; if not (blockCache^.Insert(@blockAddress, u)) then writeln('Error inserting ', blockAddress, ' into blockCache in cacheBlock()'); if not (blockTime^.Insert(@cacheNode^.lastUsed, u)) then writeln('Error inserting into blockTime'); end; if (cacheNode <> NIL) then begin fillchar(cacheNode^.data^, superBlock^.blockSize, 0); cacheNode^.dirty := TRUE; // mark this block as dirty result := cacheNode^.data; // return the data pointer end; end; // TUbixFS.cacheBlock function TUbixFS.extendFile; function appendToDIndirect(newBlockRun:TBlockRun):boolean; var extents, indirectExtents:array[0..NUM_EXTENSION_BLOCKS-1] of ^array[0..0] of TBlockRun; extentsIndex, indirectExtentsIndex:uInt32; extentsBlockIndex, indirectExtentsBlockIndex:uInt32; extentsUpperBound:uInt32; tmpBlockRun:TBlockRun; begin writeln('in appendToDIndirect()'); // Many apologies for the long variable names. This section is relatively // complicated, and it wouldn't help to have cryptic variables. result := FALSE; // failure by default if (newBlockRun.len mod NUM_EXTENSION_BLOCKS <> 0) then begin writeln('Double indirect block run is not a multiple of NUM_EXTENSION_BLOCKS'); exit; end; extentsUpperBound := (superBlock^.blockSize div sizeof(TBlockRun)) - 1; if not isValidIAddr(inode^.blocks.doubleIndirect) then begin // allocate a double indirect block tmpBlockRun := inode^.inodeNum; // allocate a block near the parent inode tmpBlockRun.len := NUM_EXTENSION_BLOCKS; inode^.blocks.doubleIndirect := allocBRun(tmpBlockRun); if not isValidIAddr(inode^.blocks.doubleIndirect) then begin writeln('Failed to allocate double indirect block'); exit end; tmpBlockRun := inode^.blocks.doubleIndirect; for extentsBlockIndex := low(extents) to high(extents) do begin extents[extentsBlockIndex] := cacheBlock(tmpBlockRun); //extents[extentsBlockIndex] := loadBlock(tmpBlockRun); // good candidate for cacheBlock() //fillchar(extents[extentsBlockIndex]^, superBlock^.blockSize, 0); //assert(saveBlock(tmpBlockRun, extents[extentsBlockIndex])); inc(tmpBlockRun.len); end; // for extentsBlockIndex // We have a freshly allocated double indirect block. Now we need an indirect block // tmpBlockRun := inode^.blocks.doubleIndirect; // allocate an indirect block near the dindirect block // tmpBlockRun.len := NUM_EXTENSION_BLOCKS; // theoretically, it is already // extents[0]^[0] := allocBRun(tmpBlockRun); extents[0]^[0] := allocBRun(inode^.blocks.doubleIndirect); if not isValidIAddr(extents[0]^[0]) then begin writeln('Failed to allocate indirect block beneath a double indirect block'); exit end; // save out the doubleIndirect block inode^.blocks.maxDIndirect := inode^.blocks.maxIndirect; //assert(saveBlock(inode^.blocks.doubleIndirect, extents)); // This is a little misleading. Since saveBlock() formulates the // address based on tbe base-block of the blockRun, this will only save // out the first block. Secondly, we may not even need to do this since // we tagged them for saving above. assert(saveBlock(inode^.blocks.doubleIndirect, extents[0])); tmpBlockRun := extents[0]^[0]; for indirectExtentsBlockIndex := low(indirectExtents) to high(indirectExtents) do begin indirectExtents[indirectExtentsBlockIndex] := cacheBlock(tmpBlockRun); // good candidate for cacheBlock() (* indirectExtents[indirectExtentsBlockIndex] := loadBlock(tmpBlockRun); // good candidate for cacheBlock() fillchar(indirectExtents[indirectExtentsBlockIndex]^, superBlock^.blockSize, 0); assert(saveBlock(tmpBlockRun, indirectExtents[indirectExtentsBlockIndex])); *) inc(tmpBlockRun.start); end; // for indirectExtentsBlockIndex // Because we have a freshly allocated double indirect block, we have no entries // in it yet. This means that our DIndirect index is 0, and our indirect index is also 0. extentsIndex := 0; extentsBlockIndex := 0; indirectExtentsIndex := 0; indirectExtentsBlockIndex := 0; end else begin // double indirect block exists. Load it tmpBlockRun := inode^.blocks.doubleIndirect; for extentsBlockIndex := low(extents) to high(extents) do begin extents[extentsBlockindex] := loadBlock(tmpBlockRun); assert(extents[extentsBlockindex] <> NIL); inc(tmpBlockRun.start); end; // for extentsBlockIndex // Now scan through (backwards) to find the last entry. for extentsBlockIndex := high(extents) downto low(extents) do if (extents[extentsBlockIndex]^[0].len <> 0) then break; for extentsIndex := extentsUpperBound downto 0 do if (extents[extentsBlockIndex]^[extentsIndex].len <> 0) then break; // [extentsBlockindex],[extentsIndex] points to the entry which holds the indirect block with the last entry. // load that one tmpBlockRun := extents[extentsBlockindex]^[extentsIndex]; for indirectExtentsBlockIndex := low(indirectExtents) to high(indirectExtents) do begin indirectExtents[indirectExtentsBlockIndex] := loadBlock(tmpBlockRun); assert(indirectExtents[indirectExtentsBlockIndex] <> NIL); inc(tmpBlockRun.start); end; // for indirectExtentsBlocksIndex // first check the last entry if (indirectExtents[high(indirectExtents)]^[extentsUpperBound].len <> 0) then begin // this indirect block is full. Allocate a new one if ((extentsIndex = extentsUpperBound) and (extentsBlockIndex = high(extents))) then begin // I might as well halt at this point because the user is totally hosed. writeln('Egad! The double indirect block section is full. Can not write any more to this file!'); exit; end; // if doomed tmpBlockRun := extents[extentsBlockIndex]^[extentsIndex]; // address of the aforementioned full indirect block // tmpBlockRun.len := NUM_EXTENSION_BLOCKS; // should be already if (extentsIndex = extentsUpperBound) then begin extentsIndex := 0; inc(extentsBlockIndex); // checked above to see if this was full end else inc(extentsIndex); extents[extentsBlockIndex]^[extentsIndex] := allocBRun(tmpBlockRun); tmpBlockRun := extents[extentsBlockIndex]^[extentsIndex]; // grab a copy of the brun we just made assert(isValidIAddr(tmpBlockRun)); // better safe than sorry for indirectExtentsBlockIndex := low(indirectExtents) to high(indirectExtents) do begin indirectExtents[indirectExtentsBlockIndex] := cacheBlock(tmpBlockRun); // good candidate for cacheBlock() (* indirectExtents[indirectExtentsBlockIndex] := loadBlock(tmpBlockRun); // good candidate for cacheBlock() fillchar(indirectExtents[indirectExtentsBlockIndex]^, superBlock^.blockSize, 0); assert(saveBlock(tmpBlockRun, indirectExtents[indirectExtentsBlockIndex])); *) inc(tmpBlockRun.start); end; tmpBlockRun := inode^.blocks.doubleIndirect; inc(tmpBlockRun.start, extentsBlockIndex); saveBlock(tmpBlockRun, extents[extentsBlockIndex]); // save, since we added an indirect block to it indirectExtentsIndex := 0; indirectExtentsBlockIndex := 0; end else begin // now scan forward indirectExtentsBlockIndex := 0; indirectExtentsIndex := 0; while (indirectExtents[indirectExtentsBlockIndex]^[indirectExtentsIndex].len <> 0) do begin if (indirectExtentsIndex = extentsUpperBound) then begin indirectExtentsIndex := 0; if (indirectExtentsBlockIndex = high(indirectExtents)) then break else inc(indirectExtentsBlockIndex); end else inc(indirectExtentsIndex); end; // while (* for indirectExtentsBlockIndex := low(indirectExtents) to high(indirectExtents) do for indirectExtentsIndex := 0 to extentsUpperBound do if (indirectExtents[indirectExtentsBlockIndex]^[indirectExtentsIndex].len = 0) then break; *) writeln('[1] indirectExtentsIndex: ', indirectExtentsIndex); writeln('[1] indirectExtentsBlockIndex: ', indirectExtentsBlockIndex); end; // [extentsBlockIndex],[extentsIndex] points to which indirect block we're on // [indirectExtentsBlockIndex],[indirectExtentsIndex] points to the first free entry in that indirect block end; while (newBlockRun.len <> 0) do begin tmpBlockRun := newBlockRun; tmpBlockRun.len := NUM_EXTENSION_BLOCKS; writeln('indirectExtentsIndex: ', indirectExtentsIndex); writeln('indirectExtentsBlockIndex: ', indirectExtentsBlockIndex); indirectExtents[indirectExtentsBlockIndex]^[indirectExtentsIndex] := tmpBlockRun; with newBlockRun do begin dec(len, NUM_EXTENSION_BLOCKS); if ((uInt32(start) + NUM_EXTENSION_BLOCKS) > high(start)) then inc(AG); inc(start, NUM_EXTENSION_BLOCKS); // Warning! Relies on integer wrap-around (start is 16-bit) end; // with if (indirectExtentsIndex = extentsUpperBound) then begin // this indirect block is full. // Save the current one assert(saveBlock(extents[extentsBlockIndex]^[extentsIndex], indirectExtents[indirectExtentsBlockIndex])); if (indirectExtentsBlockIndex = high(indirectExtents)) then begin // Allocate a new one if ((extentsIndex = extentsUpperBound) and (extentsBlockIndex = high(extents))) then begin // I might as well halt at this point because the user is totally hosed. writeln('Egad! The double indirect block section is full. Can not write any more to this file!'); exit; end; // if doomed tmpBlockRun := extents[extentsBlockIndex]^[extentsIndex]; // snag the previous location // update which index in the double indirect block we're using if (extentsIndex = extentsUpperBound) then begin inc(extentsBlockIndex); extentsIndex := 0; end else inc(extentsIndex); extents[extentsBlockIndex]^[extentsIndex] := allocBRun(tmpBlockRun); // allocate an indirect block near the previous one // make a copy of the indirect block we just allocated tmpBlockRun := extents[extentsBlockIndex]^[extentsIndex]; assert(isValidIAddr(tmpBlockRun)); // just to be sure for indirectExtentsBlockIndex := low(indirectExtents) to high(indirectExtents) do begin indirectExtents[indirectExtentsBlockIndex] := cacheBlock(tmpBlockRun); // candidate for cacheBlock() (* indirectExtents[indirectExtentsBlockIndex] := loadBlock(tmpBlockRun); // candidate for cacheBlock() assert(indirectExtents[indirectExtentsBlockIndex] <> NIL); fillchar(indirectExtents[indirectExtentsBlockIndex]^, superBlock^.blockSize, 0); assert(saveBlock(tmpBlockRun, indirectExtents[indirectExtentsBlockIndex])); *) inc(tmpBlockRun.start); end; // for indirectExtentsBlockIndex indirectExtentsIndex := 0; indirectExtentsBlockIndex := 0; // Here we need to save the extents block we just changed --> tmpBlockRun := inode^.blocks.doubleIndirect; inc(tmpBlockRun.start, extentsBlockIndex); assert(saveBlock(tmpBlockRun, extents[extentsBlockIndex])); // <--- end else begin // We just need to pop over to the next section of the indirect block. // At this point I really want to beat the BeOS guy who decided to make these strucutres // span multiple disk blocks. They said [in the book] that they didn't want to do that because // it complicated things. Well, gee, no kidding. inc(indirectExtentsBlockIndex); indirectExtentsIndex := 0; end; end else inc(indirectExtentsIndex); // update the size the maxDoubleIndirect blocks reference inode^.blocks.maxDIndirect := inode^.blocks.maxDIndirect + (NUM_EXTENSION_BLOCKS shl superBlock^.blockShift); end; // while // XXX ?-> saveBlock(extents^[extentsIndex], indirectExtents); end; // local function appendToDIndirect() function appendToIndirect(newBlockRun:TBlockRun):boolean; var extents:array[0..NUM_EXTENSION_BLOCKS-1] of ^array[0..0] of TBlockRun; extentsUpperBound, blockIndex, index:uInt32; tmpBlockRun:TBlockRun; begin result := FALSE; // failure by default // The indirect block is NUM_EXTENSION_BLOCKS long extentsUpperBound := (superBlock^.blockSize div sizeof(TBlockRun))-1; for blockIndex := 0 to NUM_EXTENSION_BLOCKS-1 do extents[blockIndex] := NIL; // NIL out the array if not isValidIAddr(inode^.blocks.indirect) then begin // allocate an indirect block tmpBlockRun := inode^.inodeNum; // alloate near the parent inode tmpBlockRun.len := NUM_EXTENSION_BLOCKS; // indirect block is NUM_EXTENSION_BLOCKS long inode^.blocks.indirect := allocBRun(tmpBlockRun); // Try to allocate it if not isValidIAddr(inode^.blocks.indirect) then begin writeln('Failed to allocate indirect block'); exit end; tmpBlockRun := inode^.blocks.indirect; for blockIndex := 0 to NUM_EXTENSION_BLOCKS-1 do begin extents[blockIndex] := cacheBlock(tmpBlockRun); // candidate for cacheBlock() (* extents[blockIndex] := loadBlock(tmpBlockRun); // candidate for cacheBlock() fillchar(extents[blockIndex]^, superBlock^.blockSize, 0); saveBlock(tmpBlockRun, extents[blockIndex]); *) inc(tmpBlockRun.start); // XXX hopefully this won't wrap around. end; // for index // extents := loadBlock(inode^.blocks.indirect); inode^.blocks.maxIndirect := inode^.blocks.maxDirect; end else begin // the indirect block exists, so read it in tmpBlockRun := inode^.blocks.indirect; for blockIndex := low(extents) to high(extents) do begin extents[blockIndex] := loadBlock(tmpBlockRun); assert(extents[blockIndex] <> NIL); inc(tmpBlockRun.start); end; // for i end; // extents[] has the indirect block in it // We first check to make sure that we're not going to be moving // to the next section of the dataStream if (extents[high(extents)]^[extentsUpperBound].len <> 0) then result := appendToDIndirect(newBlockRun) // promote the newBlockRun to the DIndirect section else begin // scan through for the first free slot blockIndex := 0; index := 0; while (extents[blockIndex]^[index].len <> 0) do begin if (index = extentsUpperBound) then begin index := 0; if (blockIndex = high(extents)) then break else inc(blockIndex); end else inc(index); end; // while (* for blockIndex := 0 to NUM_EXTENSION_BLOCKS-1 do for index := 0 to extentsUpperBound do if extents[blockIndex]^[index].len = 0 then break; *) extents[blockIndex]^[index] := newBlockRun; // index points to the first free slot. Fill it in with the newBlockRun tmpBlockRun := inode^.blocks.indirect; inc(tmpBlockRun.start, blockIndex); // hopefully this won't wrap around saveBlock(tmpBlockRun, extents[blockIndex]); // save out the indirect block. Could check for failure here // update the size the maxDoubleIndirect blocks reference inode^.blocks.maxIndirect := inode^.blocks.maxIndirect + (newBlockRun.len shl superBlock^.blockShift); result := TRUE; // success end; end; // local function appendToIndirect() function appendToDirect(newBlockRun:TBlockRun):boolean; var index:uInt32; begin with newBlockRun do writeln('in appendToDirect(); blockRun = ', ag, ':', start, ':', len); result := FALSE; with inode^, blocks do begin if (direct[high(direct)].len <> 0) then result := appendToIndirect(newBlockRun) else begin for index := low(direct) to high(direct) do if (direct[index].len = 0) then break; direct[index] := newBlockRun; maxDirect := maxDirect + (newBlockRun.len shl superBlock^.blockShift); result := TRUE; end; // else end; // with end; // local function appendToDirect function appendToDataStream(newBlockRun:TBlockRun):boolean; begin if not(inode^.blocks.maxDIndirect = 0) then result := appendToDIndirect(newBlockRun) else if not(inode^.blocks.maxIndirect = 0) then result := appendToIndirect(newBlockRun) else result := appendToDirect(newBlockRun); end; // local function appendToDataStream function continueIndirect(newBlockRun:TBlockRun):boolean; var extents:array[0..NUM_EXTENSION_BLOCKS-1] of ^array[0..0] of TBlockRun; extentsUpperBound, blockIndex, index:uInt32; tmpBlockRun:TBlockRun; begin result := FALSE; extentsUpperBound := (superBlock^.blockSize div sizeof(TBlockRun))-1; tmpBlockRun := inode^.blocks.indirect; for blockIndex := low(extents) to high(extents) do begin extents[blockIndex] := loadBlock(tmpBlockRun); assert(extents[blockIndex] <> NIL); inc(tmpBlockRun.start); // hopefully this won't wrap end; for blockIndex := high(extents) downto low(extents) do if (extents[blockIndex]^[0].len <> 0) then break; for index := extentsUpperBound downto 0 do if (extents[blockIndex]^[index].len <> 0) then break; assert(extents[blockIndex]^[index].len <> 0); // just to be sure // extents[blockIndex]^[index] has the last entry if (uInt32(extents[blockIndex]^[index].len) + newBlockRun.len < 65535) then begin inc(extents[blockIndex]^[index].len, newBlockRun.len); tmpBlockRun := inode^.blocks.indirect; inc(tmpBlockRun.start, blockIndex); saveBlock(tmpBlockRun, extents[blockIndex]); inode^.blocks.maxIndirect := inode^.blocks.maxIndirect + (newBlockRun.len shl superBlock^.blockShift); result := TRUE; end else // oops, we overflowed the length begin // need to check to see if we were the last indirect entry. If we were // then we need to allocate a double indirect block and store it in there if (index = extentsUpperBound) then begin if (blockIndex = high(extents)) then result := appendToDIndirect(newBlockRun) else begin inc(blockIndex); extents[blockIndex]^[0] := newBlockRun; tmpBlockRun := inode^.blocks.indirect; inc(tmpBlockRun.start, blockIndex); saveBlock(tmpBlockRun, extents[blockIndex]); inode^.blocks.maxIndirect := inode^.blocks.maxIndirect + (newBlockRun.len shl superBlock^.blockShift); result := TRUE; end end else begin extents[blockIndex]^[index+1] := newBlockRun; tmpBlockRun := inode^.blocks.indirect; inc(tmpBlockRun.start, blockIndex); saveBlock(tmpBlockRun, extents[blockIndex]); inode^.blocks.maxIndirect := inode^.blocks.maxIndirect + (newBlockRun.len shl superBlock^.blockShift); result := TRUE; end; // else end; // else end; // local function continueIndirect function continueDirect(newBlockRun:TBlockRun):boolean; var index:uInt32; begin result := FALSE; with inode^, blocks do begin for index := high(direct) downto low(direct) do if (direct[index].len <> 0) then break; assert(direct[index].len <> 0); // just to be sure // make sure we don't overflow the length if (uInt32(direct[index].len) + newBlockRun.len < 65535) then begin inc(direct[index].len, newBlockRun.len); // update the last entry maxDirect := maxDirect + (newBlockRun.len shl superBlock^.blockShift); result := TRUE; end else // oops, we overflowed the length begin // need to check to see if we were the last direct entry. If we were // then we need to allocate an indirect block and store it in there if (index = high(direct)) then result := appendToIndirect(newBlockRun) else begin direct[index+1] := newBlockRun; // store it in the next available entry maxDirect := maxDirect + (newBlockRun.len shl superBlock^.blockShift); result := TRUE; end; end; // else end; // with end; // local function continueDirect function continueDataStream(newBlockRun:TBlockRun):boolean; begin // The extended blockRun continues a previous run. // When extending the file we request an allocation of more blocks // near to the last blockRun of the file. If the new blocks // are contiguous to the last blockRun of the file // then we can simply extend that blockRun. This not only // prevents fragmentation when possible, it also reduces the // number of actual blockRuns needed to store the file. if (inode^.blocks.maxDIndirect > 0) then result := appendToDIndirect(newBlockRun) else if (inode^.blocks.maxIndirect > 0) then result := continueIndirect(newBlockRun) else result := continueDirect(newBlockRun); end; // local function continueDataStream var blockRun, exBlockRun:TBlockRun; blocksToAllocate:uInt32; begin // extendFile() // XXX ToDo: // Need to verify support for indirect and double-indirect blocks // Notes: // 2005-04-27 mji // This function is called by writeDataStream() when we're about to write // past the end of the file. When extending out a file, it is almost never // extended by the amount written. Instead it is at least 1 block, and usually // more (in our case: 4, as mentioned below). Currently we pass in the number // of blocks to extend the file by. This is problematic for a few reasons. // The first is that the code to figure out how much to extend the file by // would have to be in writeDataStream(). That's not necessarily a problem, // except that the amount to be extended by depends on many factors, some // including whether we're writing out more than the default small block // extension size (currently is 4) or if we're in double indirect blocks, // which extends out by NUM_DINDIRECT_BLOCKS (also currently 4). // The second problem is that we save the inode here -- and also in // writeDataStream() -- because the new size of the file is updated in wDS() // instead of here. // The third problem is when you seek past the end of the file and then // write. Space should be allocated for all the space between the old // EOF and the new write position. This is common behaviour for most // filesystems, and should probably be emulated here as well. // The proper solution is to pass in the new size and let this function // work out exactly how much space on disk it is really going to take. result := FALSE; // failure by default // basic sanity checks if (inode = NIL) or (superBlock = NIL) then exit; // if the file size is 0 then this is the first extension. if (inode^.blocks.size = 0) then // first extension begin // A file's data is stored in the next allocation group from it's parent // inode. This will help distribute the data across the disk. // Find out how many blocks the new size will take up. Do this by rounding // up the size to the nearest block. Then we round that up to the nearest // NUM_DIRECT_BLOCKS multiple. blocksToAllocate := uInt32(((newSize+(superBlock^.blockSize-1)) shr superBlock^.blockShift)); blocksToAllocate := (blocksToAllocate + (NUM_EXTENSION_BLOCKS-1)) and not (NUM_EXTENSION_BLOCKS-1); // Allocate directory info in the same AG as the inode if (inode^.mode and S_IFDIR <> 0) then setBlockRun(exBlockRun, inode^.inodeNum.AG, 0, blocksToAllocate) else setBlockRun(exBlockRun, nextAG(inode^.inodeNum.AG), 0, blocksToAllocate); exBlockRun := allocBRun(exBlockRun); // try to allocate a blockrun assert(exBlockRun.len <> 0); if (exBlockRun.len = 0) then exit; // erp. Couldn't extend the file inode^.blocks.direct[0] := exBlockRun; // Set the first direct run // The space allocated for the file can and may be different than the // actual size. Set the maxDirect to the size (in bytes) of how much space // we allocated inode^.blocks.maxDirect := blocksToAllocate shl superBlock^.blockShift; end // size = 0 else begin // the size of the file wasn't 0 (handled above). We need to extend the // file out. Find where the last blocks of the file are // Note: // 2005-04-28 mji // We're going to have a potential problem here. When we check to see how many blocks we // want to extend by, we don't know for sure if we're actually going to end up in the // dataStream section we were assuming. For example, we may have no more room in the // indirect block section of dataStream and thus will have to store the entry in the double // indirect area. If the constants for each section are different, then we may be requesting the // wrong amount. This is *only* a problem if any of the constants are different. Therefore // the solution is to pick some arbitrary extension amount (probably 4 or 8 blocks) and stick // with that. If we pick the amount that will end up in the double indirect area (currently 4 // but I'm considering 8) then we can easily break apart our blockRun if we're writing it // to the double indirect section. // IMPORTANT NOTE // Once this value is chosen it will be set in stone since we will break everything by // changing it. Math done for retrieving the blockRun from the double indirect region will // rely on this value, and it can never be changed. BeFS uses 4 blocks for the double // indirect section, and that seems fine with me. The largest file using contiguous blockRuns // will be 38GB. I am, however, pondering 8 blocks instead. Remember, extensions are done // as a multiple of this value. // 2005-04-28 mji // I decided to use a single constant, NUM_EXTENSION_BLOCKS, and it is set to 4. blocksToAllocate := uInt32((newSize+(superBlock^.blockSize-1)) shr superBlock^.blockShift); // blocksToAllocate is now rounded up to the nearest block of how much space we need. blocksToAllocate := (blocksToAllocate + (NUM_EXTENSION_BLOCKS-1)) and not (NUM_EXTENSION_BLOCKS-1); // blocksToAllocate is now rounded up to the nearest multiple of NUM_EXTENSION_BLOCKS // now subtract off how many blocks we already have dec(blocksToAllocate, inode^.blocks.blockCount); // XXX // I don't think this is right. We need to find the last block returned by // getFileSpace(), not the size. Something to ponder/look into/test //blockRun := findBRun(@inode^.blocks, inode^.blocks.size shr superBlock^.blockShift); assert(inode^.blocks.blockCount <> 0); // if blockCount is 0, we're in big trouble blockRun := findBRun(@inode^.blocks, inode^.blocks.blockCount-1); assert(blockRun.len <> 0); if (blockRun.len = 0) then exit; // This would be odd exBlockRun := blockRun; // copy the last run to exBlockRun exBlockRun.len := blocksToAllocate; // set the requested number of blocks exBlockRun := allocBRun(exBlockRun); // allocate a block run near the previous one assert(exBlockRun.len <> 0); if (exBlockRun.len = 0) then exit; // should probably yell louder about this // blockRun points to the last blockRun structure associated with this inode if (exBlockRun.start = blockRun.start+1) and (exBlockRun.AG = blockRun.AG) then continueDataStream(exBlockRun) else appendToDataStream(exBlockRun); end; // else size <> 0 inode^.blocks.size := newSize; // Now we update the size here and not in writeDataStream() inc(inode^.blocks.blockCount, blocksToAllocate); // update the blockCount in the inode dataStream // no matter where we stored the extended blockRun, we have updated the inode assert(saveBlock(inode^.inodeNum, inode)); // 0 result means success result := TRUE; // no error end; // TUbixFS.extendFile function TUbixFS.findBRun; var position:int64; begin setBlockRun(result, 0, 0, 0); if (dataStream = NIL) then exit; if (blockNum >= dataStream^.blockCount) then begin writeln('Requested block outside the block count in the dataStream'); exit end; position := int64(blockNum) shl superBlock^.blockShift; if (position < dataStream^.maxDirect) then result := findBRunDirect(dataStream, blockNum) else if (position < dataStream^.maxIndirect) then result := findBRunIndirect(dataStream, blockNum) else if (position < dataStream^.maxDIndirect) then result := findBRunDIndirect(dataStream, blockNum) else writeln('Block position is not mapped by dataStream'); end; // TUbixFS.findBRun function TUbixFS.findBRunDirect; var sum, i, offset:uInt32; begin // set the result to a nil blockrun setBlockRun(result, 0, 0, 0); sum := 0; with dataStream^ do for i := low(direct) to high(direct) do with direct[i] do begin if (blockNum < sum + len) then // found it begin offset := blockNum - sum; setBlockRun(result, AG, start + offset, len - offset); exit end; inc(sum, len); end; // for/with end; // TUbixFS.findBRunDirect function TUbixFS.findBRunIndirect; var extents:array[0..NUM_EXTENSION_BLOCKS-1] of ^array[0..0] of TBlockRun; sum, offset, blockIndex, index, extentsUpperBound:uInt32; tmpBlockRun:TBlockRun; begin // NOTE: Basic error checking on whether superblock <> NIL isn't done here. setBlockRun(result, 0, 0, 0); // load the indirect block tmpBlockRun := dataStream^.indirect; for blockIndex := low(extents) to high(extents) do begin extents[blockIndex] := loadBlock(tmpBlockRun); assert(extents[blockIndex] <> NIL); inc(tmpBlockRun.start); // hope this doesn't wrap around end; extentsUpperBound := (superBlock^.blockSize div sizeof(TBlockRun))-1; // We either need to adjust sum forward or blockNum back. // sum := (dataStream^.maxDirect+superBlock^.blockSize-1) shr superBlock^.blockShift; // We need to decrease blockNum by the amount of blocks the direct portion took up. // This can be calculated by dividing the maxDirect byte size by the block size. blockNum := blockNum - dword(dataStream^.maxDirect shr superBlock^.blockShift); sum := 0; for blockIndex := low(extents) to high(extents) do for index := 0 to extentsUpperBound do with extents[blockIndex]^[index] do begin if (blockNum < sum + len) then // found it begin offset := blockNum - sum; setBlockRun(result, AG, start + offset, len - offset); exit // was a break end; inc(sum, len); end; // for/with end; // TUbixFS.findBRunIndirect const printStuff:boolean = FALSE; function TUbixFS.findBRunDIndirect; var extents:^array[0..0] of TBlockRun; sum, blockIndex, indirectIndex, index, offset:uInt32; blocksPerIndirect:uInt32; // per indirect section (NUM_EXTENSION_BLOCKS * superBlock^.blockSize) === 4 x 1k block blocksPerDIndirect:uInt32; // per single double indirect block === 1k block runsPerBlock:uInt32; // runs per single block (blockSize div sizeof(TBlockRun)) tmpBlockRun:TBlockRun; begin // NOTE: Basic error checking on whether superblock <> NIL isn't done here. setBlockRun(result, 0, 0, 0); // find the indirect block that contains the block run we're looking for. // This is done with some clever shifting // first adjust the blockNum, taking into account how many blocks are used // by the direct and indirect sections blockNum := blockNum - dword(dataStream^.maxIndirect shr superBlock^.blockShift); // Each indirect block holds NUM_EXTENSION_BLOCKS*((superBlock^.blockSize*NUM_EXTENSION_BLOCKS) div sizeof(TBlockRun)) blocks // e.g. // NUM_EXTENSION_BLOCKS = 4 // blockSize = 1024*NUM_EXTENSION_BLOCKS // sizeof(TBlockRun) = 8 // blocks referenced per indirect block = 4 * (4096 / 8) === 2048 // indirect block index = (block we're looking for) / (blocks referenced per indirect block) // blockrun index in indirect block = ((block we're looking for) % (blocks referenced per indirect block)) div NUM_EXTENSION_BLOCK // If we're really *really* clever, we will only need to load two blocks to find this blockrun. // So let's begin the cleverness. tmpBlockRun := dataStream^.doubleIndirect; runsPerBlock := superBlock^.blockSize div sizeof(TBlockRun); blocksPerIndirect := NUM_EXTENSION_BLOCKS * ((superBlock^.blockSize * NUM_EXTENSION_BLOCKS) div sizeof(TBlockRun)); blocksPerDIndirect := blocksPerIndirect * runsPerBlock; assert(blocksPerIndirect <> 0); // since this would div by 0 below anyway assert(blocksPerDIndirect <> 0); blockIndex := blockNum div blocksPerDIndirect; index := (blockNum mod blocksPerDIndirect) div blocksPerIndirect; assert(blockIndex < NUM_EXTENSION_BLOCKS); // this would mean we're way outside the range of possible addressable blocks inc(tmpBlockRun.start, blockIndex); extents := loadBlock(tmpBlockRun); assert(extents <> NIL); tmpBlockRun := extents^[index]; // now find the proper indirect sub-block blockIndex := (blockNum mod blocksPerIndirect) div (NUM_EXTENSION_BLOCKS*runsPerBlock); assert(blockIndex < NUM_EXTENSION_BLOCKS); // adjust the double indirect block address inc(tmpBlockRun.start, blockIndex); extents := loadBlock(tmpBlockRun); assert(extents <> NIL); // now find which indirect run references the block we're looking for index := (blockNum mod (runsPerBlock*NUM_EXTENSION_BLOCKS)) div NUM_EXTENSION_BLOCKS; (* blockIndex := (index mod blocksPerIndirect) div (superBlock^.blockSize div sizeof(TBlockRun)); assert(blockIndex < NUM_EXTENSION_BLOCKS); index := (blockNum mod blocksPerIndirect) div NUM_EXTENSION_BLOCKS; tmpBlockRun := extents[ i := blockNum div (NUM_EXTENSION_BLOCKS * ((superBlock^.blockSize*NUM_EXTENSION_BLOCKS) div sizeof(TBlockRun))); // i should now hold which entry in the double indirect block we need to load assert(i < ((superBlock^.blockSize*NUM_EXTENSION_BLOCKS) div sizeof(TBlockRun))); // load the proper indirect block beneath the double indirect block extents := loadBlock(extents^[i]); assert(extents <> NIL); // now seek into this indirect block by taking the modulus of the number // of blocks referenced by an indirect block i := (blockNum mod (NUM_EXTENSION_BLOCKS * ((superBlock^.blockSize*NUM_EXTENSION_BLOCKS) div sizeof(TBlockRun)))) div NUM_EXTENSION_BLOCKS; *) with extents^[index] do begin // extents^[i] is a block extent inside an indirect block which // was referenced by the double indirect block. Now we need to see // which block inside this extent we wanted. We do that by taking the // modulus of the requested block number by the number of blocks // referenced in each blockrun. BeFS used 4 per; and as of writing // this comment that is what I'm using. offset := blockNum and (NUM_EXTENSION_BLOCKS-1); setBlockRun(result, AG, start + offset, len - offset); end; // with end; // TUbixFS.findBRunDIndirect function TUbixFS.freeUsedBlocks; var FBL:PBTree; tmpBlockRun, leftBlockRun, rightBlockRun:TBlockRun; searchRec:BTreeSearchRec; mergeLeft, mergeRight:boolean; u:uPtr; begin result := -1; // failure by default // basic sanity checks if (superBlock = NIL) or (blockRun.len = 0) then exit; if not isValidIAddr(superBlock^.BAT) then exit; with blockRun do writeln('---- freeing used blocks: ', AG, ':', start, ':', len); new(FBL, open(iAddrToStr(superBlock^.BAT), new(PUbixBTreeVFS, init(@self)))); // new(FBL, open('BAT.DAT', NIL)); if (FBL = NIL) then exit; FBL^.InstallUserFunctions(compareKeyBlockRun, copyKeyBlockRun, keySizeBlockRun); fillchar(u, sizeof(u), 0); // not sure what to put in here yet assert(FBL^.Insert(@blockRun, u)); // put in our block run if not FBL^.FindFirst(@blockRun, searchRec) then begin FBL^.PrintWholeTree('after.txt'); runError; end; //assert(FBL^.FindFirst(@blockRun, searchRec)); // yes, we look it right back up if FBL^.FindPrev(searchRec) then begin assert(searchRec.key <> NIL); leftBlockRun := TBlockRun(searchRec.key^); mergeLeft := (leftBlockRun.len <> 0) and (leftBlockRun.start + leftBlockRun.len = blockRun.start); end else mergeLeft := FALSE; assert(FBL^.FindFirst(@blockRun, searchRec)); // We're looking this one up a lot if FBL^.FindNext(searchRec) then begin assert(searchRec.key <> NIL); rightBlockRun := TBlockRun(searchRec.key^); mergeRight := (rightBlockRun.len <> 0) and (blockRun.start + blockRun.len = rightBlockRun.start) end else mergeRight := FALSE; if (mergeLeft or mergeRight) then begin FBL^.Delete(@blockRun, u); // testing [this is to prevent that blockrun from being used in the event the BAT grows] if mergeLeft then assert(FBL^.Find(@leftBlockRun, u)); if mergeLeft then FBL^.Delete(@leftBlockRun, u); if mergeRight then assert(FBL^.Find(@rightBlockRun, u)); if mergeRight then FBL^.Delete(@rightBlockRun, u); // this blockRun merges two other free spaces into a single unified space tmpBlockRun := blockRun; if mergeLeft then begin dec(tmpBlockRun.start, leftBlockRun.len); inc(tmpBlockRun.len, leftBlockRun.len); end; if mergeRight then inc(tmpBlockRun.len, rightBlockRun.len); // tmpBlockRun now holds the unified run. Delete the appropiate ones // FBL^.Delete(@blockRun, u); assert(FBL^.Insert(@tmpBlockRun, u)); end; superBlock^.usedBlocks := superBlock^.usedBlocks - int64(blockRun.len); FBL^.FindClose(searchRec); dispose(FBL, done); result := 0; // success end; // TUbixFS.freeUsedBlocks function TUbixFS.getFileSize; begin result := 0; if (inode = NIL) then exit; result := inode^.blocks.size; end; // TUbixFS.getFileSize function TUbixFS.getFileSpace; begin // getFileSpace() will return the size of the file's allocated space result := 0; if (inode = NIL) or (superBlock = NIL) then exit; result := inode^.blocks.blockCount shl superBlock^.blockShift; end; // TUbixFS.getFileSpace function TUbixFS.iAddrToStr; var s:string; begin // Note: this won't work in C/C++ because strings are really null // terminated character arrays. So, the B^Tree won't hold a string // with nulls in it properly length(s) := sizeof(iAddr); move(iAddr, s[1], sizeof(iAddr)); result := s; end; // TUbixFS.iAddrToStr; function TUbixFS.loadBlock; var blockAddress:int64; cacheNode:PCacheNode; u:uPtr; searchRec:BTreeSearchRec; begin result := NIL; // failure by default; // 2005-04-29 mji // I'm not really sure how much error checking to put in here. Chances are that // if we're calling this function everything is fine. For now, with such limited // use I'll keep it pretty light. I think that this will eventually be used for // dealing with the block cache, so more checking might be required later. if (dev = NIL) or (superBlock = NIL) or (blockCache = NIL) then exit; with blockAddr, superBlock^ do blockAddress := (AG shl AGShift) + start; fillchar(u, sizeof(u), 0); // just to be safe cacheNode := NIL; // XXX ToDo // Fix this so that if there is no cache, we only have one pointer // Check for a full cache if (blockCache^.Find(@blockAddress, u)) then begin // block was in the cache. assert(u.p <> NIL); cacheNode := u.p; assert(cacheNode^.data <> NIL); // If we're going to honour the locked flag in the cacheNode, then // we should put a mutex here // Now we need to update the blockTime. We do this by first deleting // the cacheNode out of the blockTime cache. blockTime^.Delete(@cacheNode^.lastUsed, u); // Delete old one cacheNode^.lastUsed := getRDTSCTime(); // Get the new timestamp blockTime^.Insert(@cacheNode^.lastUsed, u); // Insert with new time result := cacheNode^.data; // return the data pointer end else begin // Requested block isn't in cache // XXX ToDo: // Check to see if we've hit the maximum cache entry limit. If so, we // need to drop off the least recently used (look for the first key // in blockTime) and possibly save it to disk if it has been modified. if (blockCache^.getKeyCount() = maxCacheEntry) then begin blockTime^.getFirstKey(searchRec); // least recently used block cacheNode := searchRec.value.p; // get the pointer from the value assert(cacheNode <> NIL); assert(syncBlock(cacheNode)); u.p := cacheNode; blockCache^.Delete(@cacheNode^.address, u); blockTime^.Delete(@cacheNode^.lastUsed, u); // We've deleted the cacheNode out of both the blockCache and // the blockTime trees. We're going to re-use the cacheNode. cacheNode^.address := blockAddress; cacheNode^.lastUsed := getRDTSCTime(); // No need to update data, dataSize, or dirty. We re-use the first two, // and dirty will be set to FALSE above if necessary. Might need to // reset locked if we're using that. end else new(cacheNode, init(blockAddress, getRDTSCTime(), superBlock^.blockSize)); assert(cacheNode <> NIL); assert(cacheNode^.data <> NIL); u.p := cacheNode; if not (blockCache^.Insert(@blockAddress, u)) then writeln('Error inserting ', blockAddress, ' into blockCache in loadBlock()'); if not (blockTime^.Insert(@cacheNode^.lastUsed, u)) then writeln('Error inserting into blockTime'); // now we need to fill the data from the device with superBlock^ do dev^.read(cacheNode^.data, blockAddress * (blockSize shr SECTOR_SHIFT), blockSize shr SECTOR_SHIFT ); // dev^.read result := cacheNode^.data; end; end; // TUbixFS.loadBlock function TUbixFS.nextAG; begin // there's no error we can throw from here if superblock is NIL, // so don't bother to check. Just hope whomever calls this // function has done some basic sanity checks result := (AG+1) mod superBlock^.numAGs end; // TUbixFS.nextAG function TUbixFS.pathToIAddr; function recursePath(path:PChar; len:integer; iAddr:inodeAddr):inodeAddr; var dirTree:PBTree; u:uPtr; nameSize:integer; pfs:PUbixBTreeVFS; begin // if the string length is 0 then we have found the inode in the previous // recurse, so return what we found. if (len = 0) then begin result := iAddr; exit; end else setIAddr(result, 0, 0, 0); // hm. If we're looking for something like ``/.'' then the ``/'' is stripped // off in the main call leaving only ``.'', which means that this malformed // path check would trigger. Same with /foo/bar if bar is a dir. //if uInt8ArrayPtr(path)^[0] = ord('/') then exit; // malformed path if not (isValidIAddr(iAddr)) then exit; // sanity check nameSize := 0; while (len > 0) and (uInt8ArrayPtr(path)^[nameSize] <> ord('/')) do begin inc(nameSize); dec(len); end; uInt8ArrayPtr(path)^[nameSize+1] := 0; if (len <> 0) then dec(len); new(dirTree, open(IAddrToStr(iAddr), new(PUbixBTreeVFS, init(@self)))); if (dirTree = NIL) then exit; // hm.. what happens to the PUbixBTreeVFS is this is NIL? // note that the dirTree is a tree of BT_PCHAR, and searches are case sensitive. // if the tree doesn't exist, Find() should return false. if dirTree^.Find(path, u) then result := recursePath(path + nameSize + 1, len, u.iAddr); dispose(dirTree, done); end; // recursePath var len:integer; copyOfPath:PChar; begin setIAddr(result, 0, 0, 0); if (path = NIL) or (superBlock = NIL) then exit; len := strlen(path); if (len = 0) then exit; // empty string if uInt8ArrayPtr(path)^[0] <> ord('/') then exit; // malformed path // we need to make a copy of the path since recursePath() will munge it GetMem(copyOfPath, len+1); // Allocate some memory for the path assert(copyOfPath <> NIL); move(path^, copyOfPath^, len+1); // copy it result := recursePath(copyOfPath+1, len-1, superBlock^.rootDir); freemem(copyOfPath, len+1); end; // TUbixFS.pathToIAddr procedure TUbixFS.printSuperBlock; begin if (superBlock = NIL) then exit; with superBlock^ do begin writeln('superBlock^.name........... ', name); writeln('superBlock^.magic1......... ', hex(magic1)); writeln('superBlock^.fsByteOrder.... ', fsByteOrder); writeln('superBlock^.blockSize...... ', blockSize); writeln('superBlock^.blockShift..... ', blockShift); writeln('superBlock^.numBlocks...... ', numBlocks); writeln('superBlock^.usedBlocks..... ', usedBlocks); writeln('superBlock^.magic2......... ', hex(magic2)); writeln('superBlock^.blocksPerAG.... ', blocksPerAG); writeln('superBlock^.AGShift........ ', AGShift); writeln('superBlock^.numAGs......... ', numAGs); writeln('superBlock^.flags.......... ', hex(flags)); writeln('superBlock^.magic3......... ', hex(magic3)); with superBlock^.BAT do writeln('superBlock^.BAT............ ', AG, ':', start, ':', len); with superBlock^.rootDir do writeln('superBlock^.rootDir........ ', AG, ':', start, ':', len); end; end; // TUbixFS.printSuperBlock function TUbixFS.saveBlock; var blockAddress:int64; cacheNode:PCacheNode; u:uPtr; searchRec:BTreeSearchRec; begin result := FALSE; // failure by default; if (blockCache = NIL) or (blockTime = NIL) or (dev = NIL) or (superBlock = NIL) then exit; with blockAddr, superBlock^ do blockAddress := (AG shl AGShift) + start; cacheNode := NIL; fillchar(u, sizeof(u), 0); // best be safe // first check to see if the blockAddress is in the cache if (blockCache^.Find(@blockAddress, u)) then begin // block was in cache cacheNode := u.p; assert(cacheNode <> NIL); assert(cacheNode^.data <> NIL); if (cacheNode^.data <> buf) then begin // The buffer with the data we want to save was not the same as the one // in the cache. This means that it was not obtained with loadBlock(). // We need to copy that pointer to our block. move(buf^, cacheNode^.data^, cacheNode^.dataSize); end; // Now we need to update the blockTime. We do this by first deleting // the cacheNode out of the blockTime cache. blockTime^.Delete(@cacheNode^.lastUsed, u); // Delete old one cacheNode^.lastUsed := getRDTSCTime(); // Get the new timestamp blockTime^.Insert(@cacheNode^.lastUsed, u); // Insert with new time end else begin // block was not in cache if (blockCache^.getKeyCount() = maxCacheEntry) then begin blockTime^.getFirstKey(searchRec); cacheNode := searchRec.value.p; assert(cacheNode <> NIL); assert(syncBlock(cacheNode)); u.p := cacheNode; blockCache^.Delete(@cacheNode^.address, u); blockTime^.Delete(@cacheNode^.lastUsed, u); // We've deleted the cacheNode out of both the blockCache and // the blockTime trees. We're going to re-use the cacheNode. cacheNode^.address := blockAddress; cacheNode^.lastUsed := getRDTSCTime(); // No need to update data, dataSize, or dirty. We re-use the first two, // and dirty will be set to FALSE above if necessary. Might need to // reset locked if we're using that. end else new(cacheNode, init(blockAddress, getRDTSCTime(), superBlock^.blockSize)); assert(cacheNode <> NIL); assert(cacheNode^.data <> NIL); move(buf^, cacheNode^.data^, cacheNode^.dataSize); u.p := cacheNode; if not (blockCache^.Insert(@blockAddress, u)) then writeln('Error inserting ', blockAddress, ' into blockCache in saveBlock()'); if not (blockTime^.Insert(@cacheNode^.lastUsed, u)) then writeln('Error inserting into blockTime'); end; cacheNode^.dirty := TRUE; // indicate the block has been modified result := TRUE; end; // TUbixFS.saveBlock function TUbixFS.isBAT; begin result := FALSE; if (superBlock = NIL) then exit; with superBlock^.BAT do result := (AG = iAddr.AG) and (start = iAddr.start); end; // TUbixFS.isBAT function TUbixFS.isValidIAddr; begin result := (iAddr.len <> 0) and (iAddr.AG < superBlock^.numAGs); end; // TUbixFS.isValidIAddr function TUbixFS.readAttr; var inode:PUbixfsInode; begin result := -1; if (attribute = NIL) or (buffer = NIL) then exit; if not isValidIAddr(iAddr) then exit; inode := loadBlock(iAddr); if (inode = NIL) then exit; end; // TUbixFS.readAttr function TUbixFS.readDataStream; var tmpBlock:uInt8ArrayPtr; blockRun:TBlockRun; size, remainder:uInt32; blockStart:uInt32; blocksToRead:uInt32; bytesToRead:uInt32; inode:PUbixFSInode; begin // with iAddr do // writeln('readDataStream(', AG, ':', start, ':', len, ', ', position, ', ', hex(dword(buf)), ', ', length, ');'); result := -1; if ((buf = NIL) or (superBlock = NIL)) then exit; inode := loadBlock(iAddr); tmpBlock := NIL; size := length; remainder := uInt32(position mod superBlock^.blockSize); if (remainder <> 0) then begin // the position we've been asked to read from doesn't align to // a block size. We need to read in the previous block and pad // the data blockStart := uInt32(position shr superBlock^.blockShift); blockRun := findBRun(@inode^.blocks, blockStart); assert(blockRun.len > 0); tmpBlock := loadBlock(blockRun); assert(tmpBlock <> NIL); bytesToRead := min(size, (superBlock^.blockSize - remainder)); move(tmpBlock^[remainder], buf^, bytesToRead); inc(buf, bytesToRead); dec(size, bytesToRead); position := position + bytesToRead; end; // if remainder != 0 // If the blockSize is always a power of 2, then you can eliminate the MOD and use // an AND operation instead, e.g. remainder := size and (superBlock^.blockSize-1) remainder := size mod superBlock^.blockSize; dec(size, remainder); // Size is currently in bytes. The remainder variable above holds the number // of remaining bytes (if any) outside full blocks we have to write. // convert the size into blocks size := size shr superBlock^.blockShift; while (size > 0) do begin blockRun := findBRun(@inode^.blocks, dword(position shr superBlock^.blockShift)); assert(blockRun.len > 0); for blocksToRead := 0 to min(blockRun.len, size)-1 do begin tmpBlock := loadBlock(blockRun); assert(tmpBlock <> NIL); move(tmpBlock^, buf^, superBlock^.blockSize); dec(size); inc(buf, superBlock^.blockSize); inc(blockRun.start); // This might wrap around? dec(blockRun.len); // might be unnecessary end; // for end; // while if (remainder <> 0) then begin // There is less than a block of data left to read. blockStart := dword((position+length-remainder) shr superBlock^.blockShift); blockRun := findBRun(@inode^.blocks, blockStart); if (blockRun.len = 0) then begin printStuff := TRUE; blockRun := findBRun(@inode^.blocks, uInt32(position shr superBlock^.blockShift)); end; assert(blockRun.len > 0); tmpBlock := loadBlock(blockRun); assert(tmpBlock <> NIL); move(tmpBlock^, buf^, remainder); end; result := 0; end; // TUbixFS.readDataStream procedure TUbixFS.setBlockRun; begin blockRun.AG := AG; blockRun.start := start; blockRun.len := len; end; // TUbixFS.setBlockRun procedure TUbixFS.setIAddr; begin inode.AG := AG; inode.start := start; inode.len := len; end; // TUbixFS.setIAddr function TUbixFS.strToIAddr; begin // Note: this won't work in C/C++ because strings are really null // terminated character arrays. So, the B^Tree won't hold a string // with nulls in it properly // begin fiendishly clever hack move(s[1], result, sizeof(inodeAddr)); // end fiendishly clever hack end; // TUbixFS.strToIAddr function TUbixFS.shrinkFile; function shrinkDirect(var trimBlockCount:uInt32):boolean; var index:uInt32; blockRun:TBlockRun; // for freeUsedBlocks() begin result := FALSE; with inode^, blocks do begin dec(blockCount, trimBlockCount); // this is a little presumptious maxDirect := maxDirect - (trimBlockCount shl superBlock^.blockShift); // so is this for index := high(direct) downto low(direct) do if (direct[index].len <> 0) then break; // find the last used one assert(direct[index].len <> 0); while (trimBlockCount <> 0) do begin if (direct[index].len >= trimBlockCount) then begin blockRun := direct[index]; // make a copy dec(direct[index].len, trimBlockCount); // subtract from the len if (direct[index].len = 0) then // check to see if that was the whole run setBlockRun(direct[index], 0, 0, 0) // yup, mark this entry as blank else // else, nope .. update blockRun for freeUsedBlocks() setBlockRun(blockRun, blockRun.AG, blockRun.start + direct[index].len, blockRun.len - direct[index].len); freeUsedBlocks(blockRun); // free the blockRun we just trimmed trimBlockCount := 0; // all done break; // end the while end else begin dec(trimBlockCount, direct[index].len); // subtract the length of this run from the total trim amount freeUsedBlocks(direct[index]); // free this blockRun setBlockRun(direct[index], 0, 0, 0); // mark this entry as blank end; // keep going until we've hit bottom or while trimBlockCount <> 0 if (index = low(direct)) then break else dec(index); end; // while end; // with assert(trimBlockCount = 0); // Better be 0. result := TRUE; end; // local function shrinkDirect function shrinkIndirect(var trimBlockCount:uInt32):boolean; var extents:array[0..NUM_EXTENSION_BLOCKS-1] of ^array[0..0] of TBlockRun; extentsUpperBound, blockIndex, index:uInt32; tmpBlockRun:TBlockRun; begin result := FALSE; // failure by default; extentsUpperBound := (superBlock^.blockSize div sizeof(TBlockRun))-1; if not isValidIaddr(inode^.blocks.indirect) then begin writeln('For some reason you called shrinkIndirect() when there was no valid indirect block.'); exit; end; // load the indirect block tmpBlockRun := inode^.blocks.indirect; for blockIndex := low(extents) to high(extents) do begin extents[blockIndex] := loadBlock(tmpBlockRun); assert(extents[blockIndex] <> NIL); inc(tmpBlockRun.start); // hope this doesn't wrap around end; // for blockIndex for blockIndex := high(extents) downto low(extents) do if (extents[blockIndex]^[0].len <> 0) then break; for index := extentsUpperBound downto 0 do if (extents[blockIndex]^[index].len <> 0) then break; assert(extents[blockIndex]^[index].len <> 0); // this would be odd with inode^, blocks do begin while (trimBlockCount <> 0) do begin if (extents[blockIndex]^[index].len >= trimBlockCount) then begin tmpBlockRun := extents[blockIndex]^[index]; // make a copy dec(extents[blockIndex]^[index].len, trimBlockCount); // subtract from the len maxIndirect := maxIndirect - (trimBlockCount shl superBlock^.blockShift); // update inode^.blocks.maxIndirect dec(blockCount, trimBlockCount); // update inode^.blocks.blockCount if (extents[blockIndex]^[index].len = 0) then // check to see if that was the whole run begin // Reclaim this entry. But, if the index is 0, then // we've trimmed away the indirect section completely. if (index = 0) then begin if (blockIndex = 0) then // toast the indirect block begin freeUsedBlocks(indirect); setIAddr(indirect, 0, 0, 0); end else // just move to the previous section of the indirect block begin // XXX we probably need to save out extents[blockIndex] right here dec(blockIndex); index := extentsUpperBound; end; end else // else just clear this entry setBlockRun(extents[blockIndex]^[index], 0, 0, 0) // yup it was the whole run, mark this entry as blank end else // else, nope .. update tmpBlockRun for freeUsedBlocks() setBlockRun(tmpBlockRun, tmpBlockRun.AG, tmpBlockRun.start + extents[blockIndex]^[index].len, tmpBlockRun.len - extents[blockIndex]^[index].len); freeUsedBlocks(tmpBlockRun); // free the blockRun we just trimmed trimBlockCount := 0; // all done break; // end the while end else begin dec(trimBlockCount, extents[blockIndex]^[index].len); // subtract the length of this run from the total trim amount freeUsedBlocks(extents[blockIndex]^[index]); // free this blockRun setBlockRun(extents[blockIndex]^[index], 0, 0, 0); // mark this entry as blank // This branch means that trimBlockCount is not 0 yet (relevant to code below) end; // keep going until we've hit bottom or while trimBlockCount <> 0 if (index = 0) then begin if (blockIndex = 0) then begin // If we're here, then the indirect block is no longer needed // and we need to scan through the direct section. Toast the // indirect block freeUsedBlocks(indirect); setBlockRun(indirect, 0, 0, 0); result := shrinkDirect(trimBlockCount); exit; end else begin dec(blockIndex); index := extentsUpperBound; end; end else dec(index); end; // while // XXX // This next piece of code should go away and the proper pieces should be saved above. // Since we have caching at the block level, we can go ahead and save blocks when they // change without worrying too much about speed. if isValidIAddr(indirect) then // we might have removed the indirect section altogether begin tmpBlockRun := indirect; for blockIndex := low(extents) to high(extents) do begin assert(saveBlock(tmpBlockRun, extents[blockIndex])); inc(tmpBlockRun.start); // hope this doesn't wrap around end; end; // if isValidIAddr(indirect) end; // with result := TRUE; // success end; // local function shrinkIndirect function shrinkDIndirect(var trimBlockCount:uInt32):boolean; var extents:^array[0..0] of TBlockRun; indirectExtents:^array[0..0] of TBlockRun; extentsIndex, indirectExtentsIndex:uInt32; extentsUpperBound:uInt32; tmpBlockRun:TBlockRun; begin result := FALSE; // failure by default extentsUpperBound := (superBlock^.blockSize div sizeof(TBlockRun)) - 1; assert(isValidIAddr(inode^.blocks.doubleIndirect)); extents := loadBlock(inode^.blocks.doubleIndirect); assert(extents <> NIL); for extentsIndex := extentsUPperBound downto 0 do if (extents^[extentsIndex].len <> 0) then break; assert(extents^[extentsIndex].len <> 0); indirectExtents := loadBlock(extents^[extentsIndex]); assert(indirectExtents <> NIL); for indirectExtentsIndex := extentsUpperBound downto 0 do if (indirectExtents^[indirectExtentsIndex].len <> 0) then break; assert(indirectExtents^[indirectExtentsIndex].len <> 0); while (trimBlockCount >= NUM_EXTENSION_BLOCKS) do begin dec(trimBlockCount, indirectExtents^[indirectExtentsIndex].len); dec(inode^.blocks.blockCount, indirectExtents^[indirectExtentsIndex].len); with inode^, blocks do maxDIndirect := maxDIndirect - (indirectExtents^[indirectExtentsIndex].len shl superBlock^.blockShift); freeUsedBlocks(indirectExtents^[indirectExtentsIndex]); // free the blockRun if (indirectExtentsIndex = 0) then begin freeUsedBlocks(extents^[extentsIndex]); // free this indirect block if (extentsIndex = 0) then begin freeUsedBlocks(inode^.blocks.doubleIndirect); // free the double indirect block setBlockRun(inode^.blocks.doubleIndirect, 0, 0, 0); // XXX // We should put a check to make sure inode^.blocks.maxDIndirect is equal to maxIndirect with inode^, blocks do assert(maxDIndirect = maxIndirect); inode^.blocks.maxDIndirect := 0; result := shrinkIndirect(trimBlockCount); exit; end else begin dec(extentsIndex); assert(saveBlock(inode^.blocks.doubleIndirect, extents)); // XXX this isn't right indirectExtents := loadBlock(extents^[extentsIndex]); assert(indirectExtents <> NIL); indirectExtentsIndex := extentsUpperBound; end; // else end else dec(indirectExtentsIndex); end; // while result := TRUE; // success end; // local function shrinkDIndirect function shrinkDataStream(var trimBlocks:uInt32):boolean; begin if (inode^.blocks.maxDIndirect > 0) then result := shrinkDIndirect(trimBlocks) else if (inode^.blocks.maxIndirect > 0) then result := shrinkIndirect(trimBlocks) else result := shrinkDirect(trimBlocks); end; // local function shrinkDataStream var blocksToTrim:uInt32; begin // shrinkFile() // XXX ToDo // Figure out which pieces of the datastream need to be saved // in shrinkIndirect() and shrinkDIndirect() result := FALSE; // failure by default if (inode = NIL) or (superBlock = NIL) then exit; // start with the original block count blocksToTrim := inode^.blocks.blockCount; // Subtract off how much space the file actually takes up. dec(blocksToTrim, uInt32((inode^.blocks.size+(superBlock^.blockSize-1)) shr superBlock^.blockShift)); // Do a quick check to see if we're in double indirect blocks. If we are, and the amount to trim // is less than NUM_EXTENSION_BLOCKS then we don't actually need to do anything. // Also do a quick check to see if blocksToTrim is 0 .. which means we really don't have to do anything. if (blocksToTrim = 0) or (isValidIAddr(inode^.blocks.doubleIndirect) and (blocksToTrim < NUM_EXTENSION_BLOCKS)) then begin result := TRUE; // Technically there was no failure exit; end; result := shrinkDataStream(blocksToTrim); assert(saveBlock(inode^.inodeNum, inode)); end; // TUbixFS.shrinkFile procedure TUbixFS.sync; var searchRec:BTreeSearchRec; begin // XXX ToDo: // Need to mark the superblock as clean. if (blockCache = NIL) or (dev = NIL) then exit; // We sync out the blocks in ascending order from the blockCache. // To do this we examine each entry to see if the block is dirty. // If it is, we write it out to the device and set dirty to FALSE. if (blockCache^.getFirstKey(searchRec)) then repeat syncBlock(searchRec.value.p); until not blockCache^.FindNext(searchRec); // Also write out the superblock. dev^.write(superBlock, 1, 1); end; // TUbixFS.sync function TUbixFS.syncBlock; begin result := FALSE; if (dev = NIL) or (cacheNode = NIL) then exit; with cacheNode^ do if (dirty) then begin assert(data <> NIL); dev^.write(data, address * (dataSize shr SECTOR_SHIFT), dataSize shr SECTOR_SHIFT ); // dev^.write dirty := FALSE; end; // if dirty result := TRUE; end; // TUbixFS.syncBlock {$IFDEF GRAPHICS} {$I graphics.pas} {$ENDIF} function TUbixFS.vfs_close; var u:uPtr; vnid:PVnid; inode:PUbixfsInode; begin result := -1; if (fileDescriptors = NIL) or (handle >= maxFDEntry) then exit; if not (vnids^.Find(@fileDescriptors^[handle].iAddr, u)) then writeln('Could not find iAddr in vnids'); vnid := u.p; if (vnid = NIL) then begin writeln('vnid is NIL'); exit; end; with vnid^ do begin dec(refCount); if (refCount = 0) then begin // refCount has fallen to 0. If the file is deleted we should take care of it here inode := loadBlock(fileDescriptors^[handle].iAddr); // if file is deleted then inode^.blocks.size = 0; assert(inode <> NIL); // shrink the file assert(shrinkFile(inode)); vnids^.Delete(@inode^.inodeNum, u); setIAddr(fileDescriptors^[handle].iAddr, 0, 0, 0); end; end; if not vNodeQueue^.enqueue(handle) then writeln('Error enqueuing handle back into vNodeQueue'); result := 0; end; // TUbixFS.vfs_close function TUbixFS.vfs_fCreate; var inode:PUbixFSInode; dirTree:PBTree; dirIAddr, fileIAddr:inodeAddr; u:uPtr; searchRec:BTreeSearchRec; len : integer; path:PChar; fileNamePtr:PChar; curPos:integer; vnid:PVnid; handle:integer; begin result := -1; // error by default if (filename = NIL) or (superBlock = NIL) then exit; // we need to split the filename into it's component path and filename pieces. // From there we need to see if the file exists. If it does then I guess we // should truncate the file and load the inode. Otherwise allocate a new inode len := strlen(filename); if (len = 0) then exit; if uInt8ArrayPtr(filename)^[0] <> ord('/') then exit; // malformed path GetMem(path, len+1); strCopy(path, filename); curPos := len; setIAddr(dirIAddr, 0, 0, 0); // zero out the inode address for the directory setIAddr(fileIAddr, 0, 0, 0); // zero out the inode address for the file while (curPos > 0) and (uInt8ArrayPtr(path)^[curPos] <> ord('/')) do dec(curPos); fileNamePtr := path + curPos + 1; if (curPos = 0) then begin // file is in the root dir writeln('path: /'); writeln('filename: ', fileNamePtr); dirIAddr := pathToIAddr('/'); end else begin uInt8ArrayPtr(path)^[curPos] := 0; // null out the separator writeln('Path: ', path); writeln('Filename: ', fileNamePtr); dirIAddr := pathToIAddr(path); end; with dirIAddr do writeln('iaddr to path: ', AG, ':', start, ':', len); assert(isValidIAddr(dirIAddr)); if not isValidIAddr(dirIAddr) then exit; // should probably check to make sure that we're opening a directory and not a file new(dirTree, open(IAddrToStr(dirIAddr), new(PUbixBTreeVFS, init(@self)))); if (dirTree = NIL) then exit; // We need to ascertain if the file is existing or new. // If the file exists, we should truncate the file // If the file is new, we need to create a new inode and write the entry to the directory // Then we need to allocate a file descriptor and store it somewhere. if dirTree^.Find(fileNamePtr, u) then begin writeln(fileNamePtr, ' already exists'); // XXX ToDo: // Set the filesize to 0 and call shrinkFile on the inode. // So, uhm, what happens if the file is already open? end else begin writeln('creating new file: ', fileNamePtr); writeln('allocating new inode'); inode := allocInode(dirIAddr); // alloc a new inode (pass in the parent iaddr) assert(inode <> NIL); // make sure it isn't nil inode^.addAttr('name', BT_PCHAR, fileNamePtr, strlen(fileNamePtr)+1); if (inode = NIL) then exit; assert(saveBlock(inode^.inodeNum, inode)); // save the new inode u.iAddr := inode^.inodeNum; // copy the inode addr to a uPtr // now add the file to the directory writeln('Inserting ', fileNamePtr, ' into directory returned ', dirTree^.Insert(fileNamePtr, u)); end; dispose(dirTree, done); FreeMem(path, len+1); // This file doesn't have an associated vnid. Create a new one. new(vnid); vnid^.refCount := 1; vnid^.iAddr := inode^.inodeNum; vnid^.mode := 0; // XXX ? u.p := vnid; if not (vnids^.insert(@inode^.inodeNum, u)) then writeln('Could not insert into vnids'); if not vNodeQueue^.dequeue(handle) then begin writeln('Out of file descriptors!'); exit; end; with fileDescriptors^[handle] do begin TID := 0; // ThreadID iAddr := inode^.inodeNum; position := 0; flags := 0; // XXX ? end; result := handle; end; // TUbixFS.vfs_fCreate function TUbixFS.vfs_fExist; begin // XXX ToDo: // find out why fExist('/./.') fails // fExist('/.') no longer fails result := -1; if ((superBlock = NIL) or (path = NIL)) then exit; if (not isValidIAddr(superBlock^.rootDir)) then exit; if isValidIAddr(pathToIAddr(path)) then result := 0; end; // TUbixFS.vfs_fExist function TUbixFS.vfs_format; const BATName:array[0..23] of char = 'Block Allocation Tree'#0; var sector:array[0..511] of byte; fs:PUbixFS; logicalBlocks, fsBATSize, fsBATBlocks:uInt32; sb:PdiskSuperBlock; inode:PubixfsInode; i, fsBlockShift, lBlockShift, nodeSize, keyLength:integer; blockRun:TBlockRun; FBL:PBTree; remainderBlocks:uInt32; u:uPtr; blockBit:shortint; begin result := -1; if (device = NIL) then exit; if (fsBlockSize < 1024) or (fsBlockSize > 8192) then begin writeln('Blocksize must be between 1024 and 8192'); exit; end; // zero out the sector fillchar(sector, sizeof(sector), 0); writeln('device^.sectors: ', device^.sectors); if not quick then begin system.write('clearing device... '); for i := 0 to device^.sectors-1 do device^.write(@sector, i, 1); writeln('done'); end; // if !quick fsBlockShift := 10; {minimum is 1024} while (1 shl fsBlockShift < fsBlockSize) do inc(fsBlockShift); // allocate a new superBlock and clear it new(sb); if (sb = NIL) then exit; fillchar(sb^, sizeof(TDiskSuperBlock), 0); // device^.sectors is the number of 512 byte sectors lBlockShift := fsBlockSize shr 9; writeln('lBlockShift: ', lBlockShift); logicalBlocks := device^.sectors div lBlockShift; // logical blocks writeln('logical blocks: ', logicalBlocks); with sb^ do begin strcopy(name, 'UbixFS'); magic1 := UBIXFS_MAGIC_1; fsByteOrder := 0; blockSize := fsBlockSize; blockShift := fsBlockShift; numBlocks := logicalBlocks; usedBlocks := 32; // XXX Based on what we allocate below for the new BAT system! inodeSize := fsBlockSize; magic2 := UBIXFS_MAGIC_2; blocksPerAG := min(65536, blockSize shl 3); AGShift := 10; // realistically it should be a min of 13 while (1 shl AGShift <> blocksPerAG) do inc(AGShift); numAGs := (dword(numBlocks)+(blocksPerAG-1)) div blocksPerAG; flags := UBIXFS_CLEAN; setBlockRun(logBlocks, 0, 0, 0); logStart := 0; logEnd := 0; magic3 := UBIXFS_MAGIC_3; setIAddr(indicies, 0, 0, 0); setIAddr(rootDir, 0, 0, 0); end; // with // create the free block list inode getmem(inode, fsBlockSize); // make the inode the same size as a block if (inode = NIL) then exit; fillchar(inode^, sb^.inodeSize, 0); with inode^ do begin magic := UBIXFS_INODE_MAGIC; // inodes point to themselves setIAddr(inodeNum, 0, sb^.logBlocks.len+1, 1); // the +1 for the start might be wrong setIAddr(parent, 0, 0, 0); // If the partition is larger than 550GB we will need to allocate more than // one blockRun for the BAT. //oldBAT -- fsBATSize := logicalBlocks shr 3; // compute how many bytes the BAT takes up //oldBAT -- fsBATBlocks := (fsBATSize + fsBlockSize-1) div fsBlockSize; //oldBAT -- writeln('fsBATSize: ', fsBATSize); //oldBAT -- writeln('fsBATBlocks: ', fsBATBlocks); //oldBAT -- setBlockRun(blocks.direct[0], 0, inodeNum.start+1, fsBATBlocks); //oldBAT -- blocks.size := (fsBATBlocks * fsBlockSize); // round the file size up to the nearest block size //oldBAT -- blocks.maxDirect := blocks.size-1; //oldBAT -- blocks.blockCount := fsBATBlocks; blocks.size := 0; blocks.blockcount := 30; // manually allocate blocks blocks.maxDirect := (blocks.blockCount shl fsBlockShift)-1; // set the size of the file setBlockRun(blocks.direct[0], 0, inodeNum.start+1, blocks.blockCount); // manually allocate blocks uid := 0; gid := 0; mode := 0; flags := INODE_LOGGED; setIAddr(attributes, 0, 0, 0); inodeSize := sb^.inodeSize; // strcopy(name, 'Block Allocation Tree'); addAttr('name', BT_PCHAR, @BATName, strlen(@BATName)+1); end; // with sb^.BAT := inode^.inodeNum; writeln('writing out superblock to sector 1'); device^.write(sb, 1, 1); // note that the superblock does not contain proper inode addresses for the // BAT and rootDir. Those are sync'd to disk in the destructor after they've been // created below. system.write('Writing out BAT inode... '); // write out the Block Allocation Table inode with inode^.inodeNum, sb^ do device^.write(inode, ((AG shl AGShift) + start) * (inodeSize shr SECTOR_SHIFT), inodeSize shr SECTOR_SHIFT ); // device^.write writeln('done'); // Okay, the superblock is the only piece of information needed // to mount an UbixFS partition new(fs, init(device)); assert(fs <> NIL); assert(fs^.vfs_init() = 0); // The freshly formatted parition is now mounted. // From here we need to create: // BAT (Block Allocation Table) file contents // journal // root dir new(FBL, init(iAddrToStr(sb^.BAT), BAT_PAGE_SIZE, BT_CUSTOM, new(PUbixBTreeVFS, init(fs)))); // new(FBL, init('BAT.DAT', BAT_PAGE_SIZE, BT_CUSTOM, NIL)); if (FBL = NIL) then begin writeln('Error creating Block Allocation Tree in vfs_format()'); halt; end; FBL^.InstallUserFunctions(compareKeyBlockRun, copyKeyBlockRun, keySizeBlockRun); fillchar(u, sizeof(u), 0); setBlockRun(blockRun, 0, 0, 0); // 0th AG tag assert(FBL^.Insert(@blockRun, u)); // 0th AG marker writeln(dword(sb^.numBlocks)); writeln(sb^.numAGs); remainderBlocks := dword(sb^.numBlocks-sb^.usedBlocks); i := inode^.blocks.direct[0].start+inode^.blocks.blockCount; setBlockRun(blockRun, 0, i, min(sb^.blocksPerAG-i, remainderBlocks)); dec(remainderBlocks, blockRun.len); with blockRun do writeln('0th AG free block section: ', AG, ':', start, ':', len); FBL^.Insert(@blockRun, u); // 0th AG free block list for i := 1 to sb^.numAGs-1 do begin setBlockRun(blockRun, i, 0, 0); // AG tags assert(FBL^.Insert(@blockRun, u)); blockRun.start := 1; blockRun.len := min(remainderBlocks, sb^.blocksPerAG-1); assert(FBL^.Insert(@blockRun, u)); dec(remainderBlocks, min(remainderBlocks, sb^.blocksPerAG)); sb^.usedBlocks := sb^.usedBlocks + 1; end; // for i writeln('remainderBlocks: ', remainderBlocks); assert(remainderblocks = 0); dispose(FBL, done); fs^.vfs_mkdir('/'); if (fs^.vfs_fExist('/.') = -1) then writeln('Warning! /. doesn''t exist') else writeln('fExist(''/.'') returned properly'); if (fs^.vfs_fExist('/nofile.dat') <> -1) then writeln('Warning! /nofile.dat exists?') else writeln('fExist(''/nofile.dat'') returned properly'); if (fs^.vfs_fExist('/nodir/nofile.dat') <> -1) then writeln('Warning! /nodir/nofile.dat exists?') else writeln('fExist(''/nodir/nofile.dat'') returned properly'); // writeln('fs^.vfs_fExist(''/.'') returned: ', fs^.vfs_fExist('/.')); writeln('fs^.vfs_fExist(''/nofile.dat'') returned: ', fs^.vfs_fExist('/nofile.dat')); fs^.printSuperBlock(); dispose(fs, done); writeln('Format complete!'); dispose(sb); freemem(inode, fsBlockSize); end; // TUbixfS.vfs_format function TUbixFS.vfs_init; var index:uInt32; begin // This function should probably not be called more than once without // calling the appropiate shutdown code writeln('TUbixFS.vfs_init()'); result := -1; if (dev = NIL) then exit; new(superBlock); assert(superBlock <> NIL); if (superBlock = NIL) then exit; fillchar(superBlock^, sizeof(superBlock^), 0); // better safe than sorry // read in the superBlock. It's always the 1st block on the partition (counting from 0) system.write('reading in superBlock... '); dev^.read(superBlock, 1, 1); writeln('done'); assert(superBlock^.magic1 = UBIXFS_MAGIC_1); assert(superBlock^.magic2 = UBIXFS_MAGIC_2); assert(superBlock^.magic3 = UBIXFS_MAGIC_3); assert(strcomp(superBlock^.name, 'UbixFS') = 0); assert((1 shl superBlock^.blockShift) = superBlock^.blockSize); assert((1 shl superBlock^.AGShift) = superBlock^.blocksPerAG); assert(superBlock^.flags = UBIXFS_CLEAN); // create the file decriptor tree // The blockCache tree has a [key::value] pair of [block address::pointer to cacheNode]. // The blockTime tree has a [key::value] pair of [last used time::pointer to cacheNode]. // create the block cache tree new(blockCache, init('', 256, BT_INT64, NIL)); // create the block last used time tree // The last used time is obtained through the RDTSC instruction to give us a unique // and increasing timestamp. When the cache is full we drop off the least recently // used block. Because the tree is sorted, this will be the first entry. new(blockTime, init('', 256, BT_INT64, NIL)); // Create the file descriptors and related stuff // vnids is a tree which has a [key::value] pair of [iAddr::pointer to vNode]. new(vnids, init('', 256, BT_INT64, NIL)); // vnodeQueue is a queue which has a list of available FD handles new(vNodeQueue, init(maxFDEntry, sizeof(uInt32))); // fill the vNodeQueue with a list of all available FD handles for index := 0 to maxFDEntry-1 do vNodeQueue^.enqueue(index); // Allocate and clear the file descriptor array GetMem(fileDescriptors, maxFDEntry*sizeof(TVNode)); fillchar(fileDescriptors^, maxFDEntry*sizeof(TVNode), 0); result := 0; // success end; // TUbixFS.vfs_init function TUbixFS.vfs_mkdir; const dot:array[0..1] of char = '.'#0; dotdot:array[0..2] of char = '..'#0; var inode:PUbixFSInode; dirIAddr:inodeAddr; iAddr:inodeAddr; bRun:TBlockRun; len:integer; dirTree:PbTree; u:uPtr; pathToParentDir:PChar; dirNamePtr:PChar; curPos:integer; begin result := -1; assert(path <> NIL); assert(superBlock <> NIL); if (path = NIL) or (superBlock = NIL) then exit; len := strlen(path); assert(len <> 0); if (len = 0) then exit; if (len = 1) and (uInt8ArrayPtr(path)^[0] = ord('/')) then begin writeln('Creating root directory'); // wanting to create the root. Check to see if the root dir inode is empty assert(not isValidIAddr(superBlock^.rootDir)); if isValidIAddr(superBlock^.rootDir) then exit; // should set a proper error code here setIAddr(iAddr, 0, 0, 0); // technically the "parent" iaddr inode := allocInode(iAddr); // allocate an inode (parent points to la-la land) assert(inode <> NIL); // make sure it isn't nil if (inode = NIL) then exit; inode^.mode := S_IFDIR; inode^.flags := INODE_LOGGED; superBlock^.rootDir := inode^.inodeNum; // assign the root dir inode^.addAttr('name', BT_PCHAR, path, len+1); // len+1 should equal 2 assert(saveBlock(superBlock^.rootDir, inode)); // save the inode u.iAddr := inode^.inodeNum; // copy the inode addr to a uPtr // open the root dir with superBlock^.rootDir do writeln('Root dir inode: ', AG, ':', start, ':', len); writeln('Opening root dir tree'); new(dirTree, init(iAddrToStr(superBlock^.rootDir), 1024, BT_PCHAR, new(PUbixBTreeVFS, init(@self)))); writeln('Inserting ''.'''); // Get ready to insert the '.' :: inode^.inodeNum key :: value pair writeln('dirTree^.Insert(@dot, u) returned ', dirTree^.Insert(@dot, u)); // '.' points to ourself writeln('dirTree^.Insert(@dotDot, u) returned ', dirTree^.Insert(@dotDot, u)); // '..' points to ourself, too dispose(dirTree, done); // close and save the directory end else begin // subdir support goes here setIAddr(dirIAddr, 0, 0, 0); len := strlen(path); if (len = 0) then exit; if uInt8ArrayPtr(path)^[0] <> ord('/') then exit; // malformed path GetMem(pathToParentDir, len+1); strCopy(pathToParentDir, path); curPos := len; while (curPos > 0) and (uInt8ArrayPtr(pathToParentDir)^[curPos] <> ord('/')) do dec(curPos); dirNamePtr := pathToParentDir + curPos + 1; if (curPos = 0) then begin writeln('New directory being created below root'); writeln('dir name: ', dirNamePtr); dirIAddr := superBlock^.rootDir; end else begin if (curPos <> 0) then uInt8ArrayPtr(pathToParentDir)^[curPos] := 0; // null out the last / separator writeln('Path to parent dir: ', pathToParentDir); writeln('dir name: ', dirNamePtr); dirIAddr := pathToIAddr(pathToParentDir); end; inode := allocInode(dirIAddr); // allocate an inode assert(inode <> NIL); if (inode = NIL) then exit; // set some appropiate error code (not sure what) inode^.mode := S_IFDIR; inode^.flags := INODE_LOGGED; inode^.addAttr('name', BT_PCHAR, dirNamePtr, strlen(dirNamePtr)+1); assert(saveBlock(inode^.inodeNum, inode)); u.iAddr := inode^.inodeNum; // open the parent dir new(dirTree, open(iAddrToStr(dirIAddr), new(PUbixBTreeVFS, init(@self)))); writeln('dirTree^.Insert(dirNamePtr, u) returned ', dirTree^.Insert(dirNamePtr, u)); dispose(dirTree, done); // now open the subdir and insert '.' and '..' new(dirTree, init(iAddrToStr(inode^.inodeNum), 1024, BT_PCHAR, new(PUbixBTreeVFS, init(@self)))); // Get ready to insert the '.' :: inode^.inodeNum key :: value pair writeln('dirTree^.Insert(@dot, u) returned ', dirTree^.Insert(@dot, u)); // '.' points to ourself u.iAddr := dirIAddr; writeln('dirTree^.Insert(@dotdot, u) returned ', dirTree^.Insert(@dotdot, u)); // '..' points to parent dir dispose(dirTree, done); FreeMem(pathToParentDir, len+1); end; // else subdir result := 0; end; // TUbixFS.vfs_mkdir function TUbixFS.vfs_read; var inode:PUbixFSInode; u:uPtr; begin result := -1; if (buffer = NIL) or (fileDescriptors = NIL) or (handle >= maxFDEntry) then exit; with fileDescriptors^[handle] do begin assert(readDataStream(iAddr, position, buffer, size) = 0); position := position + int64(size); result := size; end; // with end; // TUbixFS.vfs_read function TUbixFS.vfs_readAttr; var inode:PUbixfsInode; begin result := -1; // failure by default if (attribute = NIL) or (buffer = NIL) or (handle >= maxFDEntry) or (fileDescriptors = NIL) then exit; with fileDescriptors^[handle] do begin if not isValidIAddr(iAddr) then exit; // We need to check that the file descriptor points to an open file. inode := loadBlock(iAddr); if (inode = NIL) then exit; end; // with // first, start in the small data area end; // TUbixFS.vfs_readAttr function TUbixFS.vfs_seek; begin result := -1; if (fileDescriptors = NIL) or (handle >= maxFDEntry) then exit; fileDescriptors^[handle].position := newPosition; result := 0; end; // TUbixFS.vfs_seek function TUbixFS.vfs_unlink; var dirTree:PbTree; iAddr:inodeAddr; dirIAddr:inodeAddr; path, fileNamePtr:PChar; curPos:integer; len:integer; inode:PUbixfsInode; u:uPtr; vnid:PVnid; smallDataNode:PSmallDataNode; begin // XXX ToDo: // Set appropiate error codes. // If the "file" is actually a directory, bail on that // If bailing out, don't forget to reclaim allocated memory result := -1; if (fileName = NIL) or (superBlock = NIL) or (vnids = NIL) then exit; // We need to break the path apart into a directory path and filename len := strlen(filename); if (len = 0) then exit; getmem(path, len+1); strcopy(path, filename); curPos := len; while (curPos > 0) and (uInt8ArrayPtr(path)^[curPos] <> ord('/')) do dec(curPos); fileNamePtr := path + curPos +1; if (curPos = 0) then begin dirIAddr := superBlock^.rootDir; end else begin uInt8ArrayPtr(path)^[curPos] := 0; dirIAddr := pathToIAddr(path); end; // XXX We can't exit because we have allocated memory to clean up if not (isValidIAddr(dirIAddr)) then exit; // set some error code here new(dirTree, open(IAddrToStr(dirIAddr), new(PUbixBTreeVFS, init(@self)))); assert(dirTree <> NIL); fillchar(u, sizeof(u), 0); if dirTree^.Find(fileNamePtr, u) then begin inode := loadBlock(u.iAddr); if (inode = NIL) then begin writeln('error loading inode in unlink()'); exit; // XXX cannot exit from here either end; // the filename of the file always exists inside the // small data area of the file. smallDataNode := inode^.findAttr('name'); assert(smallDataNode <> NIL); dirTree^.Delete(smallDataNode^.dataPtr(), u); // We know the file exists.. but does anybody have it open? if vnids^.Find(@inode^.inodeNum, u) then begin // file was already open // mark it for deletion later inode^.flags := inode^.flags OR INODE_DELETED; saveBlock(inode^.inodeNum, inode); end else begin // Tis okay to delete the file inode^.blocks.size := 0; shrinkFile(inode); freeUsedBlocks(inode^.inodeNum); end; end else writeln('File does not exist'); dispose(dirTree, done); FreeMem(path, len+1); result := 0; end; // TUbixFS.vfs_unlink function TUbixFS.vfs_write; begin result := -1; if (buffer = NIL) or (fileDescriptors = NIL) or (handle >= maxFDEntry) then exit; with fileDescriptors^[handle] do begin if not isValidIAddr(iAddr) then exit; assert(writeDataStream(iAddr, position, buffer, size) = 0); position := position + int64(size); result := size; end; // with end; // TUbixFS.vfs_write function TUbixFS.vfs_writeAttr; begin result := -1; // failure by default; if (fileDescriptors = NIL) or (handle >= maxFDEntry) then exit; result := writeAttr(fileDescriptors^[handle].iAddr, attribute, attrType, position, buffer, count); end; // TUbixFS.vfs_writeAttr function TUbixFS.writeAttr; var inode:PUbixfsInode; dataNode:PSmallDataNode; attrTree:PbTree; found, smallDataArea:boolean; u:uPtr; begin result := -1; if (attribute = NIL) or (buffer = NIL) then exit; if not isValidIAddr(iAddr) then exit; inode := loadBlock(iAddr); if (inode = NIL) then exit; dataNode := @inode^.smallData; found := FALSE; writeln('looking for ', attribute); smallDataArea := TRUE; while not found and not dataNode^.isEmpty() do if (strcomp(@dataNode^.name, attribute) = 0) then found := TRUE else dataNode := dataNode^.next(); if not found then begin // we looked through the small data area and didn't find it. Check the attr dir (if there is one) if isValidIAddr(inode^.attributes) then begin new(attrTree, open(iAddrToStr(inode^.attributes), new(PUbixBTreeVFS, init(@self)))); if (attrTree = NIL) then exit; // danger will robinson! if (attrTree^.Find(attribute, u)) then begin found := FALSE; smallDataArea := FALSE; end; dispose(attrTree, done); end; end; end; // TUbixFS.writeAttr function TUbixFS.writeDataStream; var tmpBlock:uInt8ArrayPtr; blockRun:TBlockRun; size, remainder:uInt32; blockStart:uInt32; blocksToWrite:uInt32; bytesToWrite:uInt32; inodeChanged:boolean; inode:PUbixFSInode; begin // with iAddr do // writeln('writeDataStream(', AG, ':', start, ':', len, ', ', position, ', ', hex(dword(buf)), ', ', length, ');'); result := -1; if ((buf = NIL) or (superBlock = NIL)) then exit; inode := loadBlock(iAddr); tmpBlock := NIL; inodeChanged := FALSE; if (position + length > getFileSpace(inode)) then begin extendFile(inode, position+length); // inode^.blocks.size := position+length; // update the blocks writeln('after extendfile: size = ', inode^.blocks.size, ' getFileSpace() = ', getFileSpace(inode)); end else if (position + length > inode^.blocks.size) then begin // The above case checks to see if we're writing out past // the end of the allocated space on the device. If we do, we extend // out the file. However, if we're just writing past the end of // the file, we have to update the size. inode^.blocks.size := position+length; // XXX To Do: // Might want to clear out the blocks we're skipping past, otherwise // there might be garbage in there assert(saveBlock(iAddr, inode)); // save the inode end; size := length; remainder := uInt32(position mod superBlock^.blockSize); if (remainder <> 0) then begin // the position we've been asked to write to doesn't align to // a block size. We need to read in the previous block and pad // the data blockStart := uInt32(position shr superBlock^.blockShift); blockRun := findBRun(@inode^.blocks, blockStart); assert(blockRun.len > 0); tmpBlock := loadBlock(blockRun); assert(tmpBlock <> NIL); if (tmpBlock = NIL) then exit; bytesToWrite := min(size, (superBlock^.blockSize - remainder)); move(buf^, tmpBlock^[remainder], bytesToWrite); assert(saveBlock(blockRun, tmpBlock)); inc(buf, bytesToWrite); dec(size, bytesToWrite); position := position + bytesToWrite; end; // if remainder != 0 remainder := size mod superBlock^.blockSize; dec(size, remainder); // Size is currently in bytes. The remainder variable above holds the number // of remaining bytes (if any) outside full blocks we have to write. // convert the size into blocks size := size shr superBlock^.blockShift; while (size > 0) do begin blockRun := findBRun(@inode^.blocks, uInt32(position shr superBlock^.blockShift)); if (blockRun.len = 0) then begin printStuff := TRUE; blockRun := findBRun(@inode^.blocks, uInt32(position shr superBlock^.blockShift)); end; assert(blockRun.len > 0); blocksToWrite := min(blockRun.len, size); for blocksToWrite := 0 to min(blockRun.len, size)-1 do begin assert(saveBlock(blockRun, buf)); inc(blockRun.start); // This might wrap around? dec(blockRun.len); // might be unnecessary dec(size); inc(buf, superBlock^.blockSize); end; end; // while if (remainder <> 0) then begin // There is less than a block of data left to write. Read in the old block blockStart := uInt32((position+length-remainder) shr superBlock^.blockShift); blockRun := findBRun(@inode^.blocks, blockStart); assert(blockRun.len > 0); // obtain a pointer to the block tmpBlock := loadBlock(blockRun); assert(tmpBlock <> NIL); if (tmpBlock = NIL) then exit; move(buf^, tmpBlock^[0], remainder); assert(saveBlock(blockRun, tmpBlock)); end; result := 0; end; // TUbixFS.writeDataStream destructor TUbixFS.done; var searchRec:BTreeSearchRec; inode:PUbixfsInode; cacheNode:PCacheNode; dirtyCount:integer; u:uPtr; begin sync(); // XXX ToDo: // close all "open" files manually. if (vnids <> NIL) then dispose(vnids, done); if (vNodeQueue <> NIL) then dispose(vNodeQueue, done); if (fileDescriptors <> NIL) then FreeMem(fileDescriptors, maxFDEntry*sizeof(TVNode)); vnids := NIL; vNodeQueue := NIL; fileDescriptors := NIL; if (blockTime <> NIL) then dispose(blockTime, done); if (blockCache <> NIL) then begin dirtyCount := 0; fillchar(u, sizeof(u), 0); while (blockCache^.getFirstKey(searchRec)) do begin cacheNode := searchRec.value.p; assert(cacheNode <> NIL); if (cacheNode^.dirty) then inc(dirtyCount); u.p := cacheNode; blockCache^.Delete(@cacheNode^.address, u); dispose(cacheNode, done); end; if (dirtyCount <> 0) then writeln('Dirty blocks in cache: ', dirtyCount); dispose(blockCache, done); end; if (superBlock <> NIL) then dispose(superBlock); end; // TUbixFS.done procedure TUbixFS.examine; var dirTree:PBTree; searchRec:BTreeSearchRec; u:uPtr; begin new(dirTree, open(iAddrToStr(superBlock^.BAT), new(PUbixBTreeVFS, init(@self)))); dirTree^.installUserFunctions(compareKeyBlockRun, copyKeyBlockRun, keySizeBlockRun); dirTree^.printWholeTree('bat.txt'); dispose(dirTree, done); writeln('------------- examining FS -----------------'); write('Opening root dir...'); new(dirTree, open(IAddrToStr(superblock^.rootDir), new(PUbixBTreeVFS, init(@self)))); //dirTree^.PrintList('rootDir.txt'); writeln('done'); writeln('Root directory has ', dirTree^.getKeyCount(), ' entries'); dirTree^.getFirstKey(searchRec); repeat with searchRec.value.iAddr do write('value: ', AG, ':', start, ':', len); writeln(' key: ', pchar(searchRec.key)); until not dirTree^.findNext(searchRec); dispose(dirTree, done); if (blockCache <> NIL) then writeln('Block cache has ', blockCache^.getKeyCount(), ' entries'); writeln('Space used: ', superBlock^.usedBlocks, '(', superBlock^.numBlocks, ')'); writeln('------------------ done --------------------'); end; end.