Newer
Older
ubixfs-2 / UBIXFS.PAS
@flameshadow flameshadow on 28 Jul 2005 130 KB sync
// XXX ToDo
// in extendFile(), if allocBRun() fails, ask for small chunks instead of one large one
// ability to add attributes to the attr dir
// extension of attributes
unit UbixFS;

interface
{$DEFINE GRAPHICS}
uses 
  device, lists {$IFDEF GRAPHICS}, ObjGfx40, ogFont, objImage, DOS, CRT {$ENDIF};

   // open/fcntl - O_SYNC is only implemented on blocks devices and on files
   //   located on an ext2 file system 
const O_ACCMODE   = 00000003;
const O_RDONLY    = 00000000;
const O_WRONLY    = 00000001;
const O_RDWR      = 00000002;
const O_APPEND    = 00000010;
const O_CREAT     = 00000400; // not fcntl 
const O_TRUNC     = 00001000; // not fcntl 
const O_EXCL      = 00002000; // not fcntl 
const O_LARGEFILE = 00004000;
const O_SYNC      = 00100000;

//  O_NONBLOCK   = 00200004; // HPUX has separate NDELAY & NONBLOCK */
//const O_NDELAY      = O_NONBLOCK;
const O_NOCTTY    = 00400000; // not fcntl */
const FASYNC      = 00020000; // fcntl, for BSD compatibility */
const O_DIRECT    = 00040000; // direct disk access hint - currently ignored */
const O_DIRECTORY = 00010000; // must be a directory */
const O_NOFOLLOW  = 00000200; // don't follow links */
const O_INVISIBLE = 04000000; // invisible I/O, for DMAPI/XDSM 

const F_DUPFD     = 0;        // dup 
const F_GETFD     = 1;        // get f_flags 
const F_SETFD     = 2;        // set f_flags 
const F_GETFL     = 3;        // more flags (cloexec) 
const F_SETFL     = 4;
const F_GETLK     = 5;
const F_SETLK     = 6;
const F_SETLKW    = 7;
const F_GETLK64   = 8;
const F_SETLK64   = 9;
const F_SETLKW64  = 10;

 // for F_[GET|SET]FL
const FD_CLOEXEC  = 1;       // actually anything with low bit set goes 
 // for posix fcntl() and lockf() 
const F_RDLCK     = 01;
const F_WRLCK     = 02;
const F_UNLCK     = 03;

 // for old implementation of bsd flock () 
const F_EXLCK     = 4;       // or 3 
const F_SHLCK     = 8;       // or 4 
 
 // for leases 
const F_INPROGRESS= 16;

 // operations for bsd flock(), also used by the kernel implementation 
const LOCK_SH     = 1;      // shared lock 
const LOCK_EX     = 2;      // exclusive lock 
const LOCK_NB     = 4;      // or'd with one of the above to prevent blocking 
const LOCK_UN      = 8;      // remove lock 
  
const LOCK_MAND   = 32;     // This is a mandatory flock 
const LOCK_READ   = 64;     // ... Which allows concurrent read operations 
const LOCK_WRITE  = 128;    // ... Which allows concurrent write operations 
const LOCK_RW     = 192;    // ... Which allows concurrent read & write ops 
 
const INODE_NO_FLAG     = $00000000;
const INODE_IN_USE      = $00000001;

const ATTR_INODE        = $00000004;
const INODE_LOGGED      = $00000008;
const INODE_DELETED     = $00000010;
const PERMANENT_FLAGS   = $0000ffff;
const INODE_NO_CACHE    = $00010000;
const INODE_WAS_WRITTEN = $00020000;
const S_IFDIR           = $00040000;  // POSIX
const NO_TRANSACTION    = $00040000;

const DATASTREAM_DIRECT_BLOCKS = 16;    // number of direct blocks in the dataStream 

const UBIXFS_MAGIC_1    = $A0A0A0A;
const UBIXFS_MAGIC_2    = $B0B0B0B;
const UBIXFS_MAGIC_3    = $C0C0C0C;
const UBIXFS_INODE_MAGIC = $3BBE0AD9;

const UBIXFS_CLEAN = $434C454E;
const UBIXFS_DIRTY = $44495254;

const SECTOR_SHIFT = 9;
const SECTOR_SIZE = 1 shl SECTOR_SHIFT;

type
  int8            = shortInt;
  int16           = smallInt;
  int32           = longInt;

  uInt8           = byte;
  uInt16          = word;
  uInt32          = dWord;
{ Common Integer array definitions }

type
  int8ArrayPtr    = ^int8Array;
  int8Array       = array[ 0..0 ] of int8;

  int16ArrayPtr   = ^int16Array;
  int16Array      = array[ 0..0 ] of int16;

  int32ArrayPtr   = ^int32Array;
  int32Array      = array[ 0..0 ] of int32;

  { Common Unsigned Integer array definitions }

type
  uInt8ArrayPtr   = ^uInt8Array;
  uInt8Array      = array[ 0..0 ] of uInt8;

  uInt16ArrayPtr  = ^uInt16Array;
  uInt16Array     = array[ 0..0 ] of uInt16;

  uInt32ArrayPtr  = ^uInt32Array;
  uInt32Array     = array[ 0..0 ] of uInt32;

type
  flock = object
    lType         : smallint;
    lWhence       : smallint;
    lStart        : integer;
    lLen          : integer;
    lPID          : integer;
  end; // flock

type
  flock64 = object
    lType         : smallint;
    lWhence       : smallint;
    lStart        : int64;
    lLen          : int64;
    lPID          : integer;
  end; // flock64    

(*  TBlockRun = packed record
    AG     : int32;
    start  : uInt16;
    len    : uInt16;
  end;

  inodeAddr = TblockRun;

  uPtr = packed record
   case integer of
     0: (i      : int32);
     1: (u      : uInt32);
     2: (s      : single);
     3: (d      : double);
     4: (p      : pointer);
     5: (offset : int64);
     6: (iAddr  : inodeAddr);
  end;
*)
  PDiskSuperBlock = ^TDiskSuperBlock;
  TDiskSuperBlock = packed record
    name          : array[ 0..31 ] of char;
    magic1        : int32;

    fsByteOrder   : int32;

    // blockSize on disk 
    blockSize     : uInt32;

    // number of bits needed to shift a block number to get a byte address
    blockShift    : uInt32;

    numBlocks     : int64;
    usedBlocks    : int64;

    inodeSize     : uInt32;
    magic2        : uInt32;

    blocksPerAG   : uInt32;
    AGShift       : uInt32;
    numAGs        : uInt32;

    flags         : uInt32;

    logBlocks     : TBlockRun;
    logStart      : int64;
    logEnd        : int64;

    magic3        : int32;
    BAT           : inodeAddr;
    rootDir       : inodeAddr;
    indicies      : inodeAddr;

    pad           : array[0..371] of byte;
  end; // TDiskSuperBlock

  PDataStream = ^TDataStream;
  TDataStream = packed record
    direct        : array[0..DATASTREAM_DIRECT_BLOCKS-1] of TBlockRun;
    maxDirect     : int64;
    maxIndirect   : int64;
    maxDIndirect  : int64;
    size          : int64;
    indirect      : TBlockRun;
    doubleIndirect: TBlockrun;
    blockCount    : uInt32;
  end; // TDataStream

  PSmallDataNode = ^TSmallDataNode;
  TSmallDataNode = packed object
   public
    nameSize      : uInt16;
    dataSize      : uInt16;
    dataType      : treeTypes;
    reserved      : array[ 0..2 ] of uInt8;
    name          : array[ 0..0 ] of char;
    function        align(size:uInt32):uInt32;
    function        dataPtr:pointer;
    function        isEmpty:boolean;
    function        next:PSmallDataNode;
    function        size:uInt32;
  end; // TSmallDataNode

  PUbixfsInode = ^TUbixfsInode;
  TUbixfsInode = packed object
   public
    magic         : int32;
    inodeNum      : inodeAddr;
    uid           : uInt32;
    gid           : uInt32;
    mode          : int32;
    flags         : int32;
    createTime    : int64;
    modifiedTime  : int64;
    parent        : inodeAddr;
    attributes    : inodeAddr;
    iType         : uInt32;
    inodeSize     : uInt32;
    data          : uPtr;         {this was the etc field in bfs}
    blocks        : TDataStream;
    smallData     : TSmallDataNode;
    function        addAttr(attribute:PChar; attrType:treeTypes; buffer:pointer; count:uInt32):PSmallDataNode;
    function        delAttr(attribute:PChar; whichNode:PSmallDataNode):boolean;
    function        findAttr(attribute:PChar):PSmallDataNode;
  end; // TUbixfsInode

  PCacheNode = ^TCacheNode;
  TCacheNode = object
   public
    address       : int64;
    lastUsed      : int64;
    data          : pointer;
    dataSize      : uInt32;
    dirty         : boolean;
    locked        : boolean;
    constructor     init(_address, _lastUsed:int64; _dataSize:uInt32);
    destructor      done;
  end; // TCacheNode

  PVNode = ^TVNode;
  TVNode = object
    TID           : uInt32;     // Thread ID
    iAddr         : inodeAddr;
    position      : int64;
    flags         : uInt32; 
  end; // TVNode

  PVnid = ^TVnid;
  TVnid = object
    refCount      : uInt32;
    iAddr         : inodeAddr;
    mode          : uInt32;
  end; // TVnid

  PUbixFS = ^TUbixFS;
  TUbixFS = object
   private
    dev           : PDevice;
    superBlock    : PDiskSuperBlock;
    blockCache    : PbTree;
    blockTime     : PbTree;
    vnids         : PbTree;
    vNodeQueue    : PQueue;
    fileDescriptors : ^array[0..0] of TVNode;

    maxCacheEntry : uInt32;
    maxFDEntry    : uInt32;
    
    procedure       assert(x:boolean);
    function        allocBRun(blockRun:TBlockRun):TBlockRun;
    function        allocInode(parentIAddr:inodeAddr):PUbixfsInode;
    function        cacheBlock(blockAddr:TBlockRun):pointer;
    function        extendFile(inode:PUbixfsInode; newSize:int64):boolean;
    function        findBRun(dataStream:PDataStream; blockNum:uInt32):TBlockRun;
    function        findBRunDirect(dataStream:PDataStream; blockNum:uInt32):TBlockRun;    
    function        findBRunIndirect(dataStream:PDataStream; blockNum:uInt32):TBlockRun;
    function        findBRunDIndirect(dataStream:PDataStream; blockNum:uInt32):TBlockRun;
    function        freeUsedBlocks(blockRun:TBlockRun):integer;
    function        getFileSize(inode:PUbixfsInode):int64; 
    function        getFileSpace(inode:PUbixfsInode):int64; 
    function        iAddrToStr(iAddr:inodeAddr):string;
    function        isBAT(iAddr:inodeAddr):boolean;
    function        isValidIAddr(iAddr:inodeAddr):boolean;
    function        loadBlock(blockAddr:TBlockRun):pointer;

    function        nextAG(AG:uInt32):uInt32;
    function        pathToIAddr(path:PChar):inodeAddr;

    function        readAttr(iAddr:inodeAddr; attribute:PChar; attrType:treeTypes; position:uInt32; buffer:pointer; count:uInt32):integer;
    function        readDataStream(iAddr:inodeAddr; position:int64; buf:pointer; length:uInt32):integer; 
    function        saveBlock(blockAddr:TBlockRun; buf:pointer):boolean; 
    procedure       setBlockRun(var blockRun:TBlockRun; AG:uInt32; start, len:uInt16);
    procedure       setIAddr(var inode:inodeAddr; AG:uInt32; start, len:uInt16);
    function        shrinkFile(inode:PUbixfsInode):boolean;
    function        strToIAddr(s:string):inodeAddr;
    function        writeAttr(iAddr:inodeAddr; attribute:PChar; attrType:treeTypes; position:uInt32; buffer:pointer; count:uInt32):integer;
    function        writeDataStream(iAddr:inodeAddr; position:int64; buf:pointer; length:uInt32):integer;
   public
    constructor     init(device:Pdevice);
    procedure       printSuperBlock;
    procedure       sync;
    function        syncBlock(cacheNode:PCacheNode):boolean;
    {$IFDEF GRAPHICS}
    procedure       visualize;
    {$ENDIF}
    function        vfs_fCreate(fileName:PChar):integer;
    function        vfs_fExist(path:PChar):integer;
    function        vfs_close(handle:integer):integer;
    function        vfs_format(device:PDevice; fsBlockSize:uInt32; quick:boolean):integer;
    function        vfs_init:integer;
    function        vfs_mkdir(path:PChar):integer;
    function        vfs_read(handle:integer; buffer:pointer; size:uInt32):integer; 
    function        vfs_readAttr(handle:integer; attribute:PChar; attrType:treeTypes; position:uInt32; buffer:pointer; count:uInt32):integer;
    function        vfs_seek(handle:integer; newPosition:int64):integer;   
    function        vfs_unlink(fileName:PChar):integer;
    function        vfs_write(handle:integer; buffer:pointer; size:uInt32):integer;  
    function        vfs_writeAttr(handle:integer; attribute:PChar; attrType:treeTypes; position:uInt32; buffer:pointer; count:uInt32):integer; 
    procedure       examine;
    destructor      done; virtual;
  end; // TUbixFS
 
implementation

uses strings, dos, math, lists;

// IMPORTANT NOTE
//   You cannot under any circumstance change NUM_EXTENTION_BLOCKS. This
// constant defines the multiple of blocks that a file will be extended
// by, *and* it determines how many blocks are used in each blockRun in
// the Double Indirect dataStream.
//   NUM_EXTENSION_BLOCKS determines how much slack space is allocated for
// a file when it is extended. For example, a file with 0 bytes in it
// will take up 0 blocks for the data and 1 block for the inode (ignoring
// space required to hold the directory entry).  When the first byte is 
// written, extendFile() will allocate a multiple of NUM_EXTENSION_BLOCKS 
// to accomodate the amount of data being written out. So, a 1 byte file will 
// actually take up NUM_EXTENSION_BLOCKS (plus one for the inode) until the 
// file is closed. Then the file will be trimmed and the slack space reclaimed.
//   This value is also used in the double-indirect region of the dataStream
// to determine how many blocks each blockRun uses.

//         vv DO NOT CHANGE vv        //
const NUM_EXTENSION_BLOCKS = 64;      // <--- don't change this
//         ^^ DO NOT CHANGE ^^        //

const NUM_CACHE_ENTRIES = 8192; // default max entries in block cache
const NUM_FD_ENTRIES = 1024;    // default max entries in the File Descriptor Table
const BAT_PAGE_SIZE = 256;      // page size for the Block Allocation Tree

type 
  PUbixBTreeVFS = ^TUbixBTreeVFS;
  TUbixBTreeVFS = object(TBTreeVFS)
   private
    fs          : PUbixFS;
    iAddr       : inodeAddr;
    position    : int64;  
    openFile    : boolean;
   public
    constructor init(_fs:PUbixFS);
    function    fClose:boolean; virtual;
    function    fCreate(const filename:string):boolean; virtual;
    function    fExist(const filename:string):boolean; virtual;
    function    fOpen(const filename:string):boolean; virtual;
    function    fRead(var buf; size:uInt32):boolean; virtual;   
    function    fSeek(offset:uInt32):boolean; virtual;
    function    fWrite(var buf; size:uInt32):boolean; virtual;
    destructor  done; virtual; 
  end; // TUbixBTreeVFS

function getRDTSCTime:int64;
var x:int64;
begin
  asm
        rdtsc
        mov     dword ptr x, eax
        mov     dword ptr x+4, edx 
  end;
  result := x;
end;

// B^Tree compareKey BlockRun function
function compareKeyBlockRun(key1, key2:pointer):integer;
begin
  result := 0;
  if ((key1 = NIL) or (key2 = NIL)) then exit;

  if (TBlockRun(key1^).AG < TBlockRun(key2^).AG) then
    result := -1
  else
    if (TBlockRun(key1^).AG > TBlockRun(key2^).AG) then 
      result := 1
    else  // AGs are the same
      if (TBlockRun(key1^).start < TBlockRun(key2^).start) then
        result := -1
      else
        if (TBlockRun(key1^).start > TBlockRun(key2^).start) then
          result := 1;  // else they are identical
end; // compareKeyInt64

// B^Tree copyKey BlockRun function

procedure copyKeyBlockRun(srcKey, destKey:pointer);
begin
  if ((srcKey = NIL) or (destKey = NIL)) then exit;
  TBlockRun(destKey^) := TBlockRun(srcKey^);
end; // copyKeyBlockRun;

// B^Tree keySize BlockRun function

function keySizeBlockRun(key:pointer):integer;
begin
  if (key = NIL) then result := 0 else result := sizeof(TBlockRun);
end; // keySizeBlockRun

// TSmallDataNode methods
function TSmallDataNode.align;   result := (size+3) and not 3;  // round up to nearest dword address
function TSmallDataNode.dataPtr; result := @self + align(nameSize) + (sizeof(TSmallDataNode)-sizeof(name));
function TSmallDataNode.isEmpty; result := ((nameSize or dataSize) = 0);
function TSmallDataNode.next;    result := @self + size();
function TSmallDataNode.size;    result := align(nameSize) + align(dataSize) + (sizeof(TSmallDataNode)-sizeof(name));

// TUbixfsInode methods
function TUbixfsInode.addAttr;
var
  smallDataNode:PSmallDataNode;
  attrSize:uInt32;
  ptr:pointer;
begin
  result := NIL;  // failure by default;
  if (attribute = NIL) or (buffer = NIL) or (count = 0) then exit;
  // point a smallDataNode object at the first piece of the smallData section
  smallDataNode := @smallData;
  // Now we scan through looking for the first free slot
  while not smallDataNode^.isEmpty() do
    smallDataNode := smallDataNode^.next();
  // figure out how big the attribute name will be (includes null)
  attrSize := strlen(attribute)+1;
  // double-check that there's enough room to the end of the inode
  if (dword(smallDataNode - @self) + smallDataNode^.align(attrSize) + smallDataNode^.align(count) + (sizeof(TSmallDataNode)-1) > inodeSize) then exit;
  // Okay, it'll fit
  with smallDataNode^ do
    begin
      nameSize := attrSize;  // include null
      dataSize := count;
      dataType := attrType;
      fillchar(reserved, sizeof(reserved), 0);
      fillchar(name, align(nameSize), 0);  // better safe than sorry?
      move(attribute^, name, nameSize);
      fillchar(dataPtr^, align(count), 0);  // better safe than sorry
      move(buffer^, dataPtr^, count);
    end; // with
  result := smallDataNode;
end; // TUbixfsInode.addAttr

function TUbixfsInode.delAttr;
var 
  nextNode:PSmallDataNode;
  chunkSize:uInt32;
begin
  result := FALSE;  // failure by default
  // First check to see if we have been given a hint on what to delete.
  // If whichNode is NIL, then we should scan through the small data area
  // for the named attribute.
  if (whichNode = NIL) then 
    begin
      assert(attribute <> NIL);  // I don't think this should ever happen
      if (attribute = NIL) then exit;  // problem
      whichNode := findAttr(attribute);
    end;
  // if whichNode is NIL here, then it doesn't exist in the small data area
  if (whichNode = NIL) then exit;
  // whichNode points to the attribute we need to delete.
  // First, get the nextNode
  nextNode := whichNode^.next();
  if nextNode^.isEmpty() then
    begin
      // if we're on the last node, just clear it out
      fillchar(whichNode^, whichNode^.size(), 0);  
    end
  else
    begin
      // calculate space from nextNode to the end
      chunkSize := inodeSize - (nextNode - @self);
      // move the next node over the one we're deleting
      move(nextNode^, whichNode^, chunkSize);
      // clear out the slack
      fillchar(pointer(whichNode + chunkSize)^, (nextNode - whichNode), 0);
    end; 
  result := TRUE  // success
end; // TUbixfsInode.delAttr

function TUbixfsInode.findAttr;
var 
  smallDataNode:PSmallDataNode;
  found:boolean;
begin
  result := NIL;  // failure by default
  if (attribute = NIL) then exit;
  smallDataNode := @smallData;  
  found := FALSE;
  while not found and not smallDataNode^.isEmpty() do
    with smallDataNode^ do
      begin
        if (strcomp(attribute, name) = 0) then
          found := TRUE
        else
          smallDataNode := next();
      end; // while/with
  if found then result := smallDataNode;
end; // TUbixfsInode.findAttr

// TCacheNode methods
constructor TCacheNode.init;
begin
  address  := _address;
  lastUsed := _lastUsed;
  dataSize := _dataSize;
  data := NIL;
  if (dataSize <> 0) then GetMem(data, dataSize);
  dirty    := FALSE;
  locked   := FALSE;
end; // TCacheNode.init

destructor TCacheNode.done;
begin
  if (data <> NIL) and (dataSize <> 0) then FreeMem(data, dataSize);
  address  := 0;
  lastUsed := 0;
  data     := NIL;
  dataSize := 0;
  dirty    := FALSE;
  locked   := FALSE;
end; // TCacheNode.done

constructor TUbixBTreeVFS.init;
begin
//  writeln('TUbixBTreeVFS.init()');
  fs         := _fs;
  fillchar(iAddr, sizeof(iAddr), 0);  
  position   := 0;
  openFile   := FALSE;
end; // TUbixBTreeVFS.init;

function TUbixBTreeVFS.fClose;
var 
  tmpIAddr:inodeAddr;
  tmpInode:PUbixfsInode;
begin
  result := FALSE;
  if (fs = NIL) then exit;
  if (openFile) then
    begin
      if not fs^.isValidIAddr(IAddr) then exit;
      if not fs^.isBAT(IAddr) then   // Don't shrink the BAT
        begin
          tmpInode := fs^.loadBlock(IAddr);
          if (tmpInode = NIL) then exit;
          fs^.shrinkFile(tmpInode);  // shrink the directory 
        end;
      position := 0;
      openFile := FALSE;
    end;
  result := TRUE;
end; // TUbixBTreeVFS.fClose

function TUbixBTreeVFS.fCreate;
begin
  // I *think* that this code is only called for directories (and BAT). 
  // Therefore, the file technically exists, but just has nothing in it when we call fCreate.
  result := fOpen(filename);  
end; // TUbixBTreeVFS.fCreate

function TUbixBTreeVFS.fExist;
var 
  tmpIAddr:inodeAddr;
  tmpInode:PUbixfsInode;
begin
  result := FALSE;
  assert(fs <> NIL);
  if (fs = NIL) then exit;
  assert(fs^.superBlock <> NIL);
  if (fs^.superBlock = NIL) then exit;
  tmpIAddr := fs^.strToIAddr(filename);
  if not fs^.isValidIAddr(tmpIAddr) then exit;

  // If we've created an inode, but haven't written any data to it, then
  // the B^Tree hasn't written out a superblock, so technically the file doesn't
  // exist, and this is a really long run-on sentence.
  tmpInode := fs^.loadBlock(tmpIAddr);

  // I'm not sure why, but the compiler cannot properly evaluate (tmpInode^.blocks.size <> 0),
  // so we check both the high and low portions of the size to make sure it's not 0.
  //  result := (tmpInode <> NIL) and ((longLongRec(tmpInode^.blocks.size).lo <> 0) or (longLongRec(tmpInode^.blocks.size).hi <> 0));

  // However, checking for 0 and then taking the complement of that works .. so we go that way.
  result := ((tmpInode <> NIL) and (not (tmpInode^.blocks.size = 0)));
end; // TUbixBTreeVFS.fExist

function TUbixBTreeVFS.fOpen;
begin
  result := FALSE;
  if (fs = NIL) then exit;
  if (fs^.superBlock = NIL) then exit;
  iAddr := fs^.strToIAddr(filename);
  position := 0;
  openFile := fs^.isValidIAddr(iAddr);
  result := openFile;   /// fs^.isValidIAddr(iAddr);
end;  // TUbixBTreeVFS.fOpen

function TUbixBTreeVFS.fRead;
begin
  result := FALSE;
  if (fs = NIL) then exit;
//  with inode^.inodeNum do
//    write('fRead(', AG, ':', start, ':', len, ', ');
//  write(position, ', ');
//  write('@buf, ');

  result := (fs^.readDataStream(iAddr, position, @buf, size) = 0);
  position := position + int64(size);
end; // TUbixBTreeVFS.fRead

function TUbixBTreeVFS.fSeek;
begin
  // uhm.. if the desired offset is beyond the end of the file, 
  // should we not extend out the file? Or does that occur in writeDataStream ...
  position := offset;
  result := TRUE;
end; // TUbixBTreeVFS.fSeek

function TUbixBTreeVFS.fWrite;
begin
  result := FALSE;
  if (fs = NIL) then exit;
//  with inode^.inodeNum do
//    write('fWrite(', AG, ':', start, ':', len, ', ');
//  write(position, ', ');
//  write('@buf, ');
  result := (fs^.writeDataStream(iAddr, position, @buf, size) = 0);
  position := position + int64(size);
  //inc(position, size);
end; // TUbixBTreeVFS.fWrite;

destructor TUbixBTreeVFS.done;
begin
  // XXX: It's possible that we need to save the inode here since 
  // we possibly changed a directory
  fClose();   // "Close" the file if it hasn't been already.
  fillchar(iAddr, sizeof(iAddr), 0);
  fs := NIL;  // don't destruct it since we didn't allocate it
end; // TUbixBTreeVFS.done

constructor TUbixFS.init;
begin
  dev := device;
  superBlock := NIL;
  blockCache := NIL;
  blockTime  := NIL;
  vnids      := NIL;
  vnodeQueue := NIL;
  fileDescriptors := NIL;
  maxCacheEntry := NUM_CACHE_ENTRIES;
  maxFDEntry := NUM_FD_ENTRIES;
end; // TUbixFS.init

procedure TUbixFS.assert(x:boolean);
begin
  if not x then runerror(1000);
end; // TUbixFS.assert

function TUbixFS.allocBRun;
var
  FBL:PBTree;  // free block list
  u:uPtr;
  tmpBlockRun:TBlockRun;
  searchRec:BTreeSearchRec;
  found:boolean;
begin
  setBlockRun(result, 0, 0, 0);
  // basic sanity checks
with blockRun do
  writeln('allocBRun(', AG, ':', start, ':', len, ');');
  if (superBlock = NIL) or (blockRun.len = 0) then exit;
  if not isValidIAddr(superBlock^.BAT) then exit;

  if (blockRun.AG > superBlock^.numAGs) then exit;

  if (superBlock^.usedBlocks + int64(blockRun.len) > superBlock^.numBlocks) then exit; // should probably give a louder warning

  new(FBL, open(iAddrToStr(superBlock^.BAT), new(PUbixBTreeVFS, init(@self))));
  //new(FBL, open('BAT.DAT', NIL));
  if (FBL = NIL) then exit;

  FBL^.InstallUserFunctions(compareKeyBlockRun, copyKeyBlockRun, keySizeBlockRun);
  setBlockRun(tmpBlockRun, blockRun.AG, 0, 0);

  fillchar(searchRec, sizeof(searchRec), 0);

  // tmpBlockRun points to the Allocation Group marker in the tree
  if not FBL^.FindFirst(@tmpBlockRun, searchRec) then
    begin
      writeln('Could not find AG marker inside FBL');
      dispose(FBL, done);
      exit;
    end;

  found := FALSE;  // have we found a free range large enough to accomodate our request?
  repeat
    if not FBL^.findNext(searchRec) then assert(FBL^.GetFirstKey(searchRec));
    // assert(searchRec.key <> NIL); // I don't think this can ever happen
    with TBlockRun(searchRec.key^) do
      if (len = 0) then 
        if (AG = blockRun.AG) then 
          break       // We've wrapped completely around
        else
          continue;   // look for another one
    found := (TBlockRun(searchRec.key^).len >= blockRun.len);
  until found;
(*
  found = false;
  do {
    if (!FBL->FindNext(searchRec)) assert(FBL->GetFirstKey(searchRec));
    if (searchRec.key->len == 0) 
      if (searchRec.key->AG == blockRun.AG) break; else continue;
    found = (searchRec.key->len >= blockRun.len);
  } while (!found)
*)
  // It's possible we didn't find a run of the size requested..
  if found then
    begin
      // No matter what, we're going to have to delete the old entry and insert a new one

      tmpBlockRun := TBlockRun(searchRec.key^);  // make a copy
      u := searchRec.value;
      FBL^.Delete(@tmpBlockRun, u);  // delete the old key
      with tmpBlockRun do
        begin
          setBlockRun(result, AG, start, blockRun.len);  // set the result to the associated blockrun
          inc(start, blockRun.len);
          dec(len, blockRun.len);
          // I'm not sure what to put in the value field yet
          if (len <> 0) then assert(FBL^.Insert(@tmpBlockRun, u));  // if there's any blocks left, re-insert them into the FBL
          superBlock^.usedBlocks := superBlock^.usedBlocks + int64(blockRun.len);
        end; // with
    end;  // if found
  FBL^.FindClose(searchRec);  // de-allocate the searchRec
  dispose(FBL, done);
end; // TUbixFS.allocBRun

function TUbixFS.allocInode;
var
  inode:PUbixfsInode;
  blockRun:TBlockRun;
  
begin
  // XXX ToDo:
  // need to fill in the rest of the fields in the inode record

  // Notes:
  // 2005-04-27 mji
  //   Inodes take up exactly one logical block on the device.  They
  // are usually in the same AG as their parent directory.  Allocating
  // one involves getting some memory for it and finding a free block
  // to hold it on the device.

  result := NIL;
  if (superBlock = NIL) then exit;
  assert(parentIAddr.AG < superBlock^.numAGs);
  if (parentIAddr.AG >= superBlock^.numAGs) then exit;  // parent addr is outside out block count?
  // ask for a single block somewhere around the beginning of the requested AG
  setBlockRun(blockRun, parentIAddr.AG, 0, 1); 
  blockRun := allocBRun(blockRun);
  assert(blockRun.len <> 0);
  if (blockRun.len = 0) then exit;     // this would be bad
  // Normally we'd allocate memory for the inode. Instead we let loadBlock() do that
  // for us.
  //inode := loadBlock(blockRun);        // candidate for cacheBlock()
  inode := cacheBlock(blockRun);

  assert(inode <> NIL);
  if (inode = NIL) then exit;
  fillchar(inode^, superBlock^.inodeSize, 0);

  with inode^ do
    begin
      magic := UBIXFS_INODE_MAGIC;

      // inodes point to themselves
      inodeNum := blockRun;

      uid := 0;  // this doesn't mean much here
      gid := 0;  
      // mode := ??
      flags := INODE_NO_FLAG;
      // createTime := 
      // modifiedTime :=

      parent := parentIAddr;
      setIAddr(attributes, 0, 0, 0);
      inodeSize := superBlock^.inodeSize;
      // data := 0;
      // there's no data yet in the file, so don't 
      // set blocks to anything (cleared above)
      blocks.blockCount := 0;

    end; // with
  result := inode;
end; // TUbixFS.allocInode

function TUbixFS.cacheBlock;
var 
  blockAddress:int64;
  cacheNode:PCacheNode;
  u:uPtr;
  searchRec:BTreeSearchRec;
begin
  result := NIL; // failure by default  

  if (dev = NIL) or (superBLock = NIL) or (blockcache = NIL) then exit;

  with blockAddr, superBlock^ do
    blockAddress := (AG shl AGShift) + start;

  fillchar(u, sizeof(u), 0);  // jut to be safe
  cacheNode := NIL;

  if (blockCache^.Find(@blockAddress, u)) then
    begin
      // block was in cache
      assert(u.p <> NIL);
      cacheNode := u.p;
      assert(cacheNode^.data <> NIL);
      fillchar(cacheNode^.data^, superBlock^.blockSize, 0);
      cacheNode^.dirty := TRUE;  
      // Now we need to update the blockTime. We do this by first deleting
      // the cacheNode out of the blockTime cache.
      blockTime^.Delete(@cacheNode^.lastUsed, u);  // Delete old one
      cacheNode^.lastUsed := getRDTSCTime();       // Get the new timestamp
      blockTime^.Insert(@cacheNode^.lastUsed, u);  // Insert with new time
    end
  else
    begin
      // Requested block isn't in cache

      // Check to make sure cache isn't full

      if (blockCache^.getKeyCount() = maxCacheEntry) then
        begin
          blockTime^.getFirstKey(searchRec);                  // least recently used block
          cacheNode := searchRec.value.p;                     // get the pointer from the value
          assert(cacheNode <> NIL);
          assert(syncBlock(cacheNode));
          u.p := cacheNode;
          blockCache^.Delete(@cacheNode^.address, u);
          blockTime^.Delete(@cacheNode^.lastUsed, u);
          // We've deleted the cacheNode out of both the blockCache and
          // the blockTime trees. We're going to re-use the cacheNode.
          cacheNode^.address := blockAddress;
          cacheNode^.lastUsed := getRDTSCTime();
          // No need to update data, dataSize, or dirty. We re-use the first two,
          // and dirty will be set to FALSE above if necessary.  Might need to 
          // reset locked if we're using that.
        end
      else
        new(cacheNode, init(blockAddress, getRDTSCTime(), superBlock^.blockSize));

      assert(cacheNode <> NIL);
      assert(cacheNode^.data <> NIL);
      u.p := cacheNode;
      if not (blockCache^.Insert(@blockAddress, u)) then
        writeln('Error inserting ', blockAddress, ' into blockCache in cacheBlock()');
      if not (blockTime^.Insert(@cacheNode^.lastUsed, u)) then
        writeln('Error inserting into blockTime');
    end; 

  if (cacheNode <> NIL) then 
    begin
      fillchar(cacheNode^.data^, superBlock^.blockSize, 0);
      cacheNode^.dirty := TRUE;                    // mark this block as dirty
      result := cacheNode^.data;                   // return the data pointer      
    end;

end; // TUbixFS.cacheBlock

function TUbixFS.extendFile;

  function appendToDIndirect(newBlockRun:TBlockRun):boolean;
    var 
      extents, indirectExtents:array[0..NUM_EXTENSION_BLOCKS-1] of ^array[0..0] of TBlockRun;
      extentsIndex, indirectExtentsIndex:uInt32;
      extentsBlockIndex, indirectExtentsBlockIndex:uInt32;
      extentsUpperBound:uInt32;
      tmpBlockRun:TBlockRun;
    begin
writeln('in appendToDIndirect()');
      // Many apologies for the long variable names. This section is relatively
      // complicated, and it wouldn't help to have cryptic variables.
      result := FALSE; // failure by default
      if (newBlockRun.len mod NUM_EXTENSION_BLOCKS <> 0) then
        begin
          writeln('Double indirect block run is not a multiple of NUM_EXTENSION_BLOCKS');
          exit;
        end;
      extentsUpperBound := (superBlock^.blockSize div sizeof(TBlockRun)) - 1;

      if not isValidIAddr(inode^.blocks.doubleIndirect) then
        begin
          // allocate a double indirect block
          tmpBlockRun := inode^.inodeNum;       // allocate a block near the parent inode
          tmpBlockRun.len := NUM_EXTENSION_BLOCKS;
          inode^.blocks.doubleIndirect := allocBRun(tmpBlockRun); 

          if not isValidIAddr(inode^.blocks.doubleIndirect) then
            begin
              writeln('Failed to allocate double indirect block');
              exit
            end;
          tmpBlockRun := inode^.blocks.doubleIndirect;
          for extentsBlockIndex := low(extents) to high(extents) do
            begin
              extents[extentsBlockIndex] := cacheBlock(tmpBlockRun);
              //extents[extentsBlockIndex] := loadBlock(tmpBlockRun);   // good candidate for cacheBlock()
              //fillchar(extents[extentsBlockIndex]^, superBlock^.blockSize, 0);
              //assert(saveBlock(tmpBlockRun, extents[extentsBlockIndex]));
              inc(tmpBlockRun.len);
            end; // for extentsBlockIndex

          // We have a freshly allocated double indirect block. Now we need an indirect block
          // tmpBlockRun := inode^.blocks.doubleIndirect;  // allocate an indirect block near the dindirect block  
          // tmpBlockRun.len := NUM_EXTENSION_BLOCKS;   // theoretically, it is already
          // extents[0]^[0] := allocBRun(tmpBlockRun);
          extents[0]^[0] := allocBRun(inode^.blocks.doubleIndirect); 
          if not isValidIAddr(extents[0]^[0]) then
            begin
              writeln('Failed to allocate indirect block beneath a double indirect block');
              exit
            end;
          // save out the doubleIndirect block
          inode^.blocks.maxDIndirect := inode^.blocks.maxIndirect;
          //assert(saveBlock(inode^.blocks.doubleIndirect, extents));  
          // This is a little misleading.  Since saveBlock() formulates the
          // address based on tbe base-block of the blockRun, this will only save
          // out the first block.  Secondly, we may not even need to do this since
          // we tagged them for saving above. 
          assert(saveBlock(inode^.blocks.doubleIndirect, extents[0])); 

          tmpBlockRun := extents[0]^[0];
          for indirectExtentsBlockIndex := low(indirectExtents) to high(indirectExtents) do
            begin
              indirectExtents[indirectExtentsBlockIndex] := cacheBlock(tmpBlockRun);  // good candidate for cacheBlock()             
              (* indirectExtents[indirectExtentsBlockIndex] := loadBlock(tmpBlockRun);  // good candidate for cacheBlock()
              fillchar(indirectExtents[indirectExtentsBlockIndex]^, superBlock^.blockSize, 0);
              assert(saveBlock(tmpBlockRun, indirectExtents[indirectExtentsBlockIndex])); *)
              inc(tmpBlockRun.start);
            end; // for indirectExtentsBlockIndex

          // Because we have a freshly allocated double indirect block, we have no entries
          // in it yet.  This means that our DIndirect index is 0, and our indirect index is also 0.
          extentsIndex := 0; 
          extentsBlockIndex := 0;
          indirectExtentsIndex := 0;
          indirectExtentsBlockIndex := 0;
        end
      else
        begin
          // double indirect block exists. Load it
          tmpBlockRun := inode^.blocks.doubleIndirect;
          for extentsBlockIndex := low(extents) to high(extents) do
            begin
              extents[extentsBlockindex] := loadBlock(tmpBlockRun);
              assert(extents[extentsBlockindex] <> NIL);
              inc(tmpBlockRun.start);
            end; // for extentsBlockIndex

          // Now scan through (backwards) to find the last entry.
          for extentsBlockIndex := high(extents) downto low(extents) do
            if (extents[extentsBlockIndex]^[0].len <> 0) then break;

          for extentsIndex := extentsUpperBound downto 0 do
            if (extents[extentsBlockIndex]^[extentsIndex].len <> 0) then break;
          // [extentsBlockindex],[extentsIndex] points to the entry which holds the indirect block with the last entry.
          // load that one
          tmpBlockRun := extents[extentsBlockindex]^[extentsIndex];
          for indirectExtentsBlockIndex := low(indirectExtents) to high(indirectExtents) do
            begin
              indirectExtents[indirectExtentsBlockIndex] := loadBlock(tmpBlockRun);
              assert(indirectExtents[indirectExtentsBlockIndex] <> NIL);
              inc(tmpBlockRun.start);
            end; // for indirectExtentsBlocksIndex

          // first check the last entry
          if (indirectExtents[high(indirectExtents)]^[extentsUpperBound].len <> 0) then
            begin
              // this indirect block is full. Allocate a new one
              if ((extentsIndex = extentsUpperBound) and (extentsBlockIndex = high(extents))) then
                begin
                  // I might as well halt at this point because the user is totally hosed.
                  writeln('Egad! The double indirect block section is full. Can not write any more to this file!');
                  exit;
                end; // if doomed
              tmpBlockRun := extents[extentsBlockIndex]^[extentsIndex];  // address of the aforementioned full indirect block
              // tmpBlockRun.len := NUM_EXTENSION_BLOCKS;  // should be already
              if (extentsIndex = extentsUpperBound) then
                begin
                  extentsIndex := 0;
                  inc(extentsBlockIndex);  // checked above to see if this was full
                end
              else
                inc(extentsIndex); 

              extents[extentsBlockIndex]^[extentsIndex] := allocBRun(tmpBlockRun); 
              tmpBlockRun := extents[extentsBlockIndex]^[extentsIndex];     // grab a copy of the brun we just made
              assert(isValidIAddr(tmpBlockRun));  // better safe than sorry
              for indirectExtentsBlockIndex := low(indirectExtents) to high(indirectExtents) do
                begin
                  indirectExtents[indirectExtentsBlockIndex] := cacheBlock(tmpBlockRun);  // good candidate for cacheBlock()
                  (* indirectExtents[indirectExtentsBlockIndex] := loadBlock(tmpBlockRun);  // good candidate for cacheBlock()
                  fillchar(indirectExtents[indirectExtentsBlockIndex]^, superBlock^.blockSize, 0);
                  assert(saveBlock(tmpBlockRun, indirectExtents[indirectExtentsBlockIndex])); *)
                  inc(tmpBlockRun.start);
                end;
              tmpBlockRun := inode^.blocks.doubleIndirect;
              inc(tmpBlockRun.start, extentsBlockIndex);
              saveBlock(tmpBlockRun, extents[extentsBlockIndex]); // save, since we added an indirect block to it
              indirectExtentsIndex := 0;
              indirectExtentsBlockIndex := 0;
            end
          else
            begin
              // now scan forward
              indirectExtentsBlockIndex := 0;
              indirectExtentsIndex := 0;
              while (indirectExtents[indirectExtentsBlockIndex]^[indirectExtentsIndex].len <> 0) do
                begin
                  if (indirectExtentsIndex = extentsUpperBound) then
                    begin
                      indirectExtentsIndex := 0;
                      if (indirectExtentsBlockIndex = high(indirectExtents)) then break else inc(indirectExtentsBlockIndex);
                    end
                  else
                    inc(indirectExtentsIndex);
                end; // while
(*              for indirectExtentsBlockIndex := low(indirectExtents) to high(indirectExtents) do
                for indirectExtentsIndex := 0 to extentsUpperBound do
                  if (indirectExtents[indirectExtentsBlockIndex]^[indirectExtentsIndex].len = 0) then break; *)
writeln('[1] indirectExtentsIndex: ', indirectExtentsIndex);
writeln('[1] indirectExtentsBlockIndex: ', indirectExtentsBlockIndex);

            end;
          // [extentsBlockIndex],[extentsIndex] points to which indirect block we're on
          // [indirectExtentsBlockIndex],[indirectExtentsIndex] points to the first free entry in that indirect block  
        end;

      while (newBlockRun.len <> 0) do
        begin
          tmpBlockRun := newBlockRun;
          tmpBlockRun.len := NUM_EXTENSION_BLOCKS;
writeln('indirectExtentsIndex: ', indirectExtentsIndex);
writeln('indirectExtentsBlockIndex: ', indirectExtentsBlockIndex);
          indirectExtents[indirectExtentsBlockIndex]^[indirectExtentsIndex] := tmpBlockRun;
          with newBlockRun do
            begin
              dec(len, NUM_EXTENSION_BLOCKS);
              if ((uInt32(start) + NUM_EXTENSION_BLOCKS) > high(start)) then inc(AG);
              inc(start, NUM_EXTENSION_BLOCKS); // Warning! Relies on integer wrap-around (start is 16-bit)
            end; // with
          
          if (indirectExtentsIndex = extentsUpperBound) then
            begin
              // this indirect block is full. 
              // Save the current one
              assert(saveBlock(extents[extentsBlockIndex]^[extentsIndex], indirectExtents[indirectExtentsBlockIndex]));
              if (indirectExtentsBlockIndex = high(indirectExtents)) then
                begin
                  // Allocate a new one
                  if ((extentsIndex = extentsUpperBound) and (extentsBlockIndex = high(extents))) then
                    begin
                      // I might as well halt at this point because the user is totally hosed.
                      writeln('Egad! The double indirect block section is full. Can not write any more to this file!');
                      exit;
                    end; // if doomed
                  tmpBlockRun := extents[extentsBlockIndex]^[extentsIndex];  // snag the previous location
                  // update which index in the double indirect block we're using
                  if (extentsIndex = extentsUpperBound) then
                    begin
                      inc(extentsBlockIndex);
                      extentsIndex := 0;
                    end
                  else
                    inc(extentsIndex);  

                  extents[extentsBlockIndex]^[extentsIndex] := allocBRun(tmpBlockRun); // allocate an indirect block near the previous one
                  // make a copy of the indirect block we just allocated
                  tmpBlockRun := extents[extentsBlockIndex]^[extentsIndex]; 
                  assert(isValidIAddr(tmpBlockRun));  // just to be sure
                  for indirectExtentsBlockIndex := low(indirectExtents) to high(indirectExtents) do
                    begin
                      indirectExtents[indirectExtentsBlockIndex] := cacheBlock(tmpBlockRun); // candidate for cacheBlock()
                      (* indirectExtents[indirectExtentsBlockIndex] := loadBlock(tmpBlockRun); // candidate for cacheBlock()
                      assert(indirectExtents[indirectExtentsBlockIndex] <> NIL);
                      fillchar(indirectExtents[indirectExtentsBlockIndex]^, superBlock^.blockSize, 0);
                      assert(saveBlock(tmpBlockRun, indirectExtents[indirectExtentsBlockIndex])); *)
                      inc(tmpBlockRun.start);                
                    end; // for indirectExtentsBlockIndex
                  indirectExtentsIndex := 0;
                  indirectExtentsBlockIndex := 0;
                  // Here we need to save the extents block we just changed -->
                  tmpBlockRun := inode^.blocks.doubleIndirect;
                  inc(tmpBlockRun.start, extentsBlockIndex);
                  assert(saveBlock(tmpBlockRun, extents[extentsBlockIndex]));
                  // <---
                end
              else
                begin
                  // We just need to pop over to the next section of the indirect block.
                  // At this point I really want to beat the BeOS guy who decided to make these strucutres
                  // span multiple disk blocks.  They said [in the book] that they didn't want to do that because
                  // it complicated things.  Well, gee, no kidding.
                  inc(indirectExtentsBlockIndex);
                  indirectExtentsIndex := 0;
                end;
            end
          else
            inc(indirectExtentsIndex);
          // update the size the maxDoubleIndirect blocks reference
          inode^.blocks.maxDIndirect := inode^.blocks.maxDIndirect + (NUM_EXTENSION_BLOCKS shl superBlock^.blockShift);
        end; // while
      // XXX ?-> saveBlock(extents^[extentsIndex], indirectExtents);
    end; // local function appendToDIndirect()

  function appendToIndirect(newBlockRun:TBlockRun):boolean;
    var 
      extents:array[0..NUM_EXTENSION_BLOCKS-1] of ^array[0..0] of TBlockRun;
      extentsUpperBound, blockIndex, index:uInt32;
      tmpBlockRun:TBlockRun;
    begin
      result := FALSE; // failure by default
      // The indirect block is NUM_EXTENSION_BLOCKS long
      extentsUpperBound := (superBlock^.blockSize div sizeof(TBlockRun))-1;

      for blockIndex := 0 to NUM_EXTENSION_BLOCKS-1 do
        extents[blockIndex] := NIL;  // NIL out the array

      if not isValidIAddr(inode^.blocks.indirect) then
        begin
          // allocate an indirect block
          tmpBlockRun := inode^.inodeNum;                   // alloate near the parent inode
          tmpBlockRun.len := NUM_EXTENSION_BLOCKS;          // indirect block is NUM_EXTENSION_BLOCKS long
          inode^.blocks.indirect := allocBRun(tmpBlockRun); // Try to allocate it
          if not isValidIAddr(inode^.blocks.indirect) then
            begin
              writeln('Failed to allocate indirect block');
              exit
            end;
          tmpBlockRun := inode^.blocks.indirect;
          for blockIndex := 0 to NUM_EXTENSION_BLOCKS-1 do
            begin
              extents[blockIndex] := cacheBlock(tmpBlockRun);      // candidate for cacheBlock()
(*              extents[blockIndex] := loadBlock(tmpBlockRun);      // candidate for cacheBlock()
              fillchar(extents[blockIndex]^, superBlock^.blockSize, 0);
              saveBlock(tmpBlockRun, extents[blockIndex]); *)
              inc(tmpBlockRun.start);  // XXX hopefully this won't wrap around. 
            end; // for index
//          extents := loadBlock(inode^.blocks.indirect);

          inode^.blocks.maxIndirect := inode^.blocks.maxDirect;
        end
      else
        begin
          // the indirect block exists, so read it in
          tmpBlockRun := inode^.blocks.indirect;
          for blockIndex := low(extents) to high(extents) do
            begin
              extents[blockIndex] := loadBlock(tmpBlockRun);
              assert(extents[blockIndex] <> NIL);
              inc(tmpBlockRun.start);
            end; // for i
        end; 
      // extents[] has the indirect block in it
      // We first check to make sure that we're not going to be moving
      // to the next section of the dataStream
      if (extents[high(extents)]^[extentsUpperBound].len <> 0) then
        result := appendToDIndirect(newBlockRun)   // promote the newBlockRun to the DIndirect section
      else
        begin
          // scan through for the first free slot
          blockIndex := 0;
          index := 0;
          while (extents[blockIndex]^[index].len <> 0) do
            begin
              if (index = extentsUpperBound) then 
                begin
                  index := 0;
                  if (blockIndex = high(extents)) then break else inc(blockIndex);
                end
              else
                inc(index);
            end; // while
(*          for blockIndex := 0 to NUM_EXTENSION_BLOCKS-1 do
            for index := 0 to extentsUpperBound do
              if extents[blockIndex]^[index].len = 0 then break; *)
          extents[blockIndex]^[index] := newBlockRun;              // index points to the first free slot. Fill it in with the newBlockRun
          tmpBlockRun := inode^.blocks.indirect;
          inc(tmpBlockRun.start, blockIndex);          // hopefully this won't wrap around
          saveBlock(tmpBlockRun, extents[blockIndex]);  // save out the indirect block. Could check for failure here
          // update the size the maxDoubleIndirect blocks reference
          inode^.blocks.maxIndirect := inode^.blocks.maxIndirect + (newBlockRun.len shl superBlock^.blockShift);
          result := TRUE;                              // success
        end;
    end; // local function appendToIndirect()

  function appendToDirect(newBlockRun:TBlockRun):boolean;
    var index:uInt32;
    begin
      with newBlockRun do
        writeln('in appendToDirect(); blockRun = ', ag, ':', start, ':', len);
      result := FALSE;
      with inode^, blocks do
        begin
          if (direct[high(direct)].len <> 0) then 
            result := appendToIndirect(newBlockRun)  
          else
            begin
              for index := low(direct) to high(direct) do
                if (direct[index].len = 0) then break;
              direct[index] := newBlockRun;
              maxDirect := maxDirect + (newBlockRun.len shl superBlock^.blockShift);
              result := TRUE;
            end; // else
        end; // with
    end; // local function appendToDirect

  function appendToDataStream(newBlockRun:TBlockRun):boolean;
    begin
      if not(inode^.blocks.maxDIndirect = 0) then result := appendToDIndirect(newBlockRun)
      else if not(inode^.blocks.maxIndirect = 0) then result := appendToIndirect(newBlockRun)
      else result := appendToDirect(newBlockRun);
    end; // local function appendToDataStream

  function continueIndirect(newBlockRun:TBlockRun):boolean;
    var 
      extents:array[0..NUM_EXTENSION_BLOCKS-1] of ^array[0..0] of TBlockRun;
      extentsUpperBound, blockIndex, index:uInt32;
      tmpBlockRun:TBlockRun;
    begin
      result := FALSE;
      extentsUpperBound := (superBlock^.blockSize div sizeof(TBlockRun))-1;

      tmpBlockRun := inode^.blocks.indirect;
      for blockIndex := low(extents) to high(extents) do
        begin
          extents[blockIndex] := loadBlock(tmpBlockRun);
          assert(extents[blockIndex] <> NIL);
          inc(tmpBlockRun.start);                 // hopefully this won't wrap
        end;

      for blockIndex := high(extents) downto low(extents) do
        if (extents[blockIndex]^[0].len <> 0) then break;

      for index := extentsUpperBound downto 0 do
        if (extents[blockIndex]^[index].len <> 0) then break;
     
      assert(extents[blockIndex]^[index].len <> 0); // just to be sure
      // extents[blockIndex]^[index] has the last entry
      if (uInt32(extents[blockIndex]^[index].len) + newBlockRun.len < 65535) then
        begin
          inc(extents[blockIndex]^[index].len, newBlockRun.len);
          tmpBlockRun := inode^.blocks.indirect;
          inc(tmpBlockRun.start, blockIndex);
          saveBlock(tmpBlockRun, extents[blockIndex]);
          inode^.blocks.maxIndirect := inode^.blocks.maxIndirect + (newBlockRun.len shl superBlock^.blockShift);
          result := TRUE;
        end
      else // oops, we overflowed the length
        begin
          // need to check to see if we were the last indirect entry. If we were
          // then we need to allocate a double indirect block and store it in there
          if (index = extentsUpperBound) then
            begin
              if (blockIndex = high(extents)) then
                result := appendToDIndirect(newBlockRun)
              else
                begin
                  inc(blockIndex);
                  extents[blockIndex]^[0] := newBlockRun;
                  tmpBlockRun := inode^.blocks.indirect;
                  inc(tmpBlockRun.start, blockIndex);
                  saveBlock(tmpBlockRun, extents[blockIndex]);
                  inode^.blocks.maxIndirect := inode^.blocks.maxIndirect + (newBlockRun.len shl superBlock^.blockShift);
                  result := TRUE;
                end
            end
          else
            begin
              extents[blockIndex]^[index+1] := newBlockRun;
              tmpBlockRun := inode^.blocks.indirect;
              inc(tmpBlockRun.start, blockIndex);
              saveBlock(tmpBlockRun, extents[blockIndex]);
              inode^.blocks.maxIndirect := inode^.blocks.maxIndirect + (newBlockRun.len shl superBlock^.blockShift);
              result := TRUE;
            end; // else
        end; // else
      
    end; // local function continueIndirect

 function continueDirect(newBlockRun:TBlockRun):boolean;
  var
    index:uInt32;
  begin
    result := FALSE;
    with inode^, blocks do
      begin
        for index := high(direct) downto low(direct) do
          if (direct[index].len <> 0) then break;

        assert(direct[index].len <> 0);  // just to be sure
        // make sure we don't overflow the length
        if (uInt32(direct[index].len) + newBlockRun.len < 65535) then
          begin
            inc(direct[index].len, newBlockRun.len);  // update the last entry
            maxDirect := maxDirect + (newBlockRun.len shl superBlock^.blockShift);
            result := TRUE;
          end
        else  // oops, we overflowed the length
          begin
            // need to check to see if we were the last direct entry. If we were
            // then we need to allocate an indirect block and store it in there
            if (index = high(direct)) then 
              result := appendToIndirect(newBlockRun)
            else
              begin
                direct[index+1] := newBlockRun; // store it in the next available entry
                maxDirect := maxDirect + (newBlockRun.len shl superBlock^.blockShift);
                result := TRUE;
              end;
          end; // else
      end; // with
   end; // local function continueDirect

  function continueDataStream(newBlockRun:TBlockRun):boolean;
    begin
      // The extended blockRun continues a previous run.

      // When extending the file we request an allocation of more blocks 
      // near to the last blockRun of the file. If the new blocks 
      // are contiguous to the last blockRun of the file
      // then we can simply extend that blockRun.  This not only 
      // prevents fragmentation when possible, it also reduces the
      // number of actual blockRuns needed to store the file.
      if (inode^.blocks.maxDIndirect > 0) then result := appendToDIndirect(newBlockRun)
      else if (inode^.blocks.maxIndirect > 0) then result := continueIndirect(newBlockRun)
      else result := continueDirect(newBlockRun);
    end; // local function continueDataStream

var
  blockRun, exBlockRun:TBlockRun;
  blocksToAllocate:uInt32;

begin // extendFile()
  // XXX ToDo:
  // Need to verify support for indirect and double-indirect blocks

  // Notes:
  // 2005-04-27 mji
  //   This function is called by writeDataStream() when we're about to write
  // past the end of the file.  When extending out a file, it is almost never
  // extended by the amount written. Instead it is at least 1 block, and usually
  // more (in our case: 4, as mentioned below). Currently we pass in the number 
  // of blocks to extend the file by.  This is problematic for a few reasons. 
  //   The first is that the code to figure out how much to extend the file by 
  // would have to be in writeDataStream().  That's not necessarily a problem, 
  // except that the amount to be extended by depends on many factors, some 
  // including whether we're writing out more than the default small block 
  // extension size (currently is 4) or if we're in double indirect blocks, 
  // which extends out by NUM_DINDIRECT_BLOCKS (also currently 4).
  //   The second problem is that we save the inode here -- and also in 
  // writeDataStream() -- because the new size of the file is updated in wDS() 
  // instead of here.
  //   The third problem is when you seek past the end of the file and then
  // write.  Space should be allocated for all the space between the old
  // EOF and the new write position.  This is common behaviour for most 
  // filesystems, and should probably be emulated here as well.
  //   The proper solution is to pass in the new size and let this function 
  // work out exactly how much space on disk it is really going to take.

  result := FALSE;  // failure by default
  // basic sanity checks
  if (inode = NIL) or (superBlock = NIL) then exit;

  // if the file size is 0 then this is the first extension.
  if (inode^.blocks.size = 0) then  // first extension
    begin
      // A file's data is stored in the next allocation group from it's parent
      // inode. This will help distribute the data across the disk.
 
      // Find out how many blocks the new size will take up.  Do this by rounding
      // up the size to the nearest block. Then we round that up to the nearest
      // NUM_DIRECT_BLOCKS multiple.
      blocksToAllocate := uInt32(((newSize+(superBlock^.blockSize-1)) shr superBlock^.blockShift));
      blocksToAllocate := (blocksToAllocate + (NUM_EXTENSION_BLOCKS-1)) and not (NUM_EXTENSION_BLOCKS-1);
      // Allocate directory info in the same AG as the inode
      if (inode^.mode and S_IFDIR <> 0) then 
        setBlockRun(exBlockRun, inode^.inodeNum.AG, 0, blocksToAllocate)
      else
        setBlockRun(exBlockRun, nextAG(inode^.inodeNum.AG), 0, blocksToAllocate);
      exBlockRun := allocBRun(exBlockRun);    // try to allocate a blockrun
      assert(exBlockRun.len <> 0);
      if (exBlockRun.len = 0) then exit;      // erp. Couldn't extend the file
      inode^.blocks.direct[0] := exBlockRun;  // Set the first direct run
      // The space allocated for the file can and may be different than the
      // actual size.  Set the maxDirect to the size (in bytes) of how much space
      // we allocated
      inode^.blocks.maxDirect := blocksToAllocate shl superBlock^.blockShift;  
    end // size = 0
  else
    begin
    // the size of the file wasn't 0 (handled above). We need to extend the 
    // file out.  Find where the last blocks of the file are

    // Note:
    // 2005-04-28 mji
    //   We're going to have a potential problem here.  When we check to see how many blocks we
    // want to extend by, we don't know for sure if we're actually going to end up in the
    // dataStream section we were assuming.  For example, we may have no more room in the
    // indirect block section of dataStream and thus will have to store the entry in the double 
    // indirect area.  If the constants for each section are different, then we may be requesting the
    // wrong amount.  This is *only* a problem if any of the constants are different.  Therefore
    // the solution is to pick some arbitrary extension amount (probably 4 or 8 blocks) and stick 
    // with that.  If we pick the amount that will end up in the double indirect area (currently 4
    // but I'm considering 8) then we can easily break apart our blockRun if we're writing it
    // to the double indirect section.  
    // IMPORTANT NOTE
    //   Once this value is chosen it will be set in stone since we will break everything by 
    // changing it. Math done for retrieving the blockRun from the double indirect region will 
    // rely on this value, and it can never be changed.  BeFS uses 4 blocks for the double
    // indirect section, and that seems fine with me. The largest file using contiguous blockRuns
    // will be 38GB.  I am, however, pondering 8 blocks instead.  Remember, extensions are done 
    // as a multiple of this value.
    // 2005-04-28 mji
    //   I decided to use a single constant, NUM_EXTENSION_BLOCKS, and it is set to 4.
    blocksToAllocate := uInt32((newSize+(superBlock^.blockSize-1)) shr superBlock^.blockShift);    
    // blocksToAllocate is now rounded up to the nearest block of how much space we need.
    blocksToAllocate := (blocksToAllocate + (NUM_EXTENSION_BLOCKS-1)) and not (NUM_EXTENSION_BLOCKS-1);
    // blocksToAllocate is now rounded up to the nearest multiple of NUM_EXTENSION_BLOCKS
    // now subtract off how many blocks we already have
    dec(blocksToAllocate, inode^.blocks.blockCount);

    // XXX
    // I don't think this is right. We need to find the last block returned by
    // getFileSpace(), not the size.  Something to ponder/look into/test
    //blockRun := findBRun(@inode^.blocks, inode^.blocks.size shr superBlock^.blockShift);
    assert(inode^.blocks.blockCount <> 0);     // if blockCount is 0, we're in big trouble
    blockRun := findBRun(@inode^.blocks, inode^.blocks.blockCount-1);
    assert(blockRun.len <> 0);     
    if (blockRun.len = 0) then exit;           // This would be odd
    exBlockRun     := blockRun;                // copy the last run to exBlockRun
    exBlockRun.len := blocksToAllocate;               // set the requested number of blocks
    exBlockRun     := allocBRun(exBlockRun);   // allocate a block run near the previous one
    assert(exBlockRun.len <> 0);
    if (exBlockRun.len = 0) then exit;         // should probably yell louder about this

    // blockRun points to the last blockRun structure associated with this inode
    if (exBlockRun.start = blockRun.start+1) and (exBlockRun.AG = blockRun.AG) then
      continueDataStream(exBlockRun)
    else
      appendToDataStream(exBlockRun);
  end;  // else size <> 0
  inode^.blocks.size := newSize;      // Now we update the size here and not in writeDataStream()
  inc(inode^.blocks.blockCount, blocksToAllocate);  // update the blockCount in the inode dataStream
  // no matter where we stored the extended blockRun, we have updated the inode

  assert(saveBlock(inode^.inodeNum, inode));  // 0 result means success
  result := TRUE;  // no error
end; // TUbixFS.extendFile

function TUbixFS.findBRun;
var
  position:int64;
begin
  setBlockRun(result, 0, 0, 0);
  if (dataStream = NIL) then exit;
  if (blockNum >= dataStream^.blockCount) then
    begin
      writeln('Requested block outside the block count in the dataStream');
      exit
    end;

  position := int64(blockNum) shl superBlock^.blockShift;
  if (position < dataStream^.maxDirect) then
    result := findBRunDirect(dataStream, blockNum)
  else if (position < dataStream^.maxIndirect) then
    result := findBRunIndirect(dataStream, blockNum)
  else if (position < dataStream^.maxDIndirect) then
    result := findBRunDIndirect(dataStream, blockNum)
  else writeln('Block position is not mapped by dataStream');
end; // TUbixFS.findBRun

function TUbixFS.findBRunDirect;
var
  sum, i, offset:uInt32;
begin
  // set the result to a nil blockrun
  setBlockRun(result, 0, 0, 0);

  sum := 0;
  with dataStream^ do
    for i := low(direct) to high(direct) do
      with direct[i] do
        begin
          if (blockNum < sum + len) then                // found it
            begin
              offset := blockNum - sum;
              setBlockRun(result, AG, start + offset, len - offset);
              exit
            end;
          inc(sum, len);
        end;  // for/with
end; // TUbixFS.findBRunDirect

function TUbixFS.findBRunIndirect;
var 
  extents:array[0..NUM_EXTENSION_BLOCKS-1] of ^array[0..0] of TBlockRun;
  sum, offset, blockIndex, index, extentsUpperBound:uInt32;
  tmpBlockRun:TBlockRun;
begin
  // NOTE: Basic error checking on whether superblock <> NIL isn't done here.
  setBlockRun(result, 0, 0, 0);

  // load the indirect block
  tmpBlockRun := dataStream^.indirect;
  for blockIndex := low(extents) to high(extents) do
    begin
      extents[blockIndex] := loadBlock(tmpBlockRun);
      assert(extents[blockIndex] <> NIL);
      inc(tmpBlockRun.start);         // hope this doesn't wrap around
    end;

  extentsUpperBound := (superBlock^.blockSize div sizeof(TBlockRun))-1;
  // We either need to adjust sum forward or blockNum back.
  // sum := (dataStream^.maxDirect+superBlock^.blockSize-1) shr superBlock^.blockShift;

  // We need to decrease blockNum by the amount of blocks the direct portion took up.
  // This can be calculated by dividing the maxDirect byte size by the block size.

  blockNum := blockNum - dword(dataStream^.maxDirect shr superBlock^.blockShift);
  sum := 0;
  for blockIndex := low(extents) to high(extents) do
    for index := 0 to extentsUpperBound do
      with extents[blockIndex]^[index] do
        begin
          if (blockNum < sum + len) then                // found it
            begin
              offset := blockNum - sum;
              setBlockRun(result, AG, start + offset, len - offset);
              exit  // was a break
            end;
          inc(sum, len);
        end; // for/with
end; // TUbixFS.findBRunIndirect

const printStuff:boolean = FALSE;

function TUbixFS.findBRunDIndirect;
var 
  extents:^array[0..0] of TBlockRun;
  sum, blockIndex, indirectIndex, index, offset:uInt32;
  blocksPerIndirect:uInt32;    // per indirect section (NUM_EXTENSION_BLOCKS * superBlock^.blockSize) === 4 x 1k block
  blocksPerDIndirect:uInt32;   // per single double indirect block === 1k block
  runsPerBlock:uInt32;         // runs per single block (blockSize div sizeof(TBlockRun))
  tmpBlockRun:TBlockRun;
begin
  // NOTE: Basic error checking on whether superblock <> NIL isn't done here.
  setBlockRun(result, 0, 0, 0);

  // find the indirect block that contains the block run we're looking for.
  // This is done with some clever shifting
  
  // first adjust the blockNum, taking into account how many blocks are used
  // by the direct and indirect sections
  blockNum := blockNum - dword(dataStream^.maxIndirect shr superBlock^.blockShift);
  // Each indirect block holds NUM_EXTENSION_BLOCKS*((superBlock^.blockSize*NUM_EXTENSION_BLOCKS) div sizeof(TBlockRun)) blocks
  // e.g.
  //   NUM_EXTENSION_BLOCKS = 4 
  //   blockSize = 1024*NUM_EXTENSION_BLOCKS
  //   sizeof(TBlockRun) = 8
  //   blocks referenced per indirect block = 4 * (4096 / 8) === 2048
  //   indirect block index = (block we're looking for) / (blocks referenced per indirect block)
  //   blockrun index in indirect block = ((block we're looking for) % (blocks referenced per indirect block)) div NUM_EXTENSION_BLOCK

  // If we're really *really* clever, we will only need to load two blocks to find this blockrun.
  // So let's begin the cleverness.
  tmpBlockRun := dataStream^.doubleIndirect;

  runsPerBlock := superBlock^.blockSize div sizeof(TBlockRun);

  blocksPerIndirect := NUM_EXTENSION_BLOCKS * ((superBlock^.blockSize * NUM_EXTENSION_BLOCKS) div sizeof(TBlockRun));

  blocksPerDIndirect := blocksPerIndirect * runsPerBlock;

  assert(blocksPerIndirect <> 0);  // since this would div by 0 below anyway
  assert(blocksPerDIndirect <> 0);
  
  blockIndex := blockNum div blocksPerDIndirect; 

  index := (blockNum mod blocksPerDIndirect) div blocksPerIndirect;

  assert(blockIndex < NUM_EXTENSION_BLOCKS);     // this would mean we're way outside the range of possible addressable blocks
  inc(tmpBlockRun.start, blockIndex);
  extents := loadBlock(tmpBlockRun);
  assert(extents <> NIL);

  tmpBlockRun := extents^[index];

  // now find the proper indirect sub-block

  blockIndex := (blockNum mod blocksPerIndirect) div (NUM_EXTENSION_BLOCKS*runsPerBlock);

  assert(blockIndex < NUM_EXTENSION_BLOCKS);

  // adjust the double indirect block address
  inc(tmpBlockRun.start, blockIndex);
  extents := loadBlock(tmpBlockRun);
  assert(extents <> NIL);

  // now find which indirect run references the block we're looking for
  index := (blockNum mod (runsPerBlock*NUM_EXTENSION_BLOCKS)) div NUM_EXTENSION_BLOCKS;
(*
  blockIndex := (index mod blocksPerIndirect) div (superBlock^.blockSize div sizeof(TBlockRun));
  assert(blockIndex < NUM_EXTENSION_BLOCKS);
  
  index := (blockNum mod blocksPerIndirect) div NUM_EXTENSION_BLOCKS;

  tmpBlockRun := extents[
  i := blockNum div (NUM_EXTENSION_BLOCKS * ((superBlock^.blockSize*NUM_EXTENSION_BLOCKS) div sizeof(TBlockRun))); 
  // i should now hold which entry in the double indirect block we need to load
  assert(i < ((superBlock^.blockSize*NUM_EXTENSION_BLOCKS) div sizeof(TBlockRun)));

  // load the proper indirect block beneath the double indirect block
  extents := loadBlock(extents^[i]);
  assert(extents <> NIL);

  // now seek into this indirect block by taking the modulus of the number
  // of blocks referenced by an indirect block
  i := (blockNum mod (NUM_EXTENSION_BLOCKS * ((superBlock^.blockSize*NUM_EXTENSION_BLOCKS) div sizeof(TBlockRun)))) div NUM_EXTENSION_BLOCKS;
*)
  with extents^[index] do
    begin
      // extents^[i] is a block extent inside an indirect block which
      // was referenced by the double indirect block.  Now we need to see 
      // which block inside this extent we wanted. We do that by taking the 
      // modulus of the requested block number by the number of blocks 
      // referenced in each blockrun.  BeFS used 4 per; and as of writing 
      // this comment that is what I'm using.
      offset := blockNum and (NUM_EXTENSION_BLOCKS-1);
      setBlockRun(result, AG, start + offset, len - offset);
    end; // with
  
end; // TUbixFS.findBRunDIndirect

function TUbixFS.freeUsedBlocks;
var 
  FBL:PBTree;
  tmpBlockRun, leftBlockRun, rightBlockRun:TBlockRun;
  searchRec:BTreeSearchRec;
  mergeLeft, mergeRight:boolean;

  u:uPtr;
begin
  result := -1;  // failure by default
  // basic sanity checks
  if (superBlock = NIL) or (blockRun.len = 0) then exit;
  if not isValidIAddr(superBlock^.BAT) then exit;

with blockRun do writeln('---- freeing used blocks: ', AG, ':', start, ':', len);  
  new(FBL, open(iAddrToStr(superBlock^.BAT), new(PUbixBTreeVFS, init(@self))));
//  new(FBL, open('BAT.DAT', NIL));
  if (FBL = NIL) then exit;
  FBL^.InstallUserFunctions(compareKeyBlockRun, copyKeyBlockRun, keySizeBlockRun);

  fillchar(u, sizeof(u), 0);  // not sure what to put in here yet

  assert(FBL^.Insert(@blockRun, u));                // put in our block run
  if not FBL^.FindFirst(@blockRun, searchRec) then begin FBL^.PrintWholeTree('after.txt'); runError; end;

  //assert(FBL^.FindFirst(@blockRun, searchRec));     // yes, we look it right back up
  if FBL^.FindPrev(searchRec) then
    begin
      assert(searchRec.key <> NIL);
      leftBlockRun := TBlockRun(searchRec.key^);
      mergeLeft := (leftBlockRun.len <> 0) and (leftBlockRun.start + leftBlockRun.len = blockRun.start);
    end
  else mergeLeft := FALSE;

  assert(FBL^.FindFirst(@blockRun, searchRec));     // We're looking this one up a lot
  
  if FBL^.FindNext(searchRec) then
    begin
      assert(searchRec.key <> NIL);
      rightBlockRun := TBlockRun(searchRec.key^);
      mergeRight := (rightBlockRun.len <> 0) and (blockRun.start + blockRun.len = rightBlockRun.start)
    end
  else mergeRight := FALSE;

  if (mergeLeft or mergeRight) then 
    begin
      FBL^.Delete(@blockRun, u); // testing [this is to prevent that blockrun from being used in the event the BAT grows]
      if mergeLeft then assert(FBL^.Find(@leftBlockRun, u));
      if mergeLeft then FBL^.Delete(@leftBlockRun, u);

      if mergeRight then assert(FBL^.Find(@rightBlockRun, u));
      if mergeRight then FBL^.Delete(@rightBlockRun, u);

      // this blockRun merges two other free spaces into a single unified space
      tmpBlockRun := blockRun;
      if mergeLeft then 
        begin
          dec(tmpBlockRun.start, leftBlockRun.len);
          inc(tmpBlockRun.len, leftBlockRun.len);
        end;
      if mergeRight then inc(tmpBlockRun.len, rightBlockRun.len);
          
      // tmpBlockRun now holds the unified run. Delete the appropiate ones
//      FBL^.Delete(@blockRun, u);
      assert(FBL^.Insert(@tmpBlockRun, u));
    end;

  superBlock^.usedBlocks := superBlock^.usedBlocks - int64(blockRun.len);
  FBL^.FindClose(searchRec);
  dispose(FBL, done);
  result := 0; // success
end; // TUbixFS.freeUsedBlocks

function TUbixFS.getFileSize;
begin
  result := 0;
  if (inode = NIL) then exit;
  result := inode^.blocks.size;
end;  // TUbixFS.getFileSize

function TUbixFS.getFileSpace;
begin
  // getFileSpace() will return the size of the file's allocated space
  result := 0;
  if (inode = NIL) or (superBlock = NIL) then exit;
  result := inode^.blocks.blockCount shl superBlock^.blockShift;
end; // TUbixFS.getFileSpace

function TUbixFS.iAddrToStr;
var s:string;
begin
  // Note: this won't work in C/C++ because strings are really null
  // terminated character arrays. So, the B^Tree won't hold a string
  // with nulls in it properly
  length(s) := sizeof(iAddr);
  move(iAddr, s[1], sizeof(iAddr));
  result := s;
end; // TUbixFS.iAddrToStr;

function TUbixFS.loadBlock;
var 
  blockAddress:int64;
  cacheNode:PCacheNode;
  u:uPtr;
  searchRec:BTreeSearchRec;
begin
  result := NIL;  // failure by default;
  // 2005-04-29 mji
  // I'm not really sure how much error checking to put in here. Chances are that
  // if we're calling this function everything is fine.  For now, with such limited
  // use I'll keep it pretty light. I think that this will eventually be used for 
  // dealing with the block cache, so more checking might be required later.
  if (dev = NIL) or (superBlock = NIL) or (blockCache = NIL) then exit; 

  with blockAddr, superBlock^ do
    blockAddress := (AG shl AGShift) + start;

  fillchar(u, sizeof(u), 0);  // just to be safe
  cacheNode := NIL;
  // XXX ToDo
  // Fix this so that if there is no cache, we only have one pointer
  // Check for a full cache

  if (blockCache^.Find(@blockAddress, u)) then
    begin
      // block was in the cache. 
      assert(u.p <> NIL);
      cacheNode := u.p;
      assert(cacheNode^.data <> NIL);
      // If we're going to honour the locked flag in the cacheNode, then
      // we should put a mutex here
      
      // Now we need to update the blockTime. We do this by first deleting
      // the cacheNode out of the blockTime cache.
      blockTime^.Delete(@cacheNode^.lastUsed, u);  // Delete old one
      cacheNode^.lastUsed := getRDTSCTime();       // Get the new timestamp
      blockTime^.Insert(@cacheNode^.lastUsed, u);  // Insert with new time
      result := cacheNode^.data;                   // return the data pointer
    end
  else
    begin
      // Requested block isn't in cache

      // XXX ToDo:
      // Check to see if we've hit the maximum cache entry limit. If so, we 
      // need to drop off the least recently used (look for the first key 
      // in blockTime) and possibly save it to disk if it has been modified.

      if (blockCache^.getKeyCount() = maxCacheEntry) then
        begin
          blockTime^.getFirstKey(searchRec);                  // least recently used block
          cacheNode := searchRec.value.p;                     // get the pointer from the value
          assert(cacheNode <> NIL);
          assert(syncBlock(cacheNode));
          u.p := cacheNode;
          blockCache^.Delete(@cacheNode^.address, u);
          blockTime^.Delete(@cacheNode^.lastUsed, u);
          // We've deleted the cacheNode out of both the blockCache and
          // the blockTime trees. We're going to re-use the cacheNode.
          cacheNode^.address := blockAddress;
          cacheNode^.lastUsed := getRDTSCTime();
          // No need to update data, dataSize, or dirty. We re-use the first two,
          // and dirty will be set to FALSE above if necessary.  Might need to 
          // reset locked if we're using that.
        end
      else
        new(cacheNode, init(blockAddress, getRDTSCTime(), superBlock^.blockSize));

      assert(cacheNode <> NIL);
      assert(cacheNode^.data <> NIL);
      u.p := cacheNode;
      if not (blockCache^.Insert(@blockAddress, u)) then
        writeln('Error inserting ', blockAddress, ' into blockCache in loadBlock()');
      if not (blockTime^.Insert(@cacheNode^.lastUsed, u)) then
        writeln('Error inserting into blockTime');
      // now we need to fill the data from the device
      with superBlock^ do
        dev^.read(cacheNode^.data,
                  blockAddress * (blockSize shr SECTOR_SHIFT),
                  blockSize shr SECTOR_SHIFT
                 ); // dev^.read
      result := cacheNode^.data;
    end; 
end; // TUbixFS.loadBlock

function TUbixFS.nextAG;
begin
  // there's no error we can throw from here if superblock is NIL,
  // so don't bother to check.  Just hope whomever calls this
  // function has done some basic sanity checks
  result := (AG+1) mod superBlock^.numAGs
end; // TUbixFS.nextAG

function TUbixFS.pathToIAddr;

  function recursePath(path:PChar; len:integer; iAddr:inodeAddr):inodeAddr;
    var 
      dirTree:PBTree;
      u:uPtr;
      nameSize:integer;
      pfs:PUbixBTreeVFS;
    begin
      // if the string length is 0 then we have found the inode in the previous
      // recurse, so return what we found.
      if (len = 0) then 
        begin
          result := iAddr;
          exit;     
        end
      else
        setIAddr(result, 0, 0, 0);
      // hm. If we're looking for something like ``/.'' then the ``/'' is stripped
      // off in the main call leaving only ``.'', which means that this malformed 
      // path check would trigger.  Same with /foo/bar if bar is a dir.
      //if uInt8ArrayPtr(path)^[0] = ord('/') then exit;  // malformed path
      if not (isValidIAddr(iAddr)) then exit;           // sanity check
      nameSize := 0;
      while (len > 0) and (uInt8ArrayPtr(path)^[nameSize] <> ord('/')) do
        begin
          inc(nameSize);
          dec(len);    
        end;
      uInt8ArrayPtr(path)^[nameSize+1] := 0;
      if (len <> 0) then dec(len);
      new(dirTree, open(IAddrToStr(iAddr), new(PUbixBTreeVFS, init(@self))));
      if (dirTree = NIL) then exit;  // hm.. what happens to the PUbixBTreeVFS is this is NIL?
      // note that the dirTree is a tree of BT_PCHAR, and searches are case sensitive.
      // if the tree doesn't exist, Find() should return false.
      if dirTree^.Find(path, u) then result := recursePath(path + nameSize + 1, len, u.iAddr);
      dispose(dirTree, done);
    end; // recursePath

var 
  len:integer;
  copyOfPath:PChar;
begin
  setIAddr(result, 0, 0, 0);

  if (path = NIL) or (superBlock = NIL) then exit;  

  len := strlen(path);
  if (len = 0) then exit;                            // empty string
  if uInt8ArrayPtr(path)^[0] <> ord('/') then exit;  // malformed path
  // we need to make a copy of the path since recursePath() will munge it
  GetMem(copyOfPath, len+1);        // Allocate some memory for the path
  assert(copyOfPath <> NIL);
  move(path^, copyOfPath^, len+1);  // copy it
  result := recursePath(copyOfPath+1, len-1, superBlock^.rootDir);
  freemem(copyOfPath, len+1);
end; // TUbixFS.pathToIAddr

procedure TUbixFS.printSuperBlock;
begin
  if (superBlock = NIL) then exit;
  with superBlock^ do
    begin
      writeln('superBlock^.name........... ', name);
      writeln('superBlock^.magic1......... ', hex(magic1));
      writeln('superBlock^.fsByteOrder.... ', fsByteOrder);
      writeln('superBlock^.blockSize...... ', blockSize);
      writeln('superBlock^.blockShift..... ', blockShift);
      writeln('superBlock^.numBlocks...... ', numBlocks);
      writeln('superBlock^.usedBlocks..... ', usedBlocks);
      writeln('superBlock^.magic2......... ', hex(magic2));
      writeln('superBlock^.blocksPerAG.... ', blocksPerAG);
      writeln('superBlock^.AGShift........ ', AGShift);
      writeln('superBlock^.numAGs......... ', numAGs);
      writeln('superBlock^.flags.......... ', hex(flags));
      writeln('superBlock^.magic3......... ', hex(magic3));
      with superBlock^.BAT do
        writeln('superBlock^.BAT............ ', AG, ':', start, ':', len);
      with superBlock^.rootDir do
        writeln('superBlock^.rootDir........ ', AG, ':', start, ':', len);
    end;
end; // TUbixFS.printSuperBlock

function TUbixFS.saveBlock;
var 
  blockAddress:int64;
  cacheNode:PCacheNode;
  u:uPtr;
  searchRec:BTreeSearchRec;
begin

  result := FALSE;  // failure by default;
  if (blockCache = NIL) or (blockTime = NIL) or (dev = NIL) or (superBlock = NIL) then exit;
  with blockAddr, superBlock^ do
    blockAddress := (AG shl AGShift) + start;
  cacheNode := NIL;
  fillchar(u, sizeof(u), 0); // best be safe
  // first check to see if the blockAddress is in the cache
  if (blockCache^.Find(@blockAddress, u)) then
    begin
      // block was in cache
      cacheNode := u.p;
      assert(cacheNode <> NIL);
      assert(cacheNode^.data <> NIL);
      if (cacheNode^.data <> buf) then
        begin
          // The buffer with the data we want to save was not the same as the one
          // in the cache. This means that it was not obtained with loadBlock().
          // We need to copy that pointer to our block.
          move(buf^, cacheNode^.data^, cacheNode^.dataSize);
        end;
      // Now we need to update the blockTime. We do this by first deleting
      // the cacheNode out of the blockTime cache.
      blockTime^.Delete(@cacheNode^.lastUsed, u);  // Delete old one
      cacheNode^.lastUsed := getRDTSCTime();       // Get the new timestamp
      blockTime^.Insert(@cacheNode^.lastUsed, u);  // Insert with new time
    end
  else
    begin
      // block was not in cache
      if (blockCache^.getKeyCount() = maxCacheEntry) then
        begin
          blockTime^.getFirstKey(searchRec);
          cacheNode := searchRec.value.p;
          assert(cacheNode <> NIL);
          assert(syncBlock(cacheNode));
          u.p := cacheNode;
          blockCache^.Delete(@cacheNode^.address, u);
          blockTime^.Delete(@cacheNode^.lastUsed, u);
          // We've deleted the cacheNode out of both the blockCache and
          // the blockTime trees. We're going to re-use the cacheNode.
          cacheNode^.address := blockAddress;
          cacheNode^.lastUsed := getRDTSCTime();
          // No need to update data, dataSize, or dirty. We re-use the first two,
          // and dirty will be set to FALSE above if necessary.  Might need to 
          // reset locked if we're using that.
        end
      else
        new(cacheNode, init(blockAddress, getRDTSCTime(), superBlock^.blockSize));

      assert(cacheNode <> NIL);
      assert(cacheNode^.data <> NIL);
      move(buf^, cacheNode^.data^, cacheNode^.dataSize);
      u.p := cacheNode;
      if not (blockCache^.Insert(@blockAddress, u)) then 
        writeln('Error inserting ', blockAddress, ' into blockCache in saveBlock()');
      if not (blockTime^.Insert(@cacheNode^.lastUsed, u)) then 
        writeln('Error inserting into blockTime');
    end;
  cacheNode^.dirty := TRUE;  // indicate the block has been modified
  result := TRUE;
end; // TUbixFS.saveBlock

function TUbixFS.isBAT;
begin
  result := FALSE;
  if (superBlock = NIL) then exit;
  with superBlock^.BAT do
    result := (AG = iAddr.AG) and (start = iAddr.start);
end; // TUbixFS.isBAT

function TUbixFS.isValidIAddr;
begin
  result := (iAddr.len <> 0) and (iAddr.AG < superBlock^.numAGs); 
end; // TUbixFS.isValidIAddr

function TUbixFS.readAttr;
var
  inode:PUbixfsInode;
begin
  result := -1;
  if (attribute = NIL) or (buffer = NIL) then exit;
  if not isValidIAddr(iAddr) then exit;
  inode := loadBlock(iAddr);
  if (inode = NIL) then exit;
end; // TUbixFS.readAttr

function TUbixFS.readDataStream;
var 
  tmpBlock:uInt8ArrayPtr;
  blockRun:TBlockRun;
  size, remainder:uInt32;
  blockStart:uInt32;
  blocksToRead:uInt32;
  bytesToRead:uInt32;
  inode:PUbixFSInode;
begin
//  with iAddr do
//    writeln('readDataStream(', AG, ':', start, ':', len, ', ', position, ', ', hex(dword(buf)), ', ', length, ');');
  result := -1;
  if ((buf = NIL) or (superBlock = NIL)) then exit;
  inode := loadBlock(iAddr);
  tmpBlock := NIL;
  size := length;
  remainder := uInt32(position mod superBlock^.blockSize);

  if (remainder <> 0) then
    begin
      // the position we've been asked to read from doesn't align to
      // a block size. We need to read in the previous block and pad
      // the data
      blockStart := uInt32(position shr superBlock^.blockShift);
      blockRun := findBRun(@inode^.blocks, blockStart);
      assert(blockRun.len > 0);
      tmpBlock := loadBlock(blockRun);
      assert(tmpBlock <> NIL);
      bytesToRead := min(size, (superBlock^.blockSize - remainder));
      move(tmpBlock^[remainder], buf^, bytesToRead);
      inc(buf, bytesToRead);
      dec(size, bytesToRead);
      position := position + bytesToRead;
    end; // if remainder != 0

  // If the blockSize is always a power of 2, then you can eliminate the MOD and use
  // an AND operation instead, e.g. remainder := size and (superBlock^.blockSize-1)
  remainder := size mod superBlock^.blockSize; 
  dec(size, remainder);

  // Size is currently in bytes. The remainder variable above holds the number
  // of remaining bytes (if any) outside full blocks we have to write.

  // convert the size into blocks
  size := size shr superBlock^.blockShift;

  while (size > 0) do
    begin
      blockRun := findBRun(@inode^.blocks, dword(position shr superBlock^.blockShift));
      assert(blockRun.len > 0);
      for blocksToRead := 0 to min(blockRun.len, size)-1 do
        begin
          tmpBlock := loadBlock(blockRun);
          assert(tmpBlock <> NIL);
          move(tmpBlock^, buf^, superBlock^.blockSize);
          dec(size);
          inc(buf, superBlock^.blockSize);
          inc(blockRun.start);  // This might wrap around?
          dec(blockRun.len);    // might be unnecessary
        end; // for
    end; // while 

  if (remainder <> 0) then
    begin
      // There is less than a block of data left to read.
      blockStart := dword((position+length-remainder) shr superBlock^.blockShift);
      blockRun := findBRun(@inode^.blocks, blockStart);
      if (blockRun.len = 0)  then 
        begin 
          printStuff := TRUE; 
          blockRun := findBRun(@inode^.blocks, uInt32(position shr superBlock^.blockShift));
        end;

      assert(blockRun.len > 0);
      tmpBlock := loadBlock(blockRun);
      assert(tmpBlock <> NIL);
      move(tmpBlock^, buf^, remainder);      
    end;

  result := 0;
end; // TUbixFS.readDataStream

procedure TUbixFS.setBlockRun;
begin
  blockRun.AG    := AG;
  blockRun.start := start;
  blockRun.len   := len;
end; // TUbixFS.setBlockRun

procedure TUbixFS.setIAddr;
begin
  inode.AG    := AG;
  inode.start := start;
  inode.len   := len;
end; // TUbixFS.setIAddr

function TUbixFS.strToIAddr;
begin
  // Note: this won't work in C/C++ because strings are really null
  // terminated character arrays. So, the B^Tree won't hold a string
  // with nulls in it properly
  // begin fiendishly clever hack
  move(s[1], result, sizeof(inodeAddr));
  // end fiendishly clever hack
end; // TUbixFS.strToIAddr

function TUbixFS.shrinkFile;

  function shrinkDirect(var trimBlockCount:uInt32):boolean;
    var 
      index:uInt32;
      blockRun:TBlockRun;   // for freeUsedBlocks()
    begin
      result := FALSE;
      with inode^, blocks do
        begin
          dec(blockCount, trimBlockCount);  // this is a little presumptious
          maxDirect := maxDirect - (trimBlockCount shl superBlock^.blockShift);  // so is this
          for index := high(direct) downto low(direct) do
            if (direct[index].len <> 0) then break;       // find the last used one
          assert(direct[index].len <> 0);
          while (trimBlockCount <> 0) do
            begin
              if (direct[index].len >= trimBlockCount) then
                begin
                  blockRun := direct[index];              // make a copy
                  dec(direct[index].len, trimBlockCount); // subtract from the len
                  if (direct[index].len = 0) then         // check to see if that was the whole run
                    setBlockRun(direct[index], 0, 0, 0)   // yup, mark this entry as blank
                  else                                    // else, nope .. update blockRun for freeUsedBlocks()
                    setBlockRun(blockRun, blockRun.AG, blockRun.start + direct[index].len, blockRun.len - direct[index].len);
                  freeUsedBlocks(blockRun);               // free the blockRun we just trimmed
                  trimBlockCount := 0;                    // all done
                  break;                                  // end the while
                end
              else
                begin
                  dec(trimBlockCount, direct[index].len); // subtract the length of this run from the total trim amount
                  freeUsedBlocks(direct[index]);          // free this blockRun
                  setBlockRun(direct[index], 0, 0, 0);    // mark this entry as blank
                end;
              // keep going until we've hit bottom or while trimBlockCount <> 0
              if (index = low(direct)) then break else dec(index); 
            end; // while
        end; // with
      assert(trimBlockCount = 0); // Better be 0.
      result := TRUE;
    end; // local function shrinkDirect

  function shrinkIndirect(var trimBlockCount:uInt32):boolean;
    var
      extents:array[0..NUM_EXTENSION_BLOCKS-1] of ^array[0..0] of TBlockRun;
      extentsUpperBound, blockIndex, index:uInt32;
      tmpBlockRun:TBlockRun;
    begin
      result := FALSE; // failure by default;
      extentsUpperBound := (superBlock^.blockSize div sizeof(TBlockRun))-1;

      if not isValidIaddr(inode^.blocks.indirect) then
        begin
          writeln('For some reason you called shrinkIndirect() when there was no valid indirect block.');
          exit;
        end;

      // load the indirect block
      tmpBlockRun := inode^.blocks.indirect;
      for blockIndex := low(extents) to high(extents) do
        begin
          extents[blockIndex] := loadBlock(tmpBlockRun);
          assert(extents[blockIndex] <> NIL);
          inc(tmpBlockRun.start);  // hope this doesn't wrap around
        end; // for blockIndex

      for blockIndex := high(extents) downto low(extents) do
        if (extents[blockIndex]^[0].len <> 0) then break;

      for index := extentsUpperBound downto 0 do
        if (extents[blockIndex]^[index].len <> 0) then break;
      assert(extents[blockIndex]^[index].len <> 0);   // this would be odd

      with inode^, blocks do
        begin
          while (trimBlockCount <> 0) do
            begin 
              if (extents[blockIndex]^[index].len >= trimBlockCount) then
                begin
                  tmpBlockRun := extents[blockIndex]^[index];              // make a copy
                  dec(extents[blockIndex]^[index].len, trimBlockCount); // subtract from the len
                  maxIndirect := maxIndirect - (trimBlockCount shl superBlock^.blockShift);  // update inode^.blocks.maxIndirect
                  dec(blockCount, trimBlockCount);          // update inode^.blocks.blockCount
                  if (extents[blockIndex]^[index].len = 0) then           // check to see if that was the whole run
                    begin
                      // Reclaim this entry. But, if the index is 0, then 
                      // we've trimmed away the indirect section completely.
                      if (index = 0) then    
                        begin
                          if (blockIndex = 0) then // toast the indirect block
                            begin
                              freeUsedBlocks(indirect);
                              setIAddr(indirect, 0, 0, 0);  
                            end
                          else                     // just move to the previous section of the indirect block
                            begin
                              // XXX we probably need to save out extents[blockIndex] right here
                              dec(blockIndex);
                              index := extentsUpperBound;
                            end;
                        end
                      else                   // else just clear this entry 
                        setBlockRun(extents[blockIndex]^[index], 0, 0, 0)  // yup it was the whole run, mark this entry as blank
                    end
                  else                                     // else, nope .. update tmpBlockRun for freeUsedBlocks()
                    setBlockRun(tmpBlockRun, tmpBlockRun.AG, tmpBlockRun.start + extents[blockIndex]^[index].len, tmpBlockRun.len - extents[blockIndex]^[index].len);
                  freeUsedBlocks(tmpBlockRun);                // free the blockRun we just trimmed
                  trimBlockCount := 0;                     // all done
                  break;                                   // end the while
                end
              else
                begin
                  dec(trimBlockCount, extents[blockIndex]^[index].len);  // subtract the length of this run from the total trim amount
                  freeUsedBlocks(extents[blockIndex]^[index]);           // free this blockRun
                  setBlockRun(extents[blockIndex]^[index], 0, 0, 0);     // mark this entry as blank
                  // This branch means that trimBlockCount is not 0 yet (relevant to code below)
                end;
              // keep going until we've hit bottom or while trimBlockCount <> 0
              if (index = 0) then 
                begin
                  if (blockIndex = 0) then
                    begin
                      // If we're here, then the indirect block is no longer needed
                      // and we need to scan through the direct section. Toast the
                      // indirect block
                      freeUsedBlocks(indirect);
                      setBlockRun(indirect, 0, 0, 0);
                      result := shrinkDirect(trimBlockCount);
                      exit;
                    end
                  else
                    begin
                      dec(blockIndex);
                      index := extentsUpperBound;
                    end;
                end
              else
                dec(index);
            end; // while
          // XXX 
          // This next piece of code should go away and the proper pieces should be saved above.
          // Since we have caching at the block level, we can go ahead and save blocks when they 
          // change without worrying too much about speed.
          if isValidIAddr(indirect) then  // we might have removed the indirect section altogether
            begin
              tmpBlockRun := indirect;
              for blockIndex := low(extents) to high(extents) do
                begin
                  assert(saveBlock(tmpBlockRun, extents[blockIndex])); 
                  inc(tmpBlockRun.start); // hope this doesn't wrap around
                end;
            end; // if isValidIAddr(indirect)
        end; // with
      result := TRUE;  // success
    end; // local function shrinkIndirect

  function shrinkDIndirect(var trimBlockCount:uInt32):boolean;
    var 
      extents:^array[0..0] of TBlockRun;
      indirectExtents:^array[0..0] of TBlockRun;
      extentsIndex, indirectExtentsIndex:uInt32;
      extentsUpperBound:uInt32;
      tmpBlockRun:TBlockRun;
    begin
      result := FALSE; // failure by default

      extentsUpperBound := (superBlock^.blockSize div sizeof(TBlockRun)) - 1;
      assert(isValidIAddr(inode^.blocks.doubleIndirect));
      extents := loadBlock(inode^.blocks.doubleIndirect);
      assert(extents <> NIL);
      for extentsIndex := extentsUPperBound downto 0 do
        if (extents^[extentsIndex].len <> 0) then break;
      assert(extents^[extentsIndex].len <> 0);
      indirectExtents := loadBlock(extents^[extentsIndex]);
      assert(indirectExtents <> NIL);
      for indirectExtentsIndex := extentsUpperBound downto 0 do
        if (indirectExtents^[indirectExtentsIndex].len <> 0) then break;
      assert(indirectExtents^[indirectExtentsIndex].len <> 0);
      while (trimBlockCount >= NUM_EXTENSION_BLOCKS) do
        begin
          dec(trimBlockCount, indirectExtents^[indirectExtentsIndex].len);
          dec(inode^.blocks.blockCount, indirectExtents^[indirectExtentsIndex].len);
          with inode^, blocks do
            maxDIndirect := maxDIndirect - (indirectExtents^[indirectExtentsIndex].len shl superBlock^.blockShift);
          freeUsedBlocks(indirectExtents^[indirectExtentsIndex]); // free the blockRun
          if (indirectExtentsIndex = 0) then
            begin
              freeUsedBlocks(extents^[extentsIndex]); // free this indirect block
              if (extentsIndex = 0) then
                begin
                  freeUsedBlocks(inode^.blocks.doubleIndirect);  // free the double indirect block
                  setBlockRun(inode^.blocks.doubleIndirect, 0, 0, 0);  
                  // XXX
                  // We should put a check to make sure inode^.blocks.maxDIndirect is equal to maxIndirect
                  with inode^, blocks do
                    assert(maxDIndirect = maxIndirect);  
                  inode^.blocks.maxDIndirect := 0;
                  result := shrinkIndirect(trimBlockCount);
                  exit;
                end
              else
                begin
                  dec(extentsIndex);
                  assert(saveBlock(inode^.blocks.doubleIndirect, extents)); // XXX this isn't right
                  indirectExtents := loadBlock(extents^[extentsIndex]);
                  assert(indirectExtents <> NIL);
                  indirectExtentsIndex := extentsUpperBound;
                end;  // else
            end
          else
            dec(indirectExtentsIndex);           
        end; // while
      result := TRUE;  // success
    end; // local function shrinkDIndirect

  function shrinkDataStream(var trimBlocks:uInt32):boolean;
    begin
      if (inode^.blocks.maxDIndirect > 0) then result := shrinkDIndirect(trimBlocks) else
      if (inode^.blocks.maxIndirect > 0) then result := shrinkIndirect(trimBlocks)
      else result := shrinkDirect(trimBlocks);
    end; // local function shrinkDataStream
var
  blocksToTrim:uInt32;

begin // shrinkFile() 
  // XXX ToDo
  // Figure out which pieces of the datastream need to be saved 
  // in shrinkIndirect() and shrinkDIndirect()

  result := FALSE; // failure by default
  if (inode = NIL) or (superBlock = NIL) then exit;

  // start with the original block count
  blocksToTrim := inode^.blocks.blockCount;  
  // Subtract off how much space the file actually takes up.
  dec(blocksToTrim, uInt32((inode^.blocks.size+(superBlock^.blockSize-1)) shr superBlock^.blockShift));
  
  // Do a quick check to see if we're in double indirect blocks. If we are, and the amount to trim
  // is less than NUM_EXTENSION_BLOCKS then we don't actually need to do anything.
  // Also do a quick check to see if blocksToTrim is 0 .. which means we really don't have to do anything.
  if (blocksToTrim = 0) or (isValidIAddr(inode^.blocks.doubleIndirect) and (blocksToTrim < NUM_EXTENSION_BLOCKS)) then 
    begin
      result := TRUE;  // Technically there was no failure
      exit; 
    end;
  result := shrinkDataStream(blocksToTrim);
  assert(saveBlock(inode^.inodeNum, inode));
end; // TUbixFS.shrinkFile

procedure TUbixFS.sync;
var 
  searchRec:BTreeSearchRec;
begin
  // XXX ToDo:
  // Need to mark the superblock as clean.

  if (blockCache = NIL) or (dev = NIL) then exit;
  // We sync out the blocks in ascending order from the blockCache.
  // To do this we examine each entry to see if the block is dirty.
  // If it is, we write it out to the device and set dirty to FALSE.
  if (blockCache^.getFirstKey(searchRec)) then
    repeat
      syncBlock(searchRec.value.p);
    until not blockCache^.FindNext(searchRec);
  // Also write out the superblock.
  dev^.write(superBlock, 1, 1);
end; // TUbixFS.sync

function TUbixFS.syncBlock;
begin
  result := FALSE;
  if (dev = NIL) or (cacheNode = NIL) then exit;
  with cacheNode^ do
    if (dirty) then
      begin
        assert(data <> NIL);  
        dev^.write(data, 
                   address * (dataSize shr SECTOR_SHIFT),
                   dataSize shr SECTOR_SHIFT
                  ); // dev^.write    
        dirty := FALSE;
      end; // if dirty
  result := TRUE;
end; // TUbixFS.syncBlock
{$IFDEF GRAPHICS}
 {$I graphics.pas}
{$ENDIF}

function TUbixFS.vfs_close;
var 
  u:uPtr;
  vnid:PVnid;
  inode:PUbixfsInode;
begin
  result := -1;
  if (fileDescriptors = NIL) or (handle >= maxFDEntry) then exit;
  if not (vnids^.Find(@fileDescriptors^[handle].iAddr, u)) then writeln('Could not find iAddr in vnids');
  vnid := u.p;
  if (vnid = NIL) then 
    begin
      writeln('vnid is NIL');
      exit;
    end; 

  with vnid^ do
    begin
      dec(refCount);
      if (refCount = 0) then 
        begin   
          // refCount has fallen to 0. If the file is deleted we should take care of it here
          inode := loadBlock(fileDescriptors^[handle].iAddr);
          // if file is deleted then inode^.blocks.size = 0;
          assert(inode <> NIL);
          // shrink the file
          assert(shrinkFile(inode));
          vnids^.Delete(@inode^.inodeNum, u);
          setIAddr(fileDescriptors^[handle].iAddr, 0, 0, 0);
        end;
    end;
  if not vNodeQueue^.enqueue(handle) then writeln('Error enqueuing handle back into vNodeQueue');
  result := 0;
end; // TUbixFS.vfs_close

function TUbixFS.vfs_fCreate;
var 
  inode:PUbixFSInode;
  dirTree:PBTree;
  dirIAddr, fileIAddr:inodeAddr;
  u:uPtr;
  searchRec:BTreeSearchRec;
  len : integer;
  path:PChar;
  fileNamePtr:PChar; 
  curPos:integer;
  vnid:PVnid;
  handle:integer;
begin
  result := -1;  // error by default
  if (filename = NIL) or (superBlock = NIL) then exit;
  // we need to split the filename into it's component path and filename pieces.
  // From there we need to see if the file exists.  If it does then I guess we 
  // should truncate the file and load the inode.  Otherwise allocate a new inode
  len := strlen(filename);
  if (len = 0) then exit;
  if uInt8ArrayPtr(filename)^[0] <> ord('/') then exit;  // malformed path
  GetMem(path, len+1);
  strCopy(path, filename);
  curPos := len;

  setIAddr(dirIAddr, 0, 0, 0);  // zero out the inode address for the directory
  setIAddr(fileIAddr, 0, 0, 0); // zero out the inode address for the file

  while (curPos > 0) and (uInt8ArrayPtr(path)^[curPos] <> ord('/')) do dec(curPos);
  fileNamePtr := path + curPos + 1;
  if (curPos = 0) then
    begin
      // file is in the root dir
      writeln('path: /');
      writeln('filename: ', fileNamePtr);
      dirIAddr := pathToIAddr('/');
    end
  else
    begin
      uInt8ArrayPtr(path)^[curPos] := 0; // null out the separator
      writeln('Path: ', path);
      writeln('Filename: ', fileNamePtr);
      dirIAddr := pathToIAddr(path);
    end; 

  with dirIAddr do
    writeln('iaddr to path: ', AG, ':', start, ':', len);
  assert(isValidIAddr(dirIAddr));
  if not isValidIAddr(dirIAddr) then exit;
  // should probably check to make sure that we're opening a directory and not a file
  new(dirTree, open(IAddrToStr(dirIAddr), new(PUbixBTreeVFS, init(@self))));
  if (dirTree = NIL) then exit;
  // We need to ascertain if the file is existing or new.
  // If the file exists, we should truncate the file
  // If the file is new, we need to create a new inode and write the entry to the directory
  // Then we need to allocate a file descriptor and store it somewhere.  
  if dirTree^.Find(fileNamePtr, u) then 
    begin
      writeln(fileNamePtr, ' already exists');
      // XXX ToDo:
      // Set the filesize to 0 and call shrinkFile on the inode.
      // So, uhm, what happens if the file is already open?
    end
  else
    begin
      writeln('creating new file: ', fileNamePtr);
      writeln('allocating new inode');
      inode := allocInode(dirIAddr);   // alloc a new inode (pass in the parent iaddr)
      assert(inode <> NIL);            // make sure it isn't nil

      inode^.addAttr('name', BT_PCHAR, fileNamePtr, strlen(fileNamePtr)+1);

      if (inode = NIL) then exit;
      assert(saveBlock(inode^.inodeNum, inode));                // save the new inode
      u.iAddr := inode^.inodeNum;      // copy the inode addr to a uPtr      
      // now add the file to the directory
      writeln('Inserting ', fileNamePtr, ' into directory returned ', dirTree^.Insert(fileNamePtr, u));
    end;  
  dispose(dirTree, done);
  FreeMem(path, len+1);

  // This file doesn't have an associated vnid. Create a new one.
  new(vnid);
  vnid^.refCount := 1;
  vnid^.iAddr := inode^.inodeNum;
  vnid^.mode := 0; // XXX ?
  u.p := vnid;
  if not (vnids^.insert(@inode^.inodeNum, u)) then writeln('Could not insert into vnids');

  if not vNodeQueue^.dequeue(handle) then 
    begin 
      writeln('Out of file descriptors!');
      exit;
    end;

  with fileDescriptors^[handle] do
    begin
      TID := 0;  // ThreadID
      iAddr := inode^.inodeNum;
      position := 0;
      flags := 0;  // XXX ?
    end;
  result := handle;
end; // TUbixFS.vfs_fCreate

function TUbixFS.vfs_fExist;
begin
  // XXX ToDo:
  // find out why fExist('/./.') fails
  // fExist('/.') no longer fails
  
  result := -1;
  if ((superBlock = NIL) or (path = NIL)) then exit;
  if (not isValidIAddr(superBlock^.rootDir)) then exit;
  if isValidIAddr(pathToIAddr(path)) then result := 0;
end; // TUbixFS.vfs_fExist

function TUbixFS.vfs_format;
const 
  BATName:array[0..23] of char = 'Block Allocation Tree'#0;
var
  sector:array[0..511] of byte;
  fs:PUbixFS;
  logicalBlocks, fsBATSize, fsBATBlocks:uInt32;
  sb:PdiskSuperBlock;
  inode:PubixfsInode;
  i, fsBlockShift, lBlockShift, nodeSize, keyLength:integer;
  blockRun:TBlockRun;
  FBL:PBTree;
  remainderBlocks:uInt32;
  u:uPtr;
  blockBit:shortint;
begin
  result := -1;
  if (device = NIL) then exit;
  if (fsBlockSize < 1024) or (fsBlockSize > 8192) then
    begin
      writeln('Blocksize must be between 1024 and 8192');
      exit;
    end;
  // zero out the sector
  fillchar(sector, sizeof(sector), 0);

  writeln('device^.sectors: ', device^.sectors);

  if not quick then
    begin
      system.write('clearing device... ');

      for i := 0 to device^.sectors-1 do
        device^.write(@sector, i, 1);

      writeln('done');
    end; // if !quick


  fsBlockShift := 10;  {minimum is 1024}
  while (1 shl fsBlockShift < fsBlockSize) do inc(fsBlockShift);

  // allocate a new superBlock and clear it

  new(sb);
  if (sb = NIL) then exit;
  fillchar(sb^, sizeof(TDiskSuperBlock), 0);

  // device^.sectors is the number of 512 byte sectors

  lBlockShift := fsBlockSize shr 9;
  writeln('lBlockShift: ', lBlockShift);
  logicalBlocks := device^.sectors div lBlockShift;        // logical blocks
  writeln('logical blocks: ', logicalBlocks);

  with sb^ do
    begin
      strcopy(name, 'UbixFS');
      magic1      := UBIXFS_MAGIC_1;
      fsByteOrder := 0;
      blockSize   := fsBlockSize;
      blockShift  := fsBlockShift;
      numBlocks   := logicalBlocks;
      usedBlocks  := 32;            // XXX Based on what we allocate below for the new BAT system!
      inodeSize   := fsBlockSize;
      magic2      := UBIXFS_MAGIC_2;
      blocksPerAG := min(65536, blockSize shl 3);
      AGShift     := 10;  // realistically it should be a min of 13
      while (1 shl AGShift <> blocksPerAG) do inc(AGShift);
      numAGs      := (dword(numBlocks)+(blocksPerAG-1)) div blocksPerAG;

      flags       := UBIXFS_CLEAN;
      setBlockRun(logBlocks, 0, 0, 0);
      logStart    := 0;
      logEnd      := 0;
      magic3      := UBIXFS_MAGIC_3;
      setIAddr(indicies, 0, 0, 0);
      setIAddr(rootDir, 0, 0, 0);
    end; // with
  // create the free block list inode 

  getmem(inode, fsBlockSize);  // make the inode the same size as a block
  if (inode = NIL) then exit;

  fillchar(inode^, sb^.inodeSize, 0);

  with inode^ do
    begin
      magic := UBIXFS_INODE_MAGIC;

      // inodes point to themselves
      setIAddr(inodeNum, 0, sb^.logBlocks.len+1, 1); // the +1 for the start might be wrong
      setIAddr(parent, 0, 0, 0);

      // If the partition is larger than 550GB we will need to allocate more than
      // one blockRun for the BAT.
      
//oldBAT -- fsBATSize := logicalBlocks shr 3;  // compute how many bytes the BAT takes up
//oldBAT -- fsBATBlocks := (fsBATSize + fsBlockSize-1) div fsBlockSize;
//oldBAT -- writeln('fsBATSize: ', fsBATSize);
//oldBAT -- writeln('fsBATBlocks: ', fsBATBlocks);
//oldBAT -- setBlockRun(blocks.direct[0], 0, inodeNum.start+1, fsBATBlocks);

//oldBAT -- blocks.size := (fsBATBlocks * fsBlockSize); // round the file size up to the nearest block size
//oldBAT -- blocks.maxDirect := blocks.size-1;
//oldBAT -- blocks.blockCount := fsBATBlocks;
      blocks.size := 0;
      blocks.blockcount := 30;       // manually allocate blocks
      blocks.maxDirect := (blocks.blockCount shl fsBlockShift)-1;             // set the size of the file
      setBlockRun(blocks.direct[0], 0, inodeNum.start+1, blocks.blockCount);  // manually allocate blocks

      uid := 0;
      gid := 0;
      mode := 0;
      flags := INODE_LOGGED;

      setIAddr(attributes, 0, 0, 0);
      inodeSize := sb^.inodeSize;
//      strcopy(name, 'Block Allocation Tree');
      addAttr('name', BT_PCHAR, @BATName, strlen(@BATName)+1);
  end; // with
  sb^.BAT := inode^.inodeNum;
  writeln('writing out superblock to sector 1');
  device^.write(sb, 1, 1);
  // note that the superblock does not contain proper inode addresses for the
  // BAT and rootDir. Those are sync'd to disk in the destructor after they've been
  // created below. 
    
  system.write('Writing out BAT inode... ');
  // write out the Block Allocation Table inode
  with inode^.inodeNum, sb^ do
    device^.write(inode,
                  ((AG shl AGShift) + start) * (inodeSize shr SECTOR_SHIFT),
                  inodeSize shr SECTOR_SHIFT
                 ); // device^.write 
  writeln('done');
  // Okay, the superblock is the only piece of information needed
  // to mount an UbixFS partition
  new(fs, init(device));
  assert(fs <> NIL);
  assert(fs^.vfs_init() = 0);

  // The freshly formatted parition is now mounted. 
  // From here we need to create:
  //   BAT (Block Allocation Table) file contents
  //   journal
  //   root dir 
  new(FBL, init(iAddrToStr(sb^.BAT), BAT_PAGE_SIZE, BT_CUSTOM, new(PUbixBTreeVFS, init(fs))));
//  new(FBL, init('BAT.DAT', BAT_PAGE_SIZE, BT_CUSTOM, NIL));
  if (FBL = NIL) then 
    begin
      writeln('Error creating Block Allocation Tree in vfs_format()');
      halt;
    end; 

  FBL^.InstallUserFunctions(compareKeyBlockRun, copyKeyBlockRun, keySizeBlockRun);

  fillchar(u, sizeof(u), 0);

  setBlockRun(blockRun, 0, 0, 0);     // 0th AG tag
  assert(FBL^.Insert(@blockRun, u));  // 0th AG marker
writeln(dword(sb^.numBlocks));
writeln(sb^.numAGs);
  remainderBlocks := dword(sb^.numBlocks-sb^.usedBlocks);
  i := inode^.blocks.direct[0].start+inode^.blocks.blockCount;  
  setBlockRun(blockRun, 0, i, min(sb^.blocksPerAG-i, remainderBlocks));
  dec(remainderBlocks, blockRun.len);
with blockRun do 
  writeln('0th AG free block section: ', AG, ':', start, ':', len);

  FBL^.Insert(@blockRun, u);          // 0th AG free block list
  for i := 1 to sb^.numAGs-1 do
    begin
      setBlockRun(blockRun, i, 0, 0);  // AG tags
      assert(FBL^.Insert(@blockRun, u));
      blockRun.start := 1;
      blockRun.len := min(remainderBlocks, sb^.blocksPerAG-1);
      assert(FBL^.Insert(@blockRun, u));
      dec(remainderBlocks, min(remainderBlocks, sb^.blocksPerAG));
      sb^.usedBlocks := sb^.usedBlocks + 1;
    end;  // for i
writeln('remainderBlocks: ', remainderBlocks);
assert(remainderblocks = 0);
  dispose(FBL, done);

  fs^.vfs_mkdir('/');
  if (fs^.vfs_fExist('/.') = -1) then 
    writeln('Warning! /. doesn''t exist') 
  else 
    writeln('fExist(''/.'') returned properly');
  if (fs^.vfs_fExist('/nofile.dat') <> -1) then 
    writeln('Warning! /nofile.dat exists?') 
  else 
    writeln('fExist(''/nofile.dat'') returned properly');

  if (fs^.vfs_fExist('/nodir/nofile.dat') <> -1) then 
    writeln('Warning! /nodir/nofile.dat exists?') 
  else 
    writeln('fExist(''/nodir/nofile.dat'') returned properly');

//  writeln('fs^.vfs_fExist(''/.'') returned: ', fs^.vfs_fExist('/.'));
  writeln('fs^.vfs_fExist(''/nofile.dat'') returned: ', fs^.vfs_fExist('/nofile.dat')); 
  fs^.printSuperBlock();
  
  dispose(fs, done);
  writeln('Format complete!');
  dispose(sb);
  freemem(inode, fsBlockSize);
   
end; // TUbixfS.vfs_format

function TUbixFS.vfs_init;
var index:uInt32;
begin
  // This function should probably not be called more than once without
  // calling the appropiate shutdown code
  writeln('TUbixFS.vfs_init()');
  result := -1;
  if (dev = NIL) then exit;
  new(superBlock);
  assert(superBlock <> NIL);
  if (superBlock = NIL) then exit;
  fillchar(superBlock^, sizeof(superBlock^), 0); // better safe than sorry
  // read in the superBlock. It's always the 1st block on the partition (counting from 0)
  system.write('reading in superBlock... ');
  dev^.read(superBlock, 1, 1);
  writeln('done');

  assert(superBlock^.magic1 = UBIXFS_MAGIC_1);
  assert(superBlock^.magic2 = UBIXFS_MAGIC_2);
  assert(superBlock^.magic3 = UBIXFS_MAGIC_3);
  assert(strcomp(superBlock^.name, 'UbixFS') = 0);
  assert((1 shl superBlock^.blockShift) = superBlock^.blockSize);
  assert((1 shl superBlock^.AGShift) = superBlock^.blocksPerAG);
  assert(superBlock^.flags = UBIXFS_CLEAN);
  
  // create the file decriptor tree


  // The blockCache tree has a [key::value] pair of [block address::pointer to cacheNode].
  // The blockTime tree has a [key::value] pair of [last used time::pointer to cacheNode].

  // create the block cache tree
  new(blockCache, init('', 256, BT_INT64, NIL));
  // create the block last used time tree
  // The last used time is obtained through the RDTSC instruction to give us a unique
  // and increasing timestamp.  When the cache is full we drop off the least recently 
  // used block.  Because the tree is sorted, this will be the first entry. 
  new(blockTime, init('', 256, BT_INT64, NIL));

  // Create the file descriptors and related stuff

  // vnids is a tree which has a [key::value] pair of [iAddr::pointer to vNode].
  new(vnids, init('', 256, BT_INT64, NIL));

  // vnodeQueue is a queue which has a list of available FD handles
  new(vNodeQueue, init(maxFDEntry, sizeof(uInt32)));
  // fill the vNodeQueue with a list of all available FD handles
  for index := 0 to maxFDEntry-1 do
    vNodeQueue^.enqueue(index);

  // Allocate and clear the file descriptor array
  GetMem(fileDescriptors, maxFDEntry*sizeof(TVNode));
  fillchar(fileDescriptors^, maxFDEntry*sizeof(TVNode), 0);
  
  result := 0;  // success
end; // TUbixFS.vfs_init

function TUbixFS.vfs_mkdir;
const 
  dot:array[0..1] of char = '.'#0;
  dotdot:array[0..2] of char = '..'#0;
var 
  inode:PUbixFSInode;
  dirIAddr:inodeAddr;
  iAddr:inodeAddr;
  bRun:TBlockRun;
  len:integer;
  dirTree:PbTree;
  u:uPtr;
  pathToParentDir:PChar;
  dirNamePtr:PChar; 
  curPos:integer;

begin
  result := -1;
  assert(path <> NIL);
  assert(superBlock <> NIL);
  if (path = NIL) or (superBlock = NIL) then exit;

  len := strlen(path);
  assert(len <> 0);
  if (len = 0) then exit;

  if (len = 1) and (uInt8ArrayPtr(path)^[0] = ord('/')) then
    begin
      writeln('Creating root directory');
      // wanting to create the root. Check to see if the root dir inode is empty
      assert(not isValidIAddr(superBlock^.rootDir));
      if isValidIAddr(superBlock^.rootDir) then exit;  // should set a proper error code here
      setIAddr(iAddr, 0, 0, 0);   // technically the "parent" iaddr
      inode := allocInode(iAddr); // allocate an inode (parent points to la-la land)
      assert(inode <> NIL);       // make sure it isn't nil
      if (inode = NIL) then exit;
      inode^.mode := S_IFDIR;
      inode^.flags := INODE_LOGGED;
      superBlock^.rootDir := inode^.inodeNum;  // assign the root dir
      inode^.addAttr('name', BT_PCHAR, path, len+1); // len+1 should equal 2
      assert(saveBlock(superBlock^.rootDir, inode));           // save the inode
      u.iAddr := inode^.inodeNum; // copy the inode addr to a uPtr

      // open the root dir
      with superBlock^.rootDir do
        writeln('Root dir inode: ', AG, ':', start, ':', len);
      writeln('Opening root dir tree');
      new(dirTree, init(iAddrToStr(superBlock^.rootDir), 1024, BT_PCHAR, new(PUbixBTreeVFS, init(@self))));
      writeln('Inserting ''.''');
      // Get ready to insert the '.' :: inode^.inodeNum key :: value pair
      writeln('dirTree^.Insert(@dot, u) returned ', dirTree^.Insert(@dot, u));     // '.' points to ourself
      writeln('dirTree^.Insert(@dotDot, u) returned ', dirTree^.Insert(@dotDot, u));  // '..' points to ourself, too
      dispose(dirTree, done);       // close and save the directory
    end
  else
    begin
      // subdir support goes here
      setIAddr(dirIAddr, 0, 0, 0);
      len := strlen(path);
      if (len = 0) then exit;
      if uInt8ArrayPtr(path)^[0] <> ord('/') then exit;  // malformed path
      GetMem(pathToParentDir, len+1);
      strCopy(pathToParentDir, path);
      curPos := len;
      while (curPos > 0) and (uInt8ArrayPtr(pathToParentDir)^[curPos] <> ord('/')) do dec(curPos);
      dirNamePtr := pathToParentDir + curPos + 1;
      if (curPos = 0) then
        begin
          writeln('New directory being created below root');
          writeln('dir name: ', dirNamePtr);    
          dirIAddr := superBlock^.rootDir;
        end
      else
        begin
          if (curPos <> 0) then uInt8ArrayPtr(pathToParentDir)^[curPos] := 0;  // null out the last / separator
          writeln('Path to parent dir: ', pathToParentDir);
          writeln('dir name: ', dirNamePtr);  
          dirIAddr := pathToIAddr(pathToParentDir);    

        end;
      inode := allocInode(dirIAddr);  // allocate an inode
      assert(inode <> NIL);
      if (inode = NIL) then exit;     // set some appropiate error code (not sure what)
      inode^.mode := S_IFDIR;
      inode^.flags := INODE_LOGGED;
      inode^.addAttr('name', BT_PCHAR, dirNamePtr, strlen(dirNamePtr)+1);
      assert(saveBlock(inode^.inodeNum, inode));
      u.iAddr := inode^.inodeNum;
      // open the parent dir
      new(dirTree, open(iAddrToStr(dirIAddr), new(PUbixBTreeVFS, init(@self))));  
      writeln('dirTree^.Insert(dirNamePtr, u) returned ', dirTree^.Insert(dirNamePtr, u));     
      dispose(dirTree, done);   
      // now open the subdir and insert '.' and '..'
      new(dirTree, init(iAddrToStr(inode^.inodeNum), 1024, BT_PCHAR, new(PUbixBTreeVFS, init(@self))));
      // Get ready to insert the '.' :: inode^.inodeNum key :: value pair
      writeln('dirTree^.Insert(@dot, u) returned ', dirTree^.Insert(@dot, u));     // '.' points to ourself
      u.iAddr := dirIAddr;
      writeln('dirTree^.Insert(@dotdot, u) returned ', dirTree^.Insert(@dotdot, u));     // '..' points to parent dir
      dispose(dirTree, done);      
      FreeMem(pathToParentDir, len+1);
    end; // else subdir
    
  result := 0;
end; // TUbixFS.vfs_mkdir

function TUbixFS.vfs_read;
var 
  inode:PUbixFSInode;
  u:uPtr;
begin
  result := -1;
  if (buffer = NIL) or (fileDescriptors = NIL) or (handle >= maxFDEntry) then exit;
  with fileDescriptors^[handle] do
    begin
      assert(readDataStream(iAddr, position, buffer, size) = 0);
      position := position + int64(size);
      result := size;
    end; // with
end; // TUbixFS.vfs_read

function TUbixFS.vfs_readAttr;
var 
  inode:PUbixfsInode;
begin
  result := -1;  // failure by default
  if (attribute = NIL) or (buffer = NIL) or (handle >= maxFDEntry) or (fileDescriptors = NIL) then exit;

  with fileDescriptors^[handle] do
    begin
      if not isValidIAddr(iAddr) then exit; // We need to check that the file descriptor points to an open file.
      inode := loadBlock(iAddr);
      if (inode = NIL) then exit;
    end; // with

  // first, start in the small data area 
end; // TUbixFS.vfs_readAttr

function TUbixFS.vfs_seek;
begin
  result := -1;
  if (fileDescriptors = NIL) or (handle >= maxFDEntry) then exit;
  fileDescriptors^[handle].position := newPosition;
  result := 0;
end; // TUbixFS.vfs_seek

function TUbixFS.vfs_unlink;
var 
  dirTree:PbTree;
  iAddr:inodeAddr;
  dirIAddr:inodeAddr;
  path, fileNamePtr:PChar; 
  curPos:integer;
  len:integer;
  inode:PUbixfsInode;
  u:uPtr;
  vnid:PVnid;
  smallDataNode:PSmallDataNode;
begin 
  // XXX ToDo:
  // Set appropiate error codes.
  // If the "file" is actually a directory, bail on that
  // If bailing out, don't forget to reclaim allocated memory
  result := -1;
  if (fileName = NIL) or (superBlock = NIL) or (vnids = NIL) then exit;

  // We need to break the path apart into a directory path and filename
  len := strlen(filename);
  if (len = 0) then exit;
  getmem(path, len+1);
  strcopy(path, filename);
  curPos := len;
  while (curPos > 0) and (uInt8ArrayPtr(path)^[curPos] <> ord('/')) do
    dec(curPos);
  fileNamePtr := path + curPos +1;

  if (curPos = 0) then
    begin
      dirIAddr := superBlock^.rootDir;
    end
  else
    begin
      uInt8ArrayPtr(path)^[curPos] := 0;
      dirIAddr := pathToIAddr(path);
    end;
  // XXX We can't exit because we have allocated memory to clean up
  if not (isValidIAddr(dirIAddr)) then exit; // set some error code here  
  
  new(dirTree, open(IAddrToStr(dirIAddr), new(PUbixBTreeVFS, init(@self))));
  assert(dirTree <> NIL);
  fillchar(u, sizeof(u), 0);
  if dirTree^.Find(fileNamePtr, u) then 
    begin
      inode := loadBlock(u.iAddr);
      if (inode = NIL) then 
        begin
          writeln('error loading inode in unlink()');
          exit;  // XXX cannot exit from here either
        end;
      // the filename of the file always exists inside the 
      // small data area of the file.
      smallDataNode := inode^.findAttr('name');
      assert(smallDataNode <> NIL);
      dirTree^.Delete(smallDataNode^.dataPtr(), u);
      // We know the file exists.. but does anybody have it open?
      if vnids^.Find(@inode^.inodeNum, u) then
        begin
          // file was already open
          // mark it for deletion later
          inode^.flags := inode^.flags OR INODE_DELETED;
          saveBlock(inode^.inodeNum, inode);
        end
      else
        begin
          // Tis okay to delete the file
          inode^.blocks.size := 0;
          shrinkFile(inode);
          freeUsedBlocks(inode^.inodeNum);
        end; 
    end
  else
    writeln('File does not exist');
  dispose(dirTree, done);

  FreeMem(path, len+1);
  result := 0;
end; // TUbixFS.vfs_unlink

function TUbixFS.vfs_write;
begin
  result := -1;
  if (buffer = NIL) or (fileDescriptors = NIL) or (handle >= maxFDEntry) then exit;

  with fileDescriptors^[handle] do
    begin
      if not isValidIAddr(iAddr) then exit;
      assert(writeDataStream(iAddr, position, buffer, size) = 0);
      position := position + int64(size);
      result := size;
    end; // with
end; // TUbixFS.vfs_write

function TUbixFS.vfs_writeAttr;
begin
  result := -1;  // failure by default;
  if (fileDescriptors = NIL) or (handle >= maxFDEntry) then exit;
  result := writeAttr(fileDescriptors^[handle].iAddr, attribute, attrType, position, buffer, count);
end; // TUbixFS.vfs_writeAttr

function TUbixFS.writeAttr;
var
  inode:PUbixfsInode;
  dataNode:PSmallDataNode;
  attrTree:PbTree;
  found, smallDataArea:boolean;

  u:uPtr;
begin
  result := -1;
  if (attribute = NIL) or (buffer = NIL) then exit;
  if not isValidIAddr(iAddr) then exit;
  inode := loadBlock(iAddr);
  if (inode = NIL) then exit;
  dataNode := @inode^.smallData;
  found := FALSE;
  writeln('looking for ', attribute);
  smallDataArea := TRUE;
  while not found and not dataNode^.isEmpty() do
    if (strcomp(@dataNode^.name, attribute) = 0) then
      found := TRUE
    else      
      dataNode := dataNode^.next();

  if not found then
    begin
      // we looked through the small data area and didn't find it. Check the attr dir (if there is one)
      if isValidIAddr(inode^.attributes) then
        begin
          new(attrTree, open(iAddrToStr(inode^.attributes), new(PUbixBTreeVFS, init(@self))));
          if (attrTree = NIL) then exit;  // danger will robinson!
          if (attrTree^.Find(attribute, u)) then
            begin
              found := FALSE;
              smallDataArea := FALSE;
            end;
          dispose(attrTree, done);
        end;
    end;
end; // TUbixFS.writeAttr

function TUbixFS.writeDataStream;
var 
  tmpBlock:uInt8ArrayPtr;
  blockRun:TBlockRun;
  size, remainder:uInt32;
  blockStart:uInt32;
  blocksToWrite:uInt32;
  bytesToWrite:uInt32;
  inodeChanged:boolean;
  inode:PUbixFSInode;
begin
//  with iAddr do
//    writeln('writeDataStream(', AG, ':', start, ':', len, ', ', position, ', ', hex(dword(buf)), ', ', length, ');');

  result := -1;
  if ((buf = NIL) or (superBlock = NIL)) then exit; 
  inode := loadBlock(iAddr);
  tmpBlock := NIL;

  inodeChanged := FALSE;
  if (position + length > getFileSpace(inode)) then 
    begin
      extendFile(inode, position+length);  
//      inode^.blocks.size := position+length;  // update the blocks
      writeln('after extendfile: size = ', inode^.blocks.size, '  getFileSpace() = ', getFileSpace(inode)); 

    end
  else
    if (position + length > inode^.blocks.size) then
      begin
      	// The above case checks to see if we're writing out past
      	// the end of the allocated space on the device. If we do, we extend 
      	// out the file. However, if we're just writing past the end of 
      	// the file, we have to update the size.
        
      	inode^.blocks.size := position+length;
        // XXX To Do:
        // Might want to clear out the blocks we're skipping past, otherwise
        // there might be garbage in there
        assert(saveBlock(iAddr, inode));  // save the inode
      end;

  size := length;
  remainder := uInt32(position mod superBlock^.blockSize);

  if (remainder <> 0) then
    begin
      // the position we've been asked to write to doesn't align to
      // a block size. We need to read in the previous block and pad
      // the data
      blockStart := uInt32(position shr superBlock^.blockShift);
      blockRun := findBRun(@inode^.blocks, blockStart);
      assert(blockRun.len > 0);
      tmpBlock := loadBlock(blockRun);
      assert(tmpBlock <> NIL);
      if (tmpBlock = NIL) then exit;
      bytesToWrite := min(size, (superBlock^.blockSize - remainder));
      move(buf^, tmpBlock^[remainder], bytesToWrite);
      assert(saveBlock(blockRun, tmpBlock));
      inc(buf, bytesToWrite);
      dec(size, bytesToWrite);
      position := position + bytesToWrite;
    end; // if remainder != 0

  remainder := size mod superBlock^.blockSize;
  dec(size, remainder);

  // Size is currently in bytes. The remainder variable above holds the number
  // of remaining bytes (if any) outside full blocks we have to write.

  // convert the size into blocks
  size := size shr superBlock^.blockShift;

  while (size > 0) do
    begin
      blockRun := findBRun(@inode^.blocks, uInt32(position shr superBlock^.blockShift));
      if (blockRun.len = 0)  then 
        begin 
          printStuff := TRUE; 
          blockRun := findBRun(@inode^.blocks, uInt32(position shr superBlock^.blockShift));
        end;
      assert(blockRun.len > 0);
      blocksToWrite := min(blockRun.len, size);
      for blocksToWrite := 0 to min(blockRun.len, size)-1 do
        begin
          assert(saveBlock(blockRun, buf));
          inc(blockRun.start);  // This might wrap around?
          dec(blockRun.len);    // might be unnecessary
          dec(size);
          inc(buf, superBlock^.blockSize);
        end;
    end; // while

  if (remainder <> 0) then
    begin
      // There is less than a block of data left to write. Read in the old block
      blockStart := uInt32((position+length-remainder) shr superBlock^.blockShift);
      blockRun := findBRun(@inode^.blocks, blockStart);
      assert(blockRun.len > 0);
      // obtain a pointer to the block
      tmpBlock := loadBlock(blockRun);
      assert(tmpBlock <> NIL);
      if (tmpBlock = NIL) then exit;
      move(buf^, tmpBlock^[0], remainder);
      assert(saveBlock(blockRun, tmpBlock));
    end;
  result := 0;
end; // TUbixFS.writeDataStream

destructor TUbixFS.done;
var 
  searchRec:BTreeSearchRec;
  inode:PUbixfsInode;
  cacheNode:PCacheNode;
  dirtyCount:integer;
  u:uPtr;
begin
  sync();
  // XXX ToDo:
  // close all "open" files manually.
  if (vnids <> NIL) then dispose(vnids, done);
  if (vNodeQueue <> NIL) then dispose(vNodeQueue, done);
  if (fileDescriptors <> NIL) then FreeMem(fileDescriptors, maxFDEntry*sizeof(TVNode));
  vnids := NIL;
  vNodeQueue := NIL;
  fileDescriptors := NIL;

  if (blockTime <> NIL) then dispose(blockTime, done);
  if (blockCache <> NIL) then 
    begin
      dirtyCount := 0;
      fillchar(u, sizeof(u), 0);
      while (blockCache^.getFirstKey(searchRec)) do
        begin
          cacheNode := searchRec.value.p;
          assert(cacheNode <> NIL);
          if (cacheNode^.dirty) then inc(dirtyCount);
          u.p := cacheNode;
          blockCache^.Delete(@cacheNode^.address, u);
          dispose(cacheNode, done);          
        end;
      if (dirtyCount <> 0) then writeln('Dirty blocks in cache: ', dirtyCount);
      dispose(blockCache, done);
    end;
  if (superBlock <> NIL) then dispose(superBlock);
end; // TUbixFS.done

procedure TUbixFS.examine;
var 
  dirTree:PBTree;
  searchRec:BTreeSearchRec;
  u:uPtr;
begin
  new(dirTree, open(iAddrToStr(superBlock^.BAT), new(PUbixBTreeVFS, init(@self))));
  dirTree^.installUserFunctions(compareKeyBlockRun, copyKeyBlockRun, keySizeBlockRun);
  dirTree^.printWholeTree('bat.txt');
  dispose(dirTree, done);
  writeln('------------- examining FS -----------------');
  write('Opening root dir...');
  new(dirTree, open(IAddrToStr(superblock^.rootDir), new(PUbixBTreeVFS, init(@self))));
  //dirTree^.PrintList('rootDir.txt');
  writeln('done');
  writeln('Root directory has ', dirTree^.getKeyCount(), ' entries');
  dirTree^.getFirstKey(searchRec);
  repeat
    with searchRec.value.iAddr do
      write('value: ', AG, ':', start, ':', len);
    writeln('    key: ', pchar(searchRec.key));
  until not dirTree^.findNext(searchRec);
  dispose(dirTree, done);
  if (blockCache <> NIL) then writeln('Block cache has ', blockCache^.getKeyCount(), ' entries');
  writeln('Space used: ', superBlock^.usedBlocks, '(', superBlock^.numBlocks, ')');
  writeln('------------------ done --------------------');
end;

end.