ubixfs-2/ubixfs_test.pas at master

Fork: 0
uBixOS / ubixfs-2
Find file
Newer
Older
ubixfs-2 / ubixfs_test.pas
flameshadow on 10 Jun 2005 87 KB UbixFS
Raw Blame History
unit UbixFS;

interface

uses device, lists;

const INODE_NO_FLAG     = $00000000;
const INODE_IN_USE      = $00000001;

const ATTR_INODE        = $00000004;
const INODE_LOGGED      = $00000008;
const INODE_DELETED     = $00000010;
const PERMANENT_FLAGS   = $0000ffff;
const INODE_NO_CACHE    = $00010000;
const INODE_WAS_WRITTEN = $00020000;
const S_IFDIR           = $00040000;  // POSIX
const NO_TRANSACTION    = $00040000;

const DATASTREAM_DIRECT_BLOCKS = 16;    // number of direct blocks in the dataStream 

const UBIXFS_MAGIC_1    = $A0A0A0A;
const UBIXFS_MAGIC_2    = $B0B0B0B;
const UBIXFS_MAGIC_3    = $C0C0C0C;
const UBIXFS_INODE_MAGIC = $3BBE0AD9;

const UBIXFS_CLEAN = $434C454E;
const UBIXFS_DIRTY = $44495254;

const SECTOR_SHIFT = 9;
const SECTOR_SIZE = 1 shl SECTOR_SHIFT;

type
  int8          = shortInt;
  int16         = smallInt;
  int32         = longInt;

  uInt8         = byte;
  uInt16        = word;
  uInt32        = dWord;
{ Common Integer array definitions }

type
  int8ArrayPtr  = ^int8Array;
  int8Array     = array[ 0..0 ] of int8;

  int16ArrayPtr = ^int16Array;
  int16Array    = array[ 0..0 ] of int16;

  int32ArrayPtr = ^int32Array;
  int32Array    = array[ 0..0 ] of int32;

  { Common Unsigned Integer array definitions }

type
  uInt8ArrayPtr = ^uInt8Array;
  uInt8Array    = array[ 0..0 ] of uInt8;

  uInt16ArrayPtr= ^uInt16Array;
  uInt16Array   = array[ 0..0 ] of uInt16;

  uInt32ArrayPtr= ^uInt32Array;
  uInt32Array   = array[ 0..0 ] of uInt32;

type


(*  TBlockRun = packed record
    AG     : int32;
    start  : uInt16;
    len    : uInt16;
  end;

  inodeAddr = TblockRun;

  uPtr = packed record
   case integer of
     0: (i      : int32);
     1: (u      : uInt32);
     2: (s      : single);
     3: (d      : double);
     4: (p      : pointer);
     5: (offset : int64);
     6: (iAddr  : inodeAddr);
  end;
*)
  PdiskSuperBlock = ^diskSuperBlock;
  diskSuperBlock = packed record
    name        : array[0..31] of char;
    magic1      : int32;

    fsByteOrder : int32;

    // blockSize on disk 
    blockSize   : uInt32;

    // number of bits needed to shift a block number to get a byte address
    blockShift  : uInt32;

    numBlocks   : int64;
    usedBlocks  : int64;

    inodeSize   : uInt32;
    magic2      : uInt32;

    blocksPerAG : uInt32;
    AGShift     : uInt32;
    numAGs      : uInt32;

    flags       : uInt32;

    logBlocks   : TBlockRun;
    logStart    : int64;
    logEnd      : int64;

    magic3      : int32;
    BAT         : inodeAddr;
    rootDir     : inodeAddr;
    indicies    : inodeAddr;

    pad         : array[0..371] of byte;
  end; // diskSuperBlock

  PDataStream = ^TDataStream;
  TDataStream = packed record
    direct        : array[0..DATASTREAM_DIRECT_BLOCKS-1] of TBlockRun;
    maxDirect     : int64;
    indirect      : TBlockRun;
    maxIndirect   : int64;
    doubleIndirect: TBlockrun;
    maxDIndirect  : int64;
    size          : int64;
    blockCount    : uInt32;
  end; // TDataStream

  PubixfsInode = ^ubixfsInode;
  ubixfsInode = packed record
    magic         : int32;
    inodeNum      : inodeAddr;
    uid           : uInt32;
    gid           : uInt32;
    mode          : int32;
    flags         : int32;
    createTime    : int64;
    modifiedTime  : int64;
    parent        : inodeAddr;
    attributes    : inodeAddr;
    iType         : uInt32;
    inodeSize     : uInt32;
    data          : uPtr;         {this was the etc field in bfs}
    blocks        : TDataStream;
    smallData     : array[0..0] of byte;
  end; // ubixfsInode

  PVNode = ^TVNode;
  TVNode = object
    iAddr    : inodeAddr;
    position : int64;
    flags    : uInt32; 
  end; // TVNode

  PVnid = ^TVnid;
  TVnid = object
    refCount : uInt32;
    iAddr    : inodeAddr;
    mode     : uInt32;
  end; // TVnid

(*  PFileDescriptor = ^TFileDescriptor;
  TFileDescriptor = object
    iAddr    : inodeAddr;
    refCount : uInt32;    
    flags    : uInt32;
    handle   : uInt32;
  end; // fileDescriptor *)

  PUbixFS = ^TUbixFS;
  TUbixFS = object
   private
    dev           : PDevice;
    superBlock    : ^diskSuperBlock;
    blockCache    : PbTree;
    blockTime     : PbTree;
    vnids         : PbTree;
    vNodeQueue    : PQueue;
    fileDescriptors : ^array[0..0] of TVNode;

    maxCacheEntry : uInt32;
    maxFDEntry    : uInt32;
    
    procedure       assert(x:boolean);
    function        allocBRun(blockRun:TBlockRun):TBlockRun;
    function        allocInode(parentIAddr:inodeAddr):PUbixfsInode;
    function        extendFile(inode:PubixfsInode; newSize:int64):integer;
    function        findBRun(dataStream:PDataStream; blockNum:uInt32):TBlockRun;
    function        findBRunDirect(dataStream:PDataStream; blockNum:uInt32):TBlockRun;    
    function        findBRunIndirect(dataStream:PDataStream; blockNum:uInt32):TBlockRun;
    function        findBRunDIndirect(dataStream:PDataStream; blockNum:uInt32):TBlockRun;
    function        freeUsedBlocks(blockRun:TBlockRun):integer;
    function        getFileSize(inode:PUbixfsInode):int64; 
    function        getFileSpace(inode:PUbixfsInode):int64; 
    function        iAddrToStr(iAddr:inodeAddr):string;
    function        isValidIAddr(iAddr:inodeAddr):boolean;
    function        loadBlock(blockAddr:TBlockRun):pointer;

    function        markUsedBlocks(blockRun:TBlockRun):integer;
    function        nextAG(AG:uInt32):uInt32;
    function        pathToIAddr(path:PChar):inodeAddr;

    function        readDataStream(iAddr:inodeAddr; position:int64; buf:pointer; length:uInt32):integer; 
    function        saveBlock(blockAddr:TBlockRun; buf:pointer):boolean; 
    procedure       setBlockRun(var blockRun:TBlockRun; AG:uInt32; start, len:uInt16);
    procedure       setIAddr(var inode:inodeAddr; AG:uInt32; start, len:uInt16);
    function        strToIAddr(s:string):inodeAddr;
    function        writeDataStream(iAddr:inodeAddr; position:int64; buf:pointer; length:uInt32):integer;
   public
    constructor     init(device:Pdevice);
    procedure       printSuperBlock;
    procedure       assign(var F:TFileRec; const _name:string);
    procedure       reset(var F:TFileRec; blkSize:uInt32);
    procedure       rewrite(var F:TFileRec; blkSize:uInt32);
    procedure       sync;
    function        vfs_fCreate(filename:PChar):integer;
    function        vfs_fExist(path:PChar):integer;
    function        vfs_close(handle:integer):integer;
    function        vfs_format(device:PDevice; fsBlockSize:uInt32; quick:boolean):integer;
    function        vfs_init:integer;
    function        vfs_mkdir(path:PChar):integer;
    function        vfs_read(handle:integer; buffer:pointer; size:uInt32):integer; 
    function        vfs_seek(handle:integer; newPosition:int64):integer;   
    function        vfs_write(handle:integer; buffer:pointer; size:uInt32):integer;  
    procedure       examine;
    destructor      done; virtual;
  end; // TUbixFS
 
implementation

uses strings, dos, math, lists;

// IMPORTANT NOTE
//   You cannot under any circumstance change NUM_EXTENTION_BLOCKS. This
// constant defines the multiple of blocks that a file will be extended
// by, *and* it determines how many blocks are used in each blockRun in
// the Double Indirect dataStream.
//   NUM_EXTENSION_BLOCKS determines how much slack space is allocated for
// a file when it is extended. For example, a file with 0 bytes in it
// will take up 0 blocks for the data and 1 block for the inode (ignoring
// space required to hold the directory entry).  When the first byte is 
// written, extendFile() will allocate a multiple of NUM_EXTENSION_BLOCKS 
// to accomodate the amount of data being written out. So, a 1 byte file will 
// actually take up NUM_EXTENSION_BLOCKS (plus one for the inode) until the 
// file is closed. Then the file will be trimmed and the slack space reclaimed.
//   This value is also used in the double-indirect region of the dataStream
// to determine how many blocks each blockRun uses.

//         vv DO NOT CHANGE vv        //
const NUM_EXTENSION_BLOCKS = 4;       // <--- don't change this
//         ^^ DO NOT CHANGE ^^        //

const NUM_CACHE_ENTRIES = 8192; // default max entries in block cache
const NUM_FD_ENTRIES = 1024;    // default max entries in the File Descriptor Table

type 
  PCacheNode = ^TCacheNode;
  TCacheNode = object
   public
    address  : int64;
    lastUsed : int64;
    data     : pointer;
    dataSize : uInt32;
    dirty    : boolean;
    locked   : boolean;
    constructor init(_address, _lastUsed:int64; _dataSize:uInt32);
    destructor done;
  end; // TCacheNode

type 
  PUbixBTreeVFS = ^TUbixBTreeVFS;
  TUbixBTreeVFS = object(TBTreeVFS)
   private
    fs          : PUbixFS;
    iAddr       : inodeAddr;
    position    : int64;  // limits files to 4GB
   public
    constructor init(_fs:PUbixFS);
    function    fClose:boolean; virtual;
    function    fCreate(const filename:string):boolean; virtual;
    function    fExist(const filename:string):boolean; virtual;
    function    fOpen(const filename:string):boolean; virtual;
    function    fRead(var buf; size:uInt32):boolean; virtual;   
    function    fSeek(offset:uInt32):boolean; virtual;
    function    fWrite(var buf; size:uInt32):boolean; virtual;
    destructor  done; virtual; 
  end; // TUbixBTreeVFS

function null_file_proc conv arg_based (f: LongInt; buf: pointer; len: ULong; var act: ULong): LongInt;
begin
  runerror(999);
end;

constructor TCacheNode.init;
begin
  address  := _address;
  lastUsed := _lastUsed;
  dataSize := _dataSize;
  data := NIL;
  if (dataSize <> 0) then GetMem(data, dataSize);
  dirty    := FALSE;
  locked   := FALSE;
end; // TCacheNode.init

destructor TCacheNode.done;
begin
  if (data <> NIL) and (dataSize <> 0) then FreeMem(data, dataSize);
  address  := 0;
  lastUsed := 0;
  data     := NIL;
  dataSize := 0;
  dirty    := FALSE;
  locked   := FALSE;
end; // TCacheNode.done

constructor TUbixBTreeVFS.init;
begin
//  writeln('TUbixBTreeVFS.init()');
  fs         := _fs;
  fillchar(iAddr, sizeof(iAddr), 0);  
  position   := 0;
end; // TUbixBTreeVFS.init;

function TUbixBTreeVFS.fClose;
begin
  if (fs = NIL) then exit;
  position := 0;
end; // TUbixBTreeVFS.fClose

function TUbixBTreeVFS.fCreate;
begin
  // I *think* that this code is only called for the
  // B^Tree. Therefore, the file technically exists, but just
  // has nothing in it when we call fCreate.
  result := fOpen(filename);  
end; // TUbixBTreeVFS.fCreate

function TUbixBTreeVFS.fExist;
var 
  tmpIAddr:inodeAddr;
  tmpInode:PUbixfsInode;
begin
  result := FALSE;
  assert(fs <> NIL);
  if (fs = NIL) then exit;
  assert(fs^.superBlock <> NIL);
  if (fs^.superBlock = NIL) then exit;
  tmpIAddr := fs^.strToIAddr(filename);
  if not fs^.isValidIAddr(tmpIAddr) then exit;

  // If we've created an inode, but haven't written any data to it, then
  // the B^Tree hasn't written out a superblock, so technically the file doesn't
  // exist, and this is a really long run-on sentence.
  tmpInode := fs^.loadBlock(tmpIAddr);

  // I'm not sure why, but the compiler cannot properly evaluate (tmpInode^.blocks.size <> 0),
  // so we check both the high and low portions of the size to make sure it's not 0.
  //  result := (tmpInode <> NIL) and ((longLongRec(tmpInode^.blocks.size).lo <> 0) or (longLongRec(tmpInode^.blocks.size).hi <> 0));

  // However, checking for 0 and then taking the complement of that works .. so we go that way.
  result := ((tmpInode <> NIL) and (not (tmpInode^.blocks.size = 0)));
end; // TUbixBTreeVFS.fExist

function TUbixBTreeVFS.fOpen;
begin
  result := FALSE;
  if (fs = NIL) then exit;
  if (fs^.superBlock = NIL) then exit;
  iAddr := fs^.strToIAddr(filename);
  position := 0;
  result := fs^.isValidIAddr(iAddr);
end;  // TUbixBTreeVFS.fOpen

function TUbixBTreeVFS.fRead;
begin
  result := FALSE;
  if (fs = NIL) then exit;
//  with inode^.inodeNum do
//    write('fRead(', AG, ':', start, ':', len, ', ');
//  write(position, ', ');
//  write('@buf, ');

  result := (fs^.readDataStream(iAddr, position, @buf, size) = 0);
  position := position + int64(size);
end; // TUbixBTreeVFS.fRead

function TUbixBTreeVFS.fSeek;
begin
  // uhm.. if the desired offset is beyond the end of the file, 
  // should we not extend out the file? Or does that occur in writeDataStream ...
  position := offset;
  result := TRUE;
end; // TUbixBTreeVFS.fSeek

function TUbixBTreeVFS.fWrite;
begin
  result := FALSE;
  if (fs = NIL) then exit;
//  with inode^.inodeNum do
//    write('fWrite(', AG, ':', start, ':', len, ', ');
//  write(position, ', ');
//  write('@buf, ');
  result := (fs^.writeDataStream(iAddr, position, @buf, size) = 0);
  position := position + int64(size);
  //inc(position, size);
end; // TUbixBTreeVFS.fWrite;

destructor TUbixBTreeVFS.done;
begin
  // XXX: It's possible that we need to save the inode here since 
  // we possibly changed a directory
  fillchar(iAddr, sizeof(iAddr), 0);
  fs         := NIL;  // don't destruct it since we didn't allocate it
end; // TUbixBTreeVFS.done

constructor TUbixFS.init;
begin
  dev := device;
  superBlock := NIL;
  blockCache := NIL;
  blockTime  := NIL;
  vnids      := NIL;
  vnodeQueue := NIL;
  fileDescriptors := NIL;
  maxCacheEntry := NUM_CACHE_ENTRIES;
  maxFDEntry := NUM_FD_ENTRIES;
end; // TUbixFS.init

procedure TUbixFS.assert(x:boolean);
begin
  if not x then runerror(1000);
end; // TUbixFS.assert

{$system}
procedure TUbixFS.assign;
begin
  with f do begin
    magic   := @f;
    state   := [%file_assigned];
    name    := _name;
    rd_proc := null_file_proc;
    wr_proc := null_file_proc;
    check_magic();
  end; // with
end; // TUbixFS.assign

{$system}
procedure TUbixFS.reset;
begin
//  if IO_ApiRet <> 0 then IO_Error(IO_Apiret);
end; // TUbixFS.reset

procedure TUbixFS.rewrite;
begin

end; // TUbixFS.rewrite

function TUbixFS.allocBRun;
var
//  inode:PUbixfsInode;
  FBL:int8ArrayPtr;    // Free Block List array pointer
  FBLSize:uInt32;      // Free Block List array pointer size
  bitOffset:uInt32;
  byteOffset:uInt32;
  curBlock, baseBlock:uInt32;
  freeCount:uInt32;
begin
  // Notes: 
  // 2005-04-27 mji
  //   allocBRun() will allocate a blockRun that is near to the blockRun 
  // passed into the function.  The AG and Start field of blockRun will be
  // the starting point of where to look for free blocks.  The len field
  // holds how many contiguous blocks we need to allocate.
  //   We look through the BAT (Block Allocation Table) for a contiguous run that 
  // fits the requested length.  The BAT is a bitfield (used blocks are an 
  // on (1) bit, free blocks are an off (0) bit) of used/unused logical
  // blocks, where the logical block size was set when the device was formatted.
  // This shouldn't be confused with the sector size on the device, which
  // is always 512 bytes.  Block sizes are usually 1k, but can be any power
  // of 2 higher than that.  64k is probably the largest anyone would ever
  // actually use.  Since we only need to load one AG section out of the BAT
  // at a time, we only allocate one block worth of memory.
  //   The disk is broken up into Allocation Groups, where each allocation group
  // bitfield fits into a single block.  This means that for a device with 1k
  // blocks an AG will be 8192 blocks long.  Small devices (such as a floppy)
  // will only have 1 AG.
  //   To find a free run, we load a starting section of the BAT out of the 
  // BAT file.  The BAT is technically another file on the volume, but unlike 
  // normal files the BAT inode is referenced inside the superblock, and cannot
  // be touched by a normal user (if the volume is grown later, the BAT can be 
  // grown as well).
  //   Once the AG block list is loaded, we scan through looking for a contiguous
  // set of free blocks that fits our length requirement. In the event this
  // AG doesn't meet that requirement, we move to the next AG.  When we hit the
  // end of the volume, we go back to the beginning of the volume and continue
  // scanning until either we find a free run or we end up back where we began.
  // In the latter case, the disk would have to either be very fragmented or
  // full. If we succeed, we mark the blocks as used and return the blockRun.

  // XXX To do:
  // Might want to clear the blocks we allocated to avoid garbage from being present

  setBlockRun(result, 0, 0, 0);
  if (superBlock = NIL) then exit;
  if (blockRun.AG > superBlock^.numAGs) then exit;
  if (superBlock^.usedBlocks + int64(blockRun.len) > superBlock^.numBlocks) then exit; // should probably give a louder warning

  // we read in one block at a time
  FBLSize := superBlock^.blockSize;
  GetMem(FBL, FBLSize);
  assert(FBL <> NIL);
  fillchar(FBL^, FBLSize, 0);  // better safe than sorry

  assert(readDataStream(superBlock^.BAT, blockRun.AG shl (superblock^.AGShift shr 3), FBL, FBLSize) = 0); 
  byteOffset := blockRun.start shr 3;
  bitOffset := 7-(blockRun.start and 7);
  baseBlock := (blockRun.AG shl superBlock^.AGShift) + blockRun.start;
  curBlock := baseBlock;
  freeCount := 0;
  // an optimization would be to check to see if the byte is -1, thus skipping 8 at a time
  // This will need to be implemented (but can be later)
  repeat

    inc(curBlock);

    if (FBL^[byteOffset] and (1 shl bitOffset) = 0) then 
      inc(freeCount) 
    else 
      freeCount := 0;

    if (freeCount = blockRun.len) then break;      // did we meet our length requirement?

    if (bitOffset = 0) then 
      begin
        bitOffset := 7;
        inc(byteOffset);
      end
    else dec(bitOffset);

    if (byteOffset = superBlock^.blocksPerAG) then
      begin
        // We scanned through one Allocation Group of blocks. Move to the next one
        inc(blockRun.AG);

        // it's possible that we're wrapping back around to the beginning.
        if (blockRun.AG = superBlock^.numAGs) then 
          begin
            // We hit the end of the volume and wrapped back around
            blockRun.AG := 0;  // Back to the beginning AG
            curBlock    := 0;  // curBlock is from the base of the volume
            freeCount   := 0;  // reset this to 0
          end;         
        // read in the FBL for the AG we're currently on   
        assert(readDataStream(superBlock^.BAT, blockRun.AG shl (superblock^.AGShift shr 3), FBL, FBLSize) = 0); 
        byteOffset := 0;  // first byte in this block
        bitOffset  := 7;  // first bit of the first byte
      end; // if byteOffset = superBlock^.blocksPerAG

  until curBlock = baseBlock;  // repeat until we end up back where we started
  
  if (freeCount = blockRun.len) then   // we found a run
    begin
      blockRun.AG := (curBlock-blockRun.len) shr superBlock^.AGShift;
      blockRun.start := (curBlock-blockRun.len) and (superBlock^.blocksPerAG-1);
      markUsedBlocks(blockRun);
      result := blockRun;
    end
  else writeln('Failure in allocBRun()');
  FreeMem(FBL, FBLSize); 
  
end; // TUbixFS.allocBRun

function TUbixFS.allocInode;
var
  inode:PUbixfsInode;
  blockRun:TBlockRun;
  
begin
  // XXX ToDo:
  // need to fill in the rest of the fields in the inode record

  // Notes:
  // 2005-04-27 mji
  //   Inodes take up exactly one logical block on the device.  They
  // are usually in the same AG as their parent directory.  Allocating
  // one involves getting some memory for it and finding a free block
  // to hold it on the device.

  result := NIL;
  if (superBlock = NIL) then exit;
  assert(parentIAddr.AG < superBlock^.numAGs);
  if (parentIAddr.AG >= superBlock^.numAGs) then exit;  // parent addr is outside out block count?
  // ask for a single block somewhere around the beginning of the requested AG
  setBlockRun(blockRun, parentIAddr.AG, 0, 1); 
  blockRun := allocBRun(blockRun);
  assert(blockRun.len <> 0);
  if (blockRun.len = 0) then exit;     // this would be bad
  // Normally we'd allocate memory for the inode. Instead we let loadBlock() do that
  // for us.
  inode := loadBlock(blockRun);        

  assert(inode <> NIL);
  if (inode = NIL) then exit;
  fillchar(inode^, superBlock^.inodeSize, 0);

  with inode^ do
    begin
      magic := UBIXFS_INODE_MAGIC;

      // inodes point to themselves
      inodeNum := blockRun;

      uid := 0;  // this doesn't mean much here
      gid := 0;  
      // mode := ??
      flags := INODE_NO_FLAG;
      // createTime := 
      // modifiedTime :=

      parent := parentIAddr;
      setIAddr(attributes, 0, 0, 0);
      inodeSize := superBlock^.inodeSize;
      // data := 0;
      // there's no data yet in the file, so don't 
      // set blocks to anything (cleared above)
      blocks.blockCount := 0;

    end; // with
  result := inode;
end; // TUbixFS.allocInode

function TUbixFS.extendFile;

  function appendToDIndirect(newBlockRun:TBlockRun):boolean;
    var 
      extents:^array[0..0] of TBlockRun;
      indirectExtents:^array[0..0] of TBlockRun;
      extentsIndex, indirectExtentsIndex:uInt32;
      extentsUpperBound:uInt32;
      tmpBlockRun:TBlockRun;
    begin
      // Many apologies for the long variable names. This section is relatively
      // complicated, and it wouldn't help to have cryptic variables.
      result := FALSE; // failure by default
      if (newBlockRun.len mod NUM_EXTENSION_BLOCKS <> 0) then
        begin
          writeln('Double indirect block run is not a multiple of NUM_EXTENSION_BLOCKS');
          exit;
        end;
      extentsUpperBound := (superBlock^.blockSize div sizeof(TBlockRun)) - 1;

      if not isValidIAddr(inode^.blocks.doubleIndirect) then
        begin
          // allocate a double indirect block
          inode^.blocks.doubleIndirect := allocBRun(inode^.inodeNum); // allocate a block near the parent inode

          if not isValidIAddr(inode^.blocks.doubleIndirect) then
            begin
              writeln('Failed to allocate double indirect block');
              exit
            end;
          // We have a freshly allocated double indirect block. Now we need an indirect block
          extents^[0] := allocBRun(inode^.blocks.doubleIndirect); // allocate an indirect block near the dindirect block
          if not isValidIAddr(extents^[0]) then
            begin
              writeln('Failed to allocate indirect block beneath a double indirect block');
              exit
            end;
          // save out the doubleIndirect block
          saveBlock(inode^.blocks.doubleIndirect, extents);
          // Because we have a freshly allocated double indirect block, we have no entries
          // in it yet.  This means that our DIndirect index is 0, and our indirect index is also 0.
          extentsIndex := 0; 
          indirectExtentsIndex := 0;
        end
      else
        begin
          // double indirect block exists. Load it
          extents := loadBlock(inode^.blocks.doubleIndirect);
          assert(extents <> NIL);
//loadBlock(inode^.blocks.doubleIndirect, extents);
          // Now scan through (backwards) to find the last entry.
          for extentsIndex := extentsUpperBound downto 0 do
            if (extents^[extentsIndex].len <> 0) then break;
          // extentsIndex points to the entry which holds the indirect block with the last entry.
          // load that one
          indirectExtents := loadBlock(extents^[extentsIndex]);
          assert(indirectExtents <> NIL);
//loadBlock(extents^[extentsIndex], indirectExtents);
          // first check the last entry
          if (indirectExtents^[extentsUpperBound].len <> 0) then
            begin
              // this indirect block is full. Allocate a new one
              if (extentsIndex = extentsUpperBound) then
                begin
                  // I might as well halt at this point because the user is totally hosed.
                  writeln('Egad! The double indirect block section is full. Can not write any more to this file!');
                  exit;
                end; // if doomed
              extents^[extentsIndex+1] := allocBRun(extents^[extentsIndex]); 
              inc(extentsIndex);
              indirectExtentsIndex := 0;
              saveBlock(inode^.blocks.doubleIndirect, extents); // save, since we changed it
            end
          else
            begin
              // now scan forward
              for indirectExtentsIndex := 0 to extentsUpperBound do
                if (indirectExtents^[indirectExtentsIndex].len = 0) then break;
            end;
          // extentsIndex points to which indirect block we're on
          // indirectExtentsIndex points to the first free entry in that indirect block  
        end;

      while (newBlockRun.len <> 0) do
        begin
          tmpBlockRun := newBlockRun;
          tmpBlockRun.len := NUM_EXTENSION_BLOCKS;
          indirectExtents^[indirectExtentsIndex] := tmpBlockRun;
          with newBlockRun do
            begin
              dec(len, NUM_EXTENSION_BLOCKS);
              if ((uInt32(start) + NUM_EXTENSION_BLOCKS) > high(start)) then inc(AG);
              inc(start, NUM_EXTENSION_BLOCKS); // Warning! Relies on integer wrap-around (start is 16-bit)
            end; // with

          if (indirectExtentsIndex = extentsUpperBound) then
            begin
              // this indirect block is full. 
              // Save the current one
              saveBlock(extents^[extentsIndex], indirectExtents);
              // Allocate a new one
              if (extentsIndex = extentsUpperBound) then
                begin
                  // I might as well halt at this point because the user is totally hosed.
                  writeln('Egad! The double indirect block section is full. Can not write any more to this file!');
                  exit;
                end; // if doomed
              extents^[extentsIndex+1] := allocBRun(extents^[extentsIndex]); // allocate an indirect block near the previous one
              inc(extentsIndex);  // upadte which index in the double indirect block we're using
              indirectExtentsIndex := 0;
              indirectExtents := loadBlock(extents^[extentsIndex]);
              fillchar(indirectExtents^, superBlock^.blockSize, 0);
              saveBlock(inode^.blocks.doubleIndirect, extents); // save, since we changed it
            end
          else
            inc(indirectExtentsIndex);
          // update the size the maxDoubleIndirect blocks reference
          inode^.blocks.maxDIndirect := inode^.blocks.maxDIndirect + (NUM_EXTENSION_BLOCKS shl superBlock^.blockShift);
        end; // while
      saveBlock(extents^[extentsIndex], indirectExtents);
    end; // local function appendToDIndirect()

  function appendToIndirect(newBlockRun:TBlockRun):boolean;
    var 
      extents:^array[0..0] of TBlockRun;
      extentsUpperBound, index:uInt32;
    begin
      result := FALSE; // failure by default
      extentsUpperBound := superBlock^.blockSize div sizeof(TBlockRun)-1;
      
      if not isValidIAddr(inode^.blocks.indirect) then
        begin
          // allocate an indirect block
          inode^.blocks.indirect := allocBRun(inode^.inodeNum);  // alloate a block near the parent inode
          if not isValidIAddr(inode^.blocks.indirect) then
            begin
              writeln('Failed to allocate indirect block');
              exit
            end;
          extents := loadBlock(inode^.blocks.indirect);
          fillchar(extents^, superBlock^.blockSize, 0);
          // There might be garbage in the block we just allocated, so write out 
          // the empty extents pointer to disk.
          // On second thought, this is saved below no matter what, so don't
          // save it here.
          // saveBlock(inode^.blocks.indirect, extents);
        end
      else
        begin
          // the indirect block exists, so read it in
          extents := loadBlock(inode^.blocks.indirect);
          assert(extents <> NIL);
//loadBlock(inode^.blocks.indirect, extents);
        end; 
      // extents[] has the indirect block in it
      // We first check to make sure that we're not going to be moving
      // to the next section of the dataStream
      if (extents^[extentsUpperBound].len <> 0) then
        result := appendToDIndirect(newBlockRun)   // promote the newBlockRun to the DIndirect section
      else
        begin
          // scan through for the first free slot
          for index := 0 to extentsUpperBound do
            if extents^[index].len = 0 then break;
          extents^[index] := newBlockRun;              // index points to the first free slot. Fill it in with the newBlockRun
          saveBlock(inode^.blocks.indirect, extents);  // save out the indirect block. Could check for failure here
          // update the size the maxDoubleIndirect blocks reference
          inode^.blocks.maxIndirect := inode^.blocks.maxIndirect + (newBlockRun.len shl superBlock^.blockShift);
          result := TRUE;                                  // success
        end;
    end; // local function appendToIndirect()

  function appendToDirect(newBlockRun:TBlockRun):boolean;
    var index:uInt32;
    begin
      result := FALSE;
      with inode^, blocks do
        begin
          if (direct[high(direct)].len <> 0) then 
            result := appendToIndirect(newBlockRun)  
          else
            begin
              for index := 0 to high(direct) do
                if (direct[index].len = 0) then break;
              direct[index] := newBlockRun;
              maxDirect := maxDirect + (newBlockRun.len shl superBlock^.blockShift);
              result := TRUE;
            end; // else
        end; // with
    end; // local function appendToDirect

  function appendToDataStream(newBlockRun:TBlockRun):boolean;
    begin
      if (inode^.blocks.maxDIndirect <> 0) then result := appendToDIndirect(newBlockRun)
      else if (inode^.blocks.maxIndirect <> 0) then result := appendToIndirect(newBlockRun)
      else result := appendToDirect(newBlockRun);
    end; // local function appendToDataStream

  function continueIndirect(newBlockRun:TBlockRun):boolean;
    var 
      extents:^array[0..0] of TBlockRun;
      extentsUpperBound, index:uInt32;
    begin
      result := FALSE;
      extentsUpperBound := superBlock^.blockSize div sizeof(TBlockRun)-1;
      extents := loadBlock(inode^.blocks.indirect);
      assert(extents <> NIL);

      for index := extentsUpperBound downto 0 do
        if (extents^[index].len <> 0) then break;
     
      assert(extents^[index].len <> 0); // just to be sure
      // extents^[index] has the last entry
      if (uInt32(extents^[index].len) + newBlockRun.len < 65535) then
        begin
          inc(extents^[index].len, newBlockRun.len);
          inode^.blocks.maxIndirect := inode^.blocks.maxIndirect + (newBlockRun.len shl superBlock^.blockShift);
          saveBlock(inode^.blocks.indirect, extents);
          result := TRUE;
        end
      else // oops, we overflowed the length
        begin
          // need to check to see if we were the last indirect entry. If we were
          // then we need to allocate a double indirect block and store it in there
          if (index = extentsUpperBound) then
            result := appendToDIndirect(newBlockRun)
          else
            begin
              extents^[index+1] := newBlockRun;
              inode^.blocks.maxIndirect := inode^.blocks.maxIndirect + (newBlockRun.len shl superBlock^.blockShift);
              saveBlock(inode^.blocks.indirect, extents);
              result := TRUE;
            end; // else
        end; // else
     
    end; // local function continueIndirect

 function continueDirect(newBlockRun:TBlockRun):boolean;
  var
    index:uInt32;
  begin
    result := FALSE;
    with inode^, blocks do
      begin
        for index := high(direct) downto low(direct) do
          if (direct[index].len <> 0) then break;

        assert(direct[index].len <> 0);  // just to be sure
        // make sure we don't overflow the length
        if (uInt32(direct[index].len) + newBlockRun.len < 65535) then
          begin
            inc(direct[index].len, newBlockRun.len);  // update the last entry
            maxDirect := maxDirect + (newBlockRun.len shl superBlock^.blockShift);
            result := TRUE;
          end
        else  // oops, we overflowed the length
          begin
            // need to check to see if we were the last direct entry. If we were
            // then we need to allocate an indirect block and store it in there
            if (index = high(direct)) then 
              result := appendToIndirect(newBlockRun)
            else
              begin
                direct[index+1] := newBlockRun; // store it in the next available entry
                maxDirect := maxDirect + (newBlockRun.len shl superBlock^.blockShift);
                result := TRUE;
              end;
          end; // else
      end; // with
   end; // local function continueDirect

  function continueDataStream(newBlockRun:TBlockRun):boolean;
    begin
      // The extended blockRun continues a previous run.

      // When extending the file we request an allocation of more blocks 
      // near to the last blockRun of the file. If the new blocks 
      // are contiguous to the last blockRun of the file
      // then we can simply extend that blockRun.  This not only 
      // prevents fragmentation when possible, it also reduces the
      // number of actual blockRuns needed to store the file.
      if (inode^.blocks.maxDIndirect <> 0) then result := appendToDIndirect(newBlockRun)
      else if (inode^.blocks.maxIndirect <> 0) then result := continueIndirect(newBlockRun)
      else result := continueDirect(newBlockRun);
    end; // local function continueDataStream

var
  blockRun, exBlockRun:TBlockRun;
  blocksToAllocate:uInt32;

begin // extendFile()
  // XXX ToDo:
  // Need to verify support for indirect and double-indirect blocks

  // Notes:
  // 2005-04-27 mji
  //   This function is called by writeDataStream() when we're about to write
  // past the end of the file.  When extending out a file, it is almost never
  // extended by the amount written. Instead it is at least 1 block, and usually
  // more (in our case: 4, as mentioned below). Currently we pass in the number 
  // of blocks to extend the file by.  This is problematic for a few reasons. 
  //   The first is that the code to figure out how much to extend the file by 
  // would have to be in writeDataStream().  That's not necessarily a problem, 
  // except that the amount to be extended by depends on many factors, some 
  // including whether we're writing out more than the default small block 
  // extension size (currently is 4) or if we're in double indirect blocks, 
  // which extends out by NUM_DINDIRECT_BLOCKS (also currently 4).
  //   The second problem is that we save the inode here -- and also in 
  // writeDataStream() -- because the new size of the file is updated in wDS() 
  // instead of here.
  //   The third problem is when you seek past the end of the file and then
  // write.  Space should be allocated for all the space between the old
  // EOF and the new write position.  This is common behaviour for most 
  // filesystems, and should probably be emulated here as well.
  //   The proper solution is to pass in the new size and let this function 
  // work out exactly how much space on disk it is really going to take.

  result := -1;  // failure by default
  // basic sanity checks
  if (inode = NIL) or (superBlock = NIL) then exit;

  // if the file size is 0 then this is the first extension.
  if (inode^.blocks.size = 0) then  // first extension
    begin
      // A file's data is stored in the next allocation group from it's parent
      // inode. This will help distribute the data across the disk.
 
      // Find out how many blocks the new size will take up.  Do this by rounding
      // up the size to the nearest block. Then we round that up to the nearest
      // NUM_DIRECT_BLOCKS multiple.
      blocksToAllocate := uInt32(((newSize+(superBlock^.blockSize-1)) shr superBlock^.blockShift));
      blocksToAllocate := (blocksToAllocate + (NUM_EXTENSION_BLOCKS-1)) and not (NUM_EXTENSION_BLOCKS-1);
      setBlockRun(exBlockRun, nextAG(inode^.inodeNum.AG), 0, blocksToAllocate);
      exBlockRun := allocBRun(exBlockRun);    // try to allocate a blockrun
      assert(exBlockRun.len <> 0);
      if (exBlockRun.len = 0) then exit;      // erp. Couldn't extend the file
      inode^.blocks.direct[0] := exBlockRun;  // Set the first direct run
      // The space allocated for the file can and may be different than the
      // actual size.  Set the maxDirect to the size (in bytes) of how much space
      // we allocated
      inode^.blocks.maxDirect := blocksToAllocate shl superBlock^.blockShift;  
    end // size = 0
  else
    begin
    // the size of the file wasn't 0 (handled above). We need to extend the 
    // file out.  Find where the last blocks of the file are

    // Note:
    // 2005-04-28 mji
    //   We're going to have a potential problem here.  When we check to see how many blocks we
    // want to extend by, we don't know for sure if we're actually going to end up in the
    // dataStream section we were assuming.  For example, we may have no more room in the
    // indirect block section of dataStream and thus will have to store the entry in the double 
    // indirect area.  If the constants for each section are different, then we may be requesting the
    // wrong amount.  This is *only* a problem if any of the constants are different.  Therefore
    // the solution is to pick some arbitrary extension amount (probably 4 or 8 blocks) and stick 
    // with that.  If we pick the amount that will end up in the double indirect area (currently 4
    // but I'm considering 8) then we can easily break apart our blockRun if we're writing it
    // to the double indirect section.  
    // IMPORTANT NOTE
    //   Once this value is chosen it will be set in stone since we will break everything by 
    // changing it. Math done for retrieving the blockRun from the double indirect region will 
    // rely on this value, and it can never be changed.  BeFS uses 4 blocks for the double
    // indirect section, and that seems fine with me. The largest file using contiguous blockRuns
    // will be 38GB.  I am, however, pondering 8 blocks instead.  Remember, extensions are done 
    // as a multiple of this value.
    // 2005-04-28 mji
    //   I decided to use a single constant, NUM_EXTENSION_BLOCKS, and it is set to 4.
    blocksToAllocate := uInt32((newSize+(superBlock^.blockSize-1)) shr superBlock^.blockShift);    
    // blocksToAllocate is now rounded up to the nearest block of how much space we need.
    blocksToAllocate := (blocksToAllocate + (NUM_EXTENSION_BLOCKS-1)) and not (NUM_EXTENSION_BLOCKS-1);
    // blocksToAllocate is now rounded up to the nearest multiple of NUM_EXTENSION_BLOCKS
    // now subtract off how many blocks we already have
    dec(blocksToAllocate, inode^.blocks.blockCount);

    // XXX
    // I don't think this is right. We need to find the last block returned by
    // getFileSpace(), not the size.  Something to ponder/look into/test
    //blockRun := findBRun(@inode^.blocks, inode^.blocks.size shr superBlock^.blockShift);
    assert(inode^.blocks.blockCount <> 0);     // if blockCount is 0, we're in big trouble
    blockRun := findBRun(@inode^.blocks, inode^.blocks.blockCount-1);
    assert(blockRun.len <> 0);     
    if (blockRun.len = 0) then exit;           // This would be odd
    exBlockRun     := blockRun;                // copy the last run to exBlockRun
    exBlockRun.len := blocksToAllocate;               // set the requested number of blocks
writeln('numBlocks[5]: ', blocksToAllocate);
with exBlockRun do
writeln('exBlockRun before allocBRun() = ', ag, ':', start, ':', len);

    exBlockRun     := allocBRun(exBlockRun);   // allocate a block run near the previous one
with exBlockRun do
writeln('exBlockRun after allocBRun() = ', ag, ':', start, ':', len);
    assert(exBlockRun.len <> 0);
    if (exBlockRun.len = 0) then exit;         // should probably yell louder about this

    // blockRun points to the last blockRun structure associated with this inode
    if (exBlockRun.start = blockRun.start+1) and (exBlockRun.AG = blockRun.AG) then
      continueDataStream(exBlockRun)
    else
      appendToDataStream(exBlockRun);
  end;  // else size <> 0
  inode^.blocks.size := newSize;      // Now we update the size here and not in writeDataStream()
  inc(inode^.blocks.blockCount, blocksToAllocate);  // update the blockCount in the inode dataStream
  // no matter where we stored the extended blockRun, we have updated the inode

  assert(saveBlock(inode^.inodeNum, inode));  // 0 result means success
  result := 0;  // no error
end; // TUbixFS.extendFile

function TUbixFS.findBRun;
var
  position:int64;
begin
  setBlockRun(result, 0, 0, 0);
  if (dataStream = NIL) then exit;
  if (blockNum >= dataStream^.blockCount) then
    begin
      writeln('Requested block outside the block count in the dataStream');
      exit
    end;
  position := int64(blockNum) shl superBlock^.blockShift;
  if (position < dataStream^.maxDirect) then
    result := findBRunDirect(dataStream, blockNum)
  else if (position < dataStream^.maxIndirect) then
    result := findBRunIndirect(dataStream, blockNum)
  else if (position < dataStream^.maxDIndirect) then
    result := findBRunDIndirect(dataStream, blockNum)
  else writeln('Block position is not mapped by dataStream');
end; // TUbixFS.findBRun

function TUbixFS.findBRunDirect;
var
  sum, i, offset:uInt32;
begin
  // set the result to a nil blockrun
  setBlockRun(result, 0, 0, 0);

  sum := 0;
  with dataStream^ do
    for i := low(direct) to high(direct) do
      with direct[i] do
        begin
          if (blockNum < sum + len) then                // found it
            begin
              offset := blockNum - sum;
              setBlockRun(result, AG, start + offset, len - offset);
              exit
            end;
          inc(sum, len);
        end;  // for/with
end; // TUbixFS.findBRunDirect

function TUbixFS.findBRunIndirect;
var 
  extents:^array[0..0] of TBlockRun;
  sum, i, offset:uInt32;
begin
  // NOTE: Basic error checking on whether superblock <> NIL isn't done here.
  setBlockRun(result, 0, 0, 0);

  // load the indirect block
  extents := loadBlock(dataStream^.indirect);
  assert(extents <> NIL);

  // We either need to adjust sum forward or blockNum back.
  // sum := (dataStream^.maxDirect+superBlock^.blockSize-1) shr superBlock^.blockShift;

  // We need to decrease blockNum by the amount of blocks the direct portion took up.
  // This can be calculated by dividing the maxDirect byte size by the block size.

  blockNum := blockNum - dword(dataStream^.maxDirect shr superBlock^.blockShift);
  sum := 0;

  for i := 0 to (superBlock^.blockSize div sizeof(TBlockRun))-1 do
    with extents^[i] do
      begin
        if (blockNum < sum + len) then                // found it
          begin
            offset := blockNum - sum;
            setBlockRun(result, AG, start + offset, len - offset);
            break
          end;
        inc(sum, len);
      end; // for/with

end; // TUbixFS.findBRunIndirect

function TUbixFS.findBRunDIndirect;
var 
  extents:^array[0..0] of TBlockRun;
  sum, i, offset:uInt32;
begin
  // NOTE: Basic error checking on whether superblock <> NIL isn't done here.
  setBlockRun(result, 0, 0, 0);

  extents := loadBlock(dataStream^.doubleIndirect);
  assert(extents <> NIL);
  // Now that we've loaded the double indirect block, we need to 
  // find the indirect block that contains the block run we're looking for.
  // This is done with some clever shifting
  
  // first adjust the blockNum, taking into account how many blocks are used
  // by the direct and indirect sections

  blockNum := blockNum - dword(dataStream^.maxIndirect shr superBlock^.blockShift);

  // Each indirect block holds NUM_EXTENSION_BLOCKS*(superBlock^.blockSize div sizeof(TBlockRun)) blocks
  // e.g.
  //   NUM_EXTENSION_BLOCKS = 4 
  //   blockSize = 1024
  //   sizeof(TBlockRun) = 8
  //   blocks referenced per indirect block = 4 * (1024 / 8) === 512
  //   indirect block index = (block we're looking for) / (blocks referenced per indirect block)
  //   blockrun index in indirect block = (block we're looking for) % (blocks referenced per indirect block)

  i := blockNum div (NUM_EXTENSION_BLOCKS * (superBlock^.blockSize div sizeof(TBlockRun))); 
  // i should now hold which entry in the double indirect block we need to load
  assert(i < (superBlock^.blockSize div sizeof(TBlockRun)));

  // load the proper indirect block beneath the double indirect block
  extents := loadBlock(extents^[i]);
  assert(extents <> NIL);

  // now seek into this indirect block by taking the modulus of the number
  // of blocks referenced by an indirect block
  i := blockNum mod (NUM_EXTENSION_BLOCKS * (superBlock^.blockSize div sizeof(TBlockRun)));
  with extents^[i] do
    begin
      // extents^[i] is a block extent inside an indirect block which
      // was referenced by the double indirect block.  Now we need to see 
      // which block inside this extent we wanted. We do that by taking the 
      // modulus of the requested block number by the number of blocks 
      // referenced in each blockrun.  BeFS used 4 per; and as of writing 
      // this comment that is what I'm using.
      offset := blockNum and (NUM_EXTENSION_BLOCKS-1);
      setBlockRun(result, AG, start + offset, len - offset);
    end; // with
  
end; // TUbixFS.findBRunDIndirect

function TUbixFS.freeUsedBlocks;
var 
  FBL:int8ArrayPtr;
  FBLSize:uInt32;
  startByte, endByte:uInt32;
  bitCount, bitOffset:uInt32;
  startBlock:uInt32;
  byteOffset:uInt32;
begin
  // Notes:
  // 2005-04-27 mji
  //   freeUsedBlocks() will clear bits in the BAT that are associated
  // with the blockRun passed in.  To do this we load only the portion
  // of the BAT we're going to modify. (As a side note, readDataStream()
  // will load a minimum of one block anyway, so this may not really be
  // much of an optimization).
 
  result := -1;
  // basic sanity checks
  if ((superBlock = NIL) or (blockRun.len = 0)) then exit;

  // Compute how much of the BAT we have to read in.
  with blockRun, superBlock^ do
    begin
      startBlock := (AG shl AGShift) + start;
      startByte := startBlock shr 3;       // divide by 8 since it's a bitfield
      endByte := (startBlock + len) shr 3; // divide by 8 since it's a bitfield
      FBLSize := endByte - startByte + 1;

      GetMem(FBL, FBLSize);
      assert(readDataStream(BAT, startByte, FBL, FBLSize) = 0); 
      bitOffset := 7-(startBlock and 7);
      byteOffset := 0;

      for bitCount := 1 to len do
        begin   
          FBL^[byteOffset] := FBL^[byteOffset] and not (1 shl bitOffset);
          if (bitOffset = 0) then 
            begin
              bitOffset := 7;
              inc(byteOffset);
            end
          else dec(bitOffset);
        end; // for bitCount
      // save back out the modified block
      assert(writeDataStream(BAT, startByte, FBL, FBLSize) = 0);
 
      FreeMem(FBL, FBLSize);
      usedBlocks := usedBlocks - len;
    end; // with

  result := 0;
end; // TUbixFS.freeUsedBlocks

function TUbixFS.getFileSize;
begin
  // XXX ToDo:
  // Fix to support sizes > 4GB
  result := 0;
  if (inode = NIL) then exit;
  result := inode^.blocks.size;
end;  // TUbixFS.getFileSize

function TUbixFS.getFileSpace;
begin
  // getFileSpace() will return the size of the file's allocated space
  result := 0;
  if (inode = NIL) or (superBlock = NIL) then exit;
  result := inode^.blocks.blockCount shl superBlock^.blockShift;
end; // TUbixFS.getFileSpace

function TUbixFS.iAddrToStr;
var s:string;
begin
  // Note: this won't work in C/C++ because strings are really null
  // terminated character arrays. So, the B^Tree won't hold a string
  // with nulls in it properly
  length(s) := sizeof(iAddr);
  move(iAddr, s[1], sizeof(iAddr));
  result := s;
end; // TUbixFS.iAddrToStr;

function getRDTSCTime:int64;
var x:int64;
begin
  asm
        rdtsc
        mov     dword ptr x, eax
        mov     dword ptr x+4, edx 
  end;
  result := x;
end;

function TUbixFS.loadBlock;
var 
  blockAddress:int64;
  cacheNode:PCacheNode;
  u:uPtr;
  searchRec:BTreeSearchRec;
begin
  result := NIL;  // failure by default;
  // 2005-04-29 mji
  // I'm not really sure how much error checking to put in here. Chances are that
  // if we're calling this function everything is fine.  For now, with such limited
  // use I'll keep it pretty light. I think that this will eventually be used for 
  // dealing with the block cache, so more checking might be required later.
  if (dev = NIL) or (superBlock = NIL) or (blockCache = NIL) then exit; 

  with blockAddr, superBlock^ do
    blockAddress := (AG shl AGShift) + start;

  fillchar(u, sizeof(u), 0);  // just to be safe
  cacheNode := NIL;
  // XXX ToDo
  // Fix this so that if there is no cache, we only have one pointer
  // Check for a full cache

  if (blockCache^.Find(@blockAddress, u)) then
    begin
      // block was in the cache. 
      assert(u.p <> NIL);
      cacheNode := u.p;
      assert(cacheNode^.data <> NIL);
      // If we're going to honour the locked flag in the cacheNode, then
      // we should put a mutex here
      
      // Now we need to update the blockTime. We do this by first deleting
      // the cacheNode out of the blockTime cache.
      blockTime^.Delete(@cacheNode^.lastUsed, u);  // Delete old one
      cacheNode^.lastUsed := getRDTSCTime();       // Get the new timestamp
      blockTime^.Insert(@cacheNode^.lastUsed, u);  // Insert with new time
      result := cacheNode^.data;                   // return the data pointer
    end
  else
    begin
      // Requested block isn't in cache

      // XXX ToDo:
      // Check to see if we've hit the maximum cache entry limit. If so, we 
      // need to drop off the least recently used (look for the first key 
      // in blockTime) and possibly save it to disk if it has been modified.

      if (blockCache^.getKeyCount() = maxCacheEntry) then
        begin
          blockTime^.getFirstKey(searchRec);                  // least recently used block
          cacheNode := searchRec.value.p;                     // get the pointer from the value
          assert(cacheNode <> NIL);
          // I suppose we could write a syncBlock() function, since this
          // code is also duplicated in sync().
          if (cacheNode^.dirty) then
            begin
              with cacheNode^ do
                dev^.write(data, 
                           address * (dataSize shr SECTOR_SHIFT),
                           dataSize shr SECTOR_SHIFT
                          ); // dev^.write    
              cacheNode^.dirty := FALSE;
            end; // if dirty
          u.p := cacheNode;
          blockCache^.Delete(@cacheNode^.address, u);
          blockTime^.Delete(@cacheNode^.lastUsed, u);
          // We've deleted the cacheNode out of both the blockCache and
          // the blockTime trees. We're going to re-use the cacheNode.
          cacheNode^.address := blockAddress;
          cacheNode^.lastUsed := getRDTSCTime();
          // No need to update data, dataSize, or dirty. We re-use the first two,
          // and dirty will be set to FALSE above if necessary.  Might need to 
          // reset locked if we're using that.
        end
      else
        new(cacheNode, init(blockAddress, getRDTSCTime(), superBlock^.blockSize));

      assert(cacheNode <> NIL);
      assert(cacheNode^.data <> NIL);
      u.p := cacheNode;
      if not (blockCache^.Insert(@blockAddress, u)) then
        writeln('Error inserting into blockCache');
      if not (blockTime^.Insert(@cacheNode^.lastUsed, u)) then
        writeln('Error inserting into blockTime');
      // now we need to fill the data from the device
      with superBlock^ do
        dev^.read(cacheNode^.data,
                  blockAddress * (blockSize shr SECTOR_SHIFT),
                  blockSize shr SECTOR_SHIFT
                 ); // dev^.read
      result := cacheNode^.data;
    end; 
end; // TUbixFS.loadBlock

function TUbixFS.markUsedBlocks;
var 
  FBL:uInt8ArrayPtr;
  FBLSize:uInt32;
  startByte, endByte:uInt32;
  bitCount, bitOffset:uInt32;
  startBlock:uInt32;
  byteOffset:uInt32;
begin

  result := -1;
  // basic sanity checks
  if ((superBlock = NIL) or (blockRun.len = 0)) then exit;

with blockRun do writeln('---- marking used blocks: ', AG, ':', start, ':', len);

  // Compute how much of the BAT we have to read in.
  with blockRun, superBlock^ do
    begin
      startBlock := (AG shl AGShift) + start;
      startByte := startBlock shr 3;
      endByte := (startBlock + len) shr 3;
      FBLSize := endByte - startByte + 1;
      GetMem(FBL, FBLSize);
      fillchar(FBL^, FBLSize, 0);  // better safe than sorry
      assert(readDataStream(BAT, startByte, FBL, FBLSize) = 0); 

      bitOffset := 7-(startBlock and 7);
      byteOffset := 0;
      for bitCount := 1 to len do
        begin   
          FBL^[byteOffset] := FBL^[byteOffset] or (1 shl bitOffset);
          if (bitOffset = 0) then 
            begin
              bitOffset := 7;
              inc(byteOffset);
            end
          else dec(bitOffset);
        end; // for bitCount
      assert(writeDataStream(BAT, startByte, FBL, FBLSize) = 0);
 
      FreeMem(FBL, FBLSize);

      usedBlocks := usedBlocks + len;
    end; // with
  result := 0;
end; // TUbixFS.markUsedBlocks

function TUbixFS.nextAG;
begin
  // there's no error we can throw from here if superblock is NIL,
  // so don't bother to check.  Just hope whomever calls this
  // function has done some basic sanity checks
  result := (AG+1) mod superBlock^.numAGs
end; // TUbixFS.nextAG

function TUbixFS.pathToIAddr;

  function recursePath(path:PChar; len:integer; iAddr:inodeAddr):inodeAddr;
    var 
      dirTree:PBTree;
      u:uPtr;
      nameSize:integer;
      pfs:PUbixBTreeVFS;
    begin
      // if the string length is 0 then we have found the inode in the previous
      // recurse, so return what we found.
      if (len = 0) then 
        begin
          result := iAddr;
          exit;     
        end
      else
        setIAddr(result, 0, 0, 0);
      // hm. If we're looking for something like ``/.'' then the ``/'' is stripped
      // off in the main call leaving only ``.'', which means that this malformed 
      // path check would trigger.  Same with /foo/bar if bar is a dir.
      //if uInt8ArrayPtr(path)^[0] = ord('/') then exit;  // malformed path
      if not (isValidIAddr(iAddr)) then exit;           // sanity check
      nameSize := 0;
      while (len > 0) and (uInt8ArrayPtr(path)^[nameSize] <> ord('/')) do
        begin
          inc(nameSize);
          dec(len);    
        end;
      uInt8ArrayPtr(path)^[nameSize+1] := 0;
      if (len <> 0) then dec(len);
      new(dirTree, open(IAddrToStr(iAddr), new(PUbixBTreeVFS, init(@self))));
      if (dirTree = NIL) then exit;  // hm.. what happens to the PUbixBTreeVFS is this is NIL?
      // note that the dirTree is a tree of BT_PCHAR, and searches are case sensitive.
      // if the tree doesn't exist, Find() should return false.
      if dirTree^.Find(path, u) then result := recursePath(path + nameSize + 1, len, u.iAddr);
      dispose(dirTree, done);
    end; // recursePath

var len:integer;
begin
  setIAddr(result, 0, 0, 0);

  if (path = NIL) or (superBlock = NIL) then exit;  

  len := strlen(path);
  if (len = 0) then exit;                            // empty string
  if uInt8ArrayPtr(path)^[0] <> ord('/') then exit;  // malformed path

  result := recursePath(path+1, len-1, superBlock^.rootDir);

end; // TUbixFS.pathToIAddr

procedure TUbixFS.printSuperBlock;
begin
  if (superBlock = NIL) then exit;
  with superBlock^ do
    begin
      writeln('superBlock^.name........... ', name);
      writeln('superBlock^.magic1......... ', hex(magic1));
      writeln('superBlock^.fsByteOrder.... ', fsByteOrder);
      writeln('superBlock^.blockSize...... ', blockSize);
      writeln('superBlock^.blockShift..... ', blockShift);
      writeln('superBlock^.numBlocks...... ', numBlocks);
      writeln('superBlock^.usedBlocks..... ', usedBlocks);
      writeln('superBlock^.magic2......... ', hex(magic2));
      writeln('superBlock^.blocksPerAG.... ', blocksPerAG);
      writeln('superBlock^.AGShift........ ', AGShift);
      writeln('superBlock^.numAGs......... ', numAGs);
      writeln('superBlock^.flags.......... ', hex(flags));
      writeln('superBlock^.magic3......... ', hex(magic3));
      with superBlock^.BAT do
        writeln('superBlock^.BAT............ ', AG, ':', start, ':', len);
      with superBlock^.rootDir do
        writeln('superBlock^.rootDir........ ', AG, ':', start, ':', len);
    end;
end; // TUbixFS.printSuperBlock

function TUbixFS.saveBlock;
var 
  blockAddress:int64;
  cacheNode:PCacheNode;
  u:uPtr;
  searchRec:BTreeSearchRec;
begin

  result := FALSE;  // failure by default;
  if (blockCache = NIL) or (blockTime = NIL) or (dev = NIL) or (superBlock = NIL) then exit;
  with blockAddr, superBlock^ do
    blockAddress := (AG shl AGShift) + start;
  cacheNode := NIL;
  fillchar(u, sizeof(u), 0); // best be safe
  // first check to see if the blockAddress is in the cache
  if (blockCache^.Find(@blockAddress, u)) then
    begin
      // block was in cache
      cacheNode := u.p;
      assert(cacheNode <> NIL);
      assert(cacheNode^.data <> NIL);
      if (cacheNode^.data <> buf) then
        begin
          // The buffer with the data we want to save was not the same as the one
          // in the cache. This means that it was not obtained with loadBlock().
          // We need to copy that pointer to our block.
          move(buf^, cacheNode^.data^, cacheNode^.dataSize);
        end;
      // Now we need to update the blockTime. We do this by first deleting
      // the cacheNode out of the blockTime cache.
      blockTime^.Delete(@cacheNode^.lastUsed, u);  // Delete old one
      cacheNode^.lastUsed := getRDTSCTime();       // Get the new timestamp
      blockTime^.Insert(@cacheNode^.lastUsed, u);  // Insert with new time
    end
  else
    begin
      // block was not in cache
      if (blockCache^.getKeyCount() = maxCacheEntry) then
        begin
          blockTime^.getFirstKey(searchRec);
          cacheNode := searchRec.value.p;
          assert(cacheNode <> NIL);
          // I suppose we could write a syncBlock() function, since this
          // code is also duplicated in sync().
          if (cacheNode^.dirty) then
            begin
              with cacheNode^ do
                dev^.write(data, 
                           address * (dataSize shr SECTOR_SHIFT),
                           dataSize shr SECTOR_SHIFT
                          ); // dev^.write    
              cacheNode^.dirty := FALSE;
            end; // if dirty
          u.p := cacheNode;
          blockCache^.Delete(@cacheNode^.address, u);
          blockTime^.Delete(@cacheNode^.lastUsed, u);
          // We've deleted the cacheNode out of both the blockCache and
          // the blockTime trees. We're going to re-use the cacheNode.
          cacheNode^.address := blockAddress;
          cacheNode^.lastUsed := getRDTSCTime();
          // No need to update data, dataSize, or dirty. We re-use the first two,
          // and dirty will be set to FALSE above if necessary.  Might need to 
          // reset locked if we're using that.
        end
      else
        new(cacheNode, init(blockAddress, getRDTSCTime(), superBlock^.blockSize));

      assert(cacheNode <> NIL);
      assert(cacheNode^.data <> NIL);
      move(buf^, cacheNode^.data^, cacheNode^.dataSize);
      u.p := cacheNode;
      if not (blockCache^.Insert(@blockAddress, u)) then
        writeln('Error inserting into blockCache');
      if not (blockTime^.Insert(@cacheNode^.lastUsed, u)) then
        writeln('Error inserting into blockTime');
    end;
  cacheNode^.dirty := TRUE;  // indicate the block has been modified
  result := TRUE;
end; // TUbixFS.saveBlock

function TUbixFS.isValidIAddr;
begin
  result := (iAddr.len <> 0) and (iAddr.AG < superBlock^.numAGs); 
end; // TUbixFS.isValidIAddr

function TUbixFS.readDataStream;
var 
  tmpBlock:uInt8ArrayPtr;
  blockRun:TBlockRun;
  size, remainder:uInt32;
  blockStart:uInt32;
  blocksToRead:uInt32;
  bytesToRead:uInt32;
  inode:PUbixFSInode;
begin
 
  result := -1;
  if ((buf = NIL) or (superBlock = NIL)) then exit;
  inode := loadBlock(iAddr);
  tmpBlock := NIL;
  size := length;
  remainder := uInt32(position mod superBlock^.blockSize);

  if (remainder <> 0) then
    begin
      // the position we've been asked to read from doesn't align to
      // a block size. We need to read in the previous block and pad
      // the data
      blockStart := uInt32(position shr superBlock^.blockShift);
      blockRun := findBRun(@inode^.blocks, blockStart);
      assert(blockRun.len > 0);
      tmpBlock := loadBlock(blockRun);
      assert(tmpBlock <> NIL);
      bytesToRead := min(size, (superBlock^.blockSize - remainder));
      move(tmpBlock^[remainder], buf^, bytesToRead);
      inc(buf, bytesToRead);
      dec(size, bytesToRead);
      position := position + bytesToRead;
    end; // if remainder != 0

  // If the blockSize is always a power of 2, then you can eliminate the MOD and use
  // an AND operation instead, e.g. remainder := size and (superBlock^.blockSize-1)
  remainder := size mod superBlock^.blockSize; 
  dec(size, remainder);

  // Size is currently in bytes. The remainder variable above holds the number
  // of remaining bytes (if any) outside full blocks we have to write.

  // convert the size into blocks
  size := size shr superBlock^.blockShift;

  while (size > 0) do
    begin
      blockRun := findBRun(@inode^.blocks, dword(position shr superBlock^.blockShift));
      assert(blockRun.len > 0);
      for blocksToRead := 0 to min(blockRun.len, size)-1 do
        begin
          tmpBlock := loadBlock(blockRun);
          assert(tmpBlock <> NIL);
          move(tmpBlock^, buf^, superBlock^.blockSize);
          dec(size);
          inc(buf, superBlock^.blockSize);
          inc(blockRun.start);  // This might wrap around?
          dec(blockRun.len);    // might be unnecessary
        end; // for
    end; // while 

  if (remainder <> 0) then
    begin
      // There is less than a block of data left to read.
      blockStart := dword((position+length-remainder) shr superBlock^.blockShift);
      blockRun := findBRun(@inode^.blocks, blockStart);
      assert(blockRun.len > 0);
      tmpBlock := loadBlock(blockRun);
      assert(tmpBlock <> NIL);
      move(tmpBlock^, buf^, remainder);      
    end;

  result := 0;
end; // TUbixFS.readDataStream

procedure TUbixFS.setBlockRun;
begin
  blockRun.AG    := AG;
  blockRun.start := start;
  blockRun.len   := len;
end; // TUbixFS.setBlockRun

procedure TUbixFS.setIAddr;
begin
  inode.AG    := AG;
  inode.start := start;
  inode.len   := len;
end; // TUbixFS.setIAddr

function TUbixFS.strToIAddr;
begin
  // Note: this won't work in C/C++ because strings are really null
  // terminated character arrays. So, the B^Tree won't hold a string
  // with nulls in it properly
  // begin fiendishly clever hack
  move(s[1], result, sizeof(inodeAddr));
  // end fiendishly clever hack
end; // TUbixFS.strToIAddr

procedure TUbixFS.sync;
var 
  cacheNode:PCacheNode;
  searchRec:BTreeSearchRec;
begin
  // XXX ToDo:
  // Need to mark the superblock as clean.

  if (blockCache = NIL) or (dev = NIL) then exit;
  // We sync out the blocks in ascending order from the blockCache.
  // To do this we examine each entry to see if the block is dirty.
  // If it is, we write it out to the device and set dirty to FALSE.
  if (blockCache^.getFirstKey(searchRec)) then
    repeat
      cacheNode := searchRec.value.p;
      assert(cacheNode <> NIL);
      if (cacheNode^.dirty) then
        begin
          with cacheNode^ do
            dev^.write(data, 
                       address * (dataSize shr SECTOR_SHIFT),
                       dataSize shr SECTOR_SHIFT
                       ); // dev^.write    
          cacheNode^.dirty := FALSE;
        end;
    until not blockCache^.FindNext(searchRec);
  // Also write out the superblock.
  dev^.write(superBlock, 1, 1);
end; // TUbixFS.sync

function TUbixFS.vfs_close;
var 
  u:uPtr;
  vnid:PVnid;
begin
  result := -1;
  if (fileDescriptors = NIL) or (handle >= maxFDEntry) then exit;
  if not (vnids^.Find(@fileDescriptors^[handle].iAddr, u)) then writeln('Could not find iAddr in vnids');
  vnid := u.p;
  if (vnid = NIL) then 
    begin
      writeln('vnid is NIL');
      exit;
    end; 

  with vnid^ do
    begin
      dec(refCount);
      if (refCount = 0) then 
        begin   
          // refCount has fallen to 0. If the file is deleted we should take care of it here
          vnids^.Delete(@fileDescriptors^[handle].iAddr, u);
        end;
    end;
  if not vNodeQueue^.enqueue(handle) then writeln('Error enqueuing handle back into vNodeQueue');
  result := 0;
end; // TUbixFS.vfs_close

function TUbixFS.vfs_fCreate;
var 
  inode:PUbixFSInode;
  dirTree:PBTree;
  dirIAddr, fileIAddr:inodeAddr;
  filedes:int64;
  u:uPtr;
  searchRec:BTreeSearchRec;
  len : integer;
  path:PChar;
  filenamePtr:PChar; 
  curPos:integer;
  vnid:PVnid;
  handle:integer;
begin
  result := -1;  
  if (filename = NIL) or (superBlock = NIL) then exit;
  // we need to split the filename into it's component path and filename pieces.
  // From there we need to see if the file exists.  If it does then I guess we 
  // should truncate the file and load the inode.  Otherwise allocate a new inode
  len := strlen(filename);
  if (len = 0) then exit;
  if uInt8ArrayPtr(filename)^[0] <> ord('/') then exit;  // malformed path
  GetMem(path, len+1);
  strCopy(path, filename);
  curPos := len;

  setIAddr(dirIAddr, 0, 0, 0);  // zero out the inode address for the directory
  setIAddr(fileIAddr, 0, 0, 0); // zero out the inode address for the file

  while (curPos > 0) and (uInt8ArrayPtr(path)^[curPos] <> ord('/')) do dec(curPos);
  fileNamePtr := path + curPos + 1;
  if (curPos = 0) then
    begin
      // file is in the root dir
      writeln('path: /');
      writeln('filename: ', fileNamePtr);
      dirIAddr := pathToIAddr('/');
    end
  else
    begin
      uInt8ArrayPtr(path)^[curPos] := 0; // null out the separator
      writeln('Path: ', path);
      writeln('Filename: ', fileNamePtr);
      dirIAddr := pathToIAddr(path);
    end; 

  with dirIAddr do
    writeln('iaddr to path: ', AG, ':', start, ':', len);
  assert(isValidIAddr(dirIAddr));
  if not isValidIAddr(dirIAddr) then exit;
  // should probably check to make sure that we're opening a directory and not a file
  new(dirTree, open(IAddrToStr(dirIAddr), new(PUbixBTreeVFS, init(@self))));
  if (dirTree = NIL) then exit;
  // We need to ascertain if the file is existing or new.
  // If the file exists, we should truncate the file
  // If the file is new, we need to create a new inode and write the entry to the directory
  // Then we need to allocate a file descriptor and store it somewhere.  
  if dirTree^.Find(fileNamePtr, u) then 
    begin
      writeln(fileNamePtr, ' already exists');
    end
  else
    begin
      writeln('creating new file: ', fileNamePtr);
      writeln('allocating new inode');
      inode := allocInode(dirIAddr);   // alloc a new inode (pass in the parent iaddr)
      assert(inode <> NIL);            // make sure it isn't nil
      if (inode = NIL) then exit;
      saveBlock(inode^.inodeNum, inode);                // save the new inode
      u.iAddr := inode^.inodeNum;      // copy the inode addr to a uPtr      
      // now add the file to the directory
      writeln('Inserting ', fileNamePtr, ' into directory returned ', dirTree^.Insert(fileNamePtr, u));
    end;  
  dispose(dirTree, done);
  FreeMem(path, len+1);

  // This file doesn't have an associated vnid. Create a new one.
  new(vnid);
  vnid^.refCount := 1;
  vnid^.iAddr := inode^.inodeNum;
  vnid^.mode := 0; // XXX ?
  u.p := vnid;
  if not (vnids^.insert(@inode^.inodeNum, u)) then writeln('Could not insert into vnids');

  if not vNodeQueue^.dequeue(handle) then 
    begin 
      writeln('Out of file descriptors!');
      exit;
    end;

  with fileDescriptors^[handle] do
    begin
      iAddr := inode^.inodeNum;
      position := 0;
      flags := 0;  // XXX ?
    end;
  result := handle;
end; // TUbixFS.vfs_fCreate

function TUbixFS.vfs_fExist;
begin
  // XXX ToDo:
  // find out why fExist('/./.') fails
  // fExist('/.') no longer fails
  
  result := -1;
  if ((superBlock = NIL) or (path = NIL)) then exit;
  if (not isValidIAddr(superBlock^.rootDir)) then exit;
  if isValidIAddr(pathToIAddr(path)) then result := 0;
end; // TUbixFS.vfs_fExist

function TUbixFS.vfs_format;
var
  sector:array[0..511] of byte;
  fs:PUbixFS;
  logicalBlocks, fsBATSize, fsBATBlocks:uInt32;
  sb:PdiskSuperBlock;
  inode:PubixfsInode;
  i, fsBlockShift, lBlockShift, nodeSize, keyLength:integer;
  blockRun:TBlockRun;
  remainderBlocks:uInt32;
  blockBit:shortint;
begin
  result := -1;
  if (device = NIL) then exit;
  if (fsBlockSize < 1024) then
    begin
      writeln('Blocksize must be >= 1024');
      exit;
    end;
  // zero out the sector
  fillchar(sector, sizeof(sector), 0);

  writeln('device^.sectors: ', device^.sectors);

  if not quick then
    begin
      system.write('clearing device... ');

      for i := 0 to device^.sectors-1 do
        device^.write(@sector, i, 1);

      writeln('done');
    end; // if !quick


  fsBlockShift := 10;  {minimum is 1024}
  while (1 shl fsBlockShift < fsBlockSize) do inc(fsBlockShift);

  // allocate a new superBlock and clear it

  new(sb);
  if (sb = NIL) then exit;
  fillchar(sb^, sizeof(diskSuperBlock), 0);

  // device^.sectors is the number of 512 byte sectors

  lBlockShift := fsBlockSize shr 9;
  writeln('lBlockShift: ', lBlockShift);
  logicalBlocks := device^.sectors div lBlockShift;        // logical blocks
  writeln('logical blocks: ', logicalBlocks);

  with sb^ do
    begin
      strcopy(name, 'UbixFS');
      magic1      := UBIXFS_MAGIC_1;
      fsByteOrder := 0;
      blockSize   := fsBlockSize;
      blockShift  := fsBlockShift;
      numBlocks   := logicalBlocks;
      usedBlocks  := 0;            
      inodeSize   := fsBlockSize;
      magic2      := UBIXFS_MAGIC_2;
      blocksPerAG := min(65536, blockSize shl 3);
      AGShift     := 10;  // realistically it should be a min of 13
      while (1 shl AGShift <> blocksPerAG) do inc(AGShift);
      numAGs      := (dword(numBlocks)+(blocksPerAG-1)) div blocksPerAG;

      flags       := UBIXFS_CLEAN;
      setBlockRun(logBlocks, 0, 0, 0);
      logStart    := 0;
      logEnd      := 0;
      magic3      := UBIXFS_MAGIC_3;
      setIAddr(indicies, 0, 0, 0);
      setIAddr(rootDir, 0, 0, 0);
    end; // with
  // create the free block list inode 

  getmem(inode, fsBlockSize);  // make the inode the same size as a block
  if (inode = NIL) then exit;

  fillchar(inode^, sb^.inodeSize, 0);

  with inode^ do
    begin
      magic := UBIXFS_INODE_MAGIC;

      // inodes point to themselves
      setIAddr(inodeNum, 0, sb^.logBlocks.len+1, 1); // the +1 for the start might be wrong
      setIAddr(parent, 0, 0, 0);

      // If the partition is larger than 550GB we will need to allocate more than
      // one blockRun for the BAT.
      fsBATSize := logicalBlocks shr 3;  // compute how many bytes the BAT takes up
      fsBATBlocks := (fsBATSize + fsBlockSize-1) div fsBlockSize;
      writeln('fsBATSize: ', fsBATSize);
      writeln('fsBATBlocks: ', fsBATBlocks);
      setBlockRun(blocks.direct[0], 0, inodeNum.start+1, fsBATBlocks);

      blocks.size := (fsBATBlocks * fsBlockSize); // round the file size up to the nearest block size
      blocks.maxDirect := blocks.size-1;
      blocks.blockCount := fsBATBlocks;

      uid := 0;
      gid := 0;
      mode := S_IFDIR;

      flags := INODE_NO_FLAG;

      setIAddr(attributes, 0, 0, 0);
      inodeSize := sb^.inodeSize;

  end; {with}
  sb^.BAT := inode^.inodeNum;
  writeln('writing out superblock to sector 1');
  device^.write(sb, 1, 1);
  // note that the superblock does not contain proper inode addresses for the
  // BAT and rootDir. Those are sync'd to disk in the destructor after they've been
  // created below. 
    
  system.write('Writing out BAT inode... ');
  // write out the Block Allocation Table inode
  with inode^.inodeNum, sb^ do
    device^.write(inode,
                  ((AG shl AGShift) + start) * (inodeSize shr SECTOR_SHIFT),
                  inodeSize shr SECTOR_SHIFT
                 ); // device^.write 
  writeln('done');
  // Okay, the superblock is the only piece of information needed
  // to mount an UbixFS partition
  new(fs, init(device));
  assert(fs <> NIL);
  assert(fs^.vfs_init() = 0);

  // The freshly formatted parition is now mounted. 
  // From here we need to create:
  //   BAT (Block Allocation Table) file contents
  //   journal
  //   root dir 

  // inode still points to the BAT. Clear out the file in case people decided to
  // use quick formatting
  for i := 0 to (dword(inode^.blocks.size) div sizeof(sector))-1 do
    fs^.writeDataStream(sb^.BAT, i*sizeof(sector), @sector, sizeof(sector));
  setBlockRun(blockRun, 0, 0, 1);
  fs^.markusedBlocks(blockRun);         // mark superBlock block as used
  // mark the journal as used here
  fs^.markUsedBlocks(sb^.BAT);  // mark the BAT inode as used
  // now mark the BAT file blocks as used. Note that if the BAT takes up
  // more than one blockRun (unlikely, but not impossible) then this code
  // will have to check for that.
  writeln('BAT inode blocks.direct[0].len: ', inode^.blocks.direct[0].len);
  fs^.markUsedBlocks(inode^.blocks.direct[0]);  

  // If the number of blocksPerAG doesn't divide evenly into the
  // number of logical blocks, mark out all the "remainder" blocks
  // We need to mark the blocks as used in the BAT manually because
  // if we do it through markUsedBlocks() it will change the usedBlocks
  // count.

  remainderBlocks := (fsBATBlocks shl sb^.blockShift) - fsBATSize;
  writeln('RemainderBlocks: ', remainderBlocks);
  if (remainderBlocks <> 0) then
    begin
      // check if the remainder blocks are evenly divisible by 8
      if (remainderBlocks and 7 <> 0) then
        begin
          blockBit := uInt8(1 shl (remainderBlocks and 7)) -1;
          fs^.writeDataStream(sb^.BAT, fsBatSize, @blockBit, 1);
          blockBit := -1;
          for i := fsBatSize+1 to (fsBATBlocks shl sb^.blockShift)-1 do
            fs^.writeDataStream(sb^.BAT, i, @blockBit, 1);
        end
      else
        begin
          blockBit := -1;
          for i := fsBatSize to (fsBATBlocks shl sb^.blockShift)-1 do
            fs^.writeDataStream(sb^.BAT, i, @blockBit, 1);
        end;
    end;

  fs^.vfs_mkdir('/');
  if (fs^.vfs_fExist('/.') = -1) then 
    writeln('Warning! /. doesn''t exist') 
  else 
    writeln('fExist(''/.'') returned properly');
  if (fs^.vfs_fExist('/nofile.dat') <> -1) then 
    writeln('Warning! /nofile.dat exists?') 
  else 
    writeln('fExist(''/nofile.dat'') returned properly');

  if (fs^.vfs_fExist('/nodir/nofile.dat') <> -1) then 
    writeln('Warning! /nodir/nofile.dat exists?') 
  else 
    writeln('fExist(''/nodir/nofile.dat'') returned properly');

//  writeln('fs^.vfs_fExist(''/.'') returned: ', fs^.vfs_fExist('/.'));
//  writeln('fs^.vfs_fExist(''/nofile.dat'') returned: ', fs^.vfs_fExist('/nofile.dat')); 
  fs^.printSuperBlock;
  
  dispose(fs, done);
  writeln('Format complete!');
  dispose(sb);
  freemem(inode, fsBlockSize);
   
end; // TUbixfS.vfs_format

function TUbixFS.vfs_init;
var index:uInt32;
begin
  // This function should probably not be called more than once without
  // calling the appropiate shutdown code
  writeln('TUbixFS.vfs_init()');
  result := -1;
  if (dev = NIL) then exit;
  new(superBlock);
  assert(superBlock <> NIL);
  if (superBlock = NIL) then exit;
  fillchar(superBlock^, sizeof(superBlock^), 0); // better safe than sorry
  // read in the superBlock. It's always the 1st block on the partition (counting from 0)
  system.write('reading in superBlock... ');
  dev^.read(superBlock, 1, 1);
  writeln('done');

  assert(superBlock^.magic1 = UBIXFS_MAGIC_1);
  assert(superBlock^.magic2 = UBIXFS_MAGIC_2);
  assert(superBlock^.magic3 = UBIXFS_MAGIC_3);
  assert(strcomp(superBlock^.name, 'UbixFS') = 0);
  assert((1 shl superBlock^.blockShift) = superBlock^.blockSize);
  assert((1 shl superBlock^.AGShift) = superBlock^.blocksPerAG);
  assert(superBlock^.flags = UBIXFS_CLEAN);
  
  // create the file decriptor tree


  // The blockCache tree has a [key::value] pair of [block address::pointer to cacheNode].
  // The blockTime tree has a [key::value] pair of [last used time::pointer to cacheNode].

  // create the block cache tree
  new(blockCache, init('', 256, BT_INT64, NIL));
  // create the block last used time tree
  new(blockTime, init('', 256, BT_INT64, NIL));

  // Create the file descriptors and related stuff

  // vnids is a tree which has a [key::value] pair of [iAddr::pointer to vNode].
  new(vnids, init('', 256, BT_INT64, NIL));

  // vnodeQueue is a queue which has a list of available FD handles
  new(vNodeQueue, init(maxFDEntry, sizeof(uInt32)));
  // fill the vNodeQueue with a list of all available FD handles
  for index := 0 to maxFDEntry-1 do
    vNodeQueue^.enqueue(index);

  // Allocate and clear the file descriptor array
  GetMem(fileDescriptors, maxFDEntry*sizeof(TVNode));
  fillchar(fileDescriptors^, maxFDEntry*sizeof(TVNode), 0);
  
  // The last used time is obtained through the RDTSC instruction to give us a unique
  // and increasing timestamp.  When the cache is full we drop off the least recently 
  // used block.  Because the tree is sorted, this will be the first entry. 

  result := 0;  // success
end; // TUbixFS.vfs_init

function TUbixFS.vfs_mkdir;
const 
  dot:array[0..1] of char = '.'#0;
  dotdot:array[0..2] of char = '..'#0;
var 
  inode:PUbixFSInode;
  iAddr:inodeAddr;
  bRun:TBlockRun;
  len:integer;
  dirTree:PbTree;
  u:uPtr;
begin
  // XXX ToDo:
  // Ability to add sub directories

  result := -1;
  assert(path <> NIL);
  assert(superBlock <> NIL);
  if (path = NIL) or (superBlock = NIL) then exit;

  len := strlen(path);
  assert(len <> 0);
  if (len = 0) then exit;

  if (len = 1) and (uInt8ArrayPtr(path)^[0] = ord('/')) then
    begin
      writeln('Creating root directory');
      // wanting to create the root. Check to see if the root dir inode is empty
      assert(not isValidIAddr(superBlock^.rootDir));
      if isValidIAddr(superBlock^.rootDir) then exit;  // should set a proper error code here
      setIAddr(iAddr, 0, 0, 0);   // technically the "parent" iaddr
      inode := allocInode(iAddr); // allocate an inode (parent points to la-la land)
      assert(inode <> NIL);       // make sure it isn't nil
      if (inode = NIL) then exit;
      inode^.mode := S_IFDIR;
      superBlock^.rootDir := inode^.inodeNum;  // assign the root dir
      saveBlock(superBlock^.rootDir, inode);           // save the inode
      u.iAddr := inode^.inodeNum; // copy the inode addr to a uPtr

      // open the root dir
      with superBlock^.rootDir do
        writeln('Root dir inode: ', AG, ':', start, ':', len);
      writeln('Opening root dir tree');
      new(dirTree, init(iAddrToStr(superBlock^.rootDir), 1024, BT_PCHAR, new(PUbixBTreeVFS, init(@self))));
      writeln('Inserting ''.''');
      // Get ready to insert the '.' :: inode^.inodeNum key :: value pair
      writeln('dirTree^.Insert(@dot, u) returned ', dirTree^.Insert(@dot, u));     // '.' points to ourself
      dispose(dirTree, done);       // close and save the directory
//      saveInode(inode);           // save the inode      
      //freemem(inode, inode^.inodeSize);  // free the memory associate with the inode (saved above)
    end
  else
    begin
      // subdir support goes here
    end;
    
  result := 0;
end; // TUbixFS.vfs_mkdir

function TUbixFS.vfs_read;
var 
  inode:PUbixFSInode;
  u:uPtr;
begin
  result := -1;
  if (buffer = NIL) or (fileDescriptors = NIL) or (handle >= maxFDEntry) then exit;
  with fileDescriptors^[handle] do
    begin
      assert(readDataStream(iAddr, position, buffer, size) = 0);
      position := position + int64(size);
      result := size;
    end; // with
end; // TUbixFS.vfs_read

function TUbixFS.vfs_seek;
begin
  result := -1;
  if (fileDescriptors = NIL) or (handle >= maxFDEntry) then exit;
  fileDescriptors^[handle].position := newPosition;
  result := 0;
end; // TUbixFS.vfs_seek

function TUbixFS.vfs_write;
begin
  result := -1;
  if (buffer = NIL) or (fileDescriptors = NIL) or (handle >= maxFDEntry) then exit;
  with fileDescriptors^[handle] do
    begin
      assert(writeDataStream(iAddr, position, buffer, size) = 0);
      position := position + int64(size);
      result := size;
    end; // with
end; // TUbixFS.vfs_write


function TUbixFS.writeDataStream;
var 
  tmpBlock:uInt8ArrayPtr;
  blockRun:TBlockRun;
  size, remainder:uInt32;
  blockStart:uInt32;
  blocksToWrite:uInt32;
  bytesToWrite:uInt32;
  inodeChanged:boolean;
  inode:PUbixFSInode;
begin
  // XXX ToDo:
  // Read/Write through cache
  // fix block extension size
  // change whether the inode is saved or not
  result := -1;
  if ((buf = NIL) or (superBlock = NIL)) then exit; 
  inode := loadBlock(iAddr);
  tmpBlock := NIL;

  inodeChanged := FALSE;
  if (position + length > getFileSpace(inode)) then 
    begin
      extendFile(inode, position+length);  
//      inode^.blocks.size := position+length;  // update the blocks
      writeln('inode^.blocks.size (after extendFile) = ', inode^.blocks.size); 
    end
  else
    if (position + length > inode^.blocks.size) then
      begin
      	// The above case checks to see if we're writing out past
      	// the end of the allocated space on the device. If we do, we extend 
      	// out the file. However, if we're just writing past the end of 
      	// the file, we have to update the size.
        
      	inode^.blocks.size := position+length;
        // XXX To Do:
        // Might want to clear out the blocks we're skipping past, otherwise
        // there might be garbage in there
        assert(saveBlock(iAddr, inode));  // save the inode
      end;

  size := length;
  remainder := uInt32(position mod superBlock^.blockSize);

  if (remainder <> 0) then
    begin
      // the position we've been asked to write to doesn't align to
      // a block size. We need to read in the previous block and pad
      // the data
      blockStart := uInt32(position shr superBlock^.blockShift);
      blockRun := findBRun(@inode^.blocks, blockStart);
      assert(blockRun.len > 0);
      tmpBlock := loadBlock(blockRun);
      assert(tmpBlock <> NIL);
    
      if (tmpBlock = NIL) then exit;
      // read in the block
      readDataStream(iAddr, blockStart, tmpBlock, superBlock^.blockSize);
      bytesToWrite := min(size, (superBlock^.blockSize - remainder));
      move(buf^, tmpBlock^[remainder], bytesToWrite);
      saveBlock(blockRun, tmpBlock);
      inc(buf, bytesToWrite);
      dec(size, bytesToWrite);
      position := position + bytesToWrite;
    end; // if remainder != 0

  remainder := size mod superBlock^.blockSize;
  dec(size, remainder);

  // Size is currently in bytes. The remainder variable above holds the number
  // of remaining bytes (if any) outside full blocks we have to write.

  // convert the size into blocks
  size := size shr superBlock^.blockShift;

  while (size > 0) do
    begin
      blockRun := findBRun(@inode^.blocks, uInt32(position shr superBlock^.blockShift));
      assert(blockRun.len > 0);
      blocksToWrite := min(blockRun.len, size);
      for blocksToWrite := 0 to min(blockRun.len, size)-1 do
        begin
          saveBlock(blockRun, buf);
          inc(blockRun.start);  // This might wrap around?
          dec(blockRun.len);    // might be unnecessary
          dec(size);
          inc(buf, superBlock^.blockSize);
        end;
    end; // while

  if (remainder <> 0) then
    begin
      // There is less than a block of data left to write. Read in the old block
      blockStart := uInt32((position+length-remainder) shr superBlock^.blockShift);
      blockRun := findBRun(@inode^.blocks, blockStart);
      assert(blockRun.len > 0);
      // obtain a pointer to the block
      tmpBlock := loadBlock(blockRun);
      assert(tmpBlock <> NIL);
      if (tmpBlock = NIL) then exit;
      move(buf^, tmpBlock^[0], remainder);
      saveBlock(blockRun, tmpBlock);
    end;
  result := 0;
end; // TUbixFS.writeDataStream

destructor TUbixFS.done;
var 
  searchRec:BTreeSearchRec;
  inode:PUbixfsInode;
  cacheNode:PCacheNode;
  dirtyCount:integer;
  u:uPtr;
begin
  sync();
  // XXX ToDo:
  // close all "open" files manually.
  dispose(vnids, done);
  dispose(vNodeQueue, done);
  FreeMem(fileDescriptors, maxFDEntry*sizeof(TVNode));
  vnids := NIL;
  vNodeQueue := NIL;
  fileDescriptors := NIL;

  if (blockTime <> NIL) then dispose(blockTime, done);
  if (blockCache <> NIL) then 
    begin
      dirtyCount := 0;
      fillchar(u, sizeof(u), 0);
      while (blockCache^.getFirstKey(searchRec)) do
        begin
          cacheNode := searchRec.value.p;
          assert(cacheNode <> NIL);
          if (cacheNode^.dirty) then inc(dirtyCount);
          u.p := cacheNode;
          blockCache^.Delete(@cacheNode^.address, u);
          dispose(cacheNode, done);          
        end;
      if (dirtyCount <> 0) then writeln('Dirty blocks in cache: ', dirtyCount);
      dispose(blockCache, done);
    end;
  if (superBlock <> NIL) then dispose(superBlock);
end; // TUbixFS.done

procedure TUbixFS.examine;
var 
  dirTree:PBTree;
  searchRec:BTreeSearchRec;
  u:uPtr;
begin
  writeln('------------- examining FS -----------------');
  write('Opening root dir...');
  new(dirTree, open(IAddrToStr(superblock^.rootDir), new(PUbixBTreeVFS, init(@self))));
  //dirTree^.PrintList('rootDir.txt');
  writeln('done');
  writeln('Root directory has ', dirTree^.getKeyCount(), ' entrie(s)');
  dirTree^.getFirstKey(searchRec);
  repeat
    with searchRec.value.iAddr do
      write('value: ', AG, ':', start, ':', len);
    writeln('    key: ', pchar(searchRec.key));
  until not dirTree^.findNext(searchRec);
  dispose(dirTree, done);
  if (blockCache <> NIL) then writeln('Block cache has ', blockCache^.getKeyCount(), ' entries');
  writeln('------------------ done --------------------');
end;

end.