Hi there,

I'm a C/C++ programmer who decided to try D for a small side-project (it boils down to extracting information from an NTFS partition).

After exhausting all other explanations, I think I found a bug in gdc: when I use ulong fields in structs with align(1), they are not properly aligned; even worse, they "bleed" three bytes onto the next field.

Here's the minimal test code :

---
import std.stdio;
import std.stream;
import std.conv;

//
// On-disk data structures
//

// NTFS BIOS parameter block (variant under test: 64-bit values declared as
// ulong).
// Every field is declared inside an align(1) block. With no padding between
// fields, the sizes add up to 73 bytes (2+1+7+1+18+8+8+8+4+4+8+4), and the
// first ulong (totalSectors) would start at byte offset 29 of this struct.
struct NTFS_BootSector_BPB {
        align (1) {
                ushort          bytesPerSector;
                ubyte           sectorsPerCluster;
                ubyte[7]        _unused1;
                ubyte           mediaDescriptor;
                ubyte[18]       _unused2;
                // 64-bit fields: these are the ones reported as shifted.
                ulong           totalSectors;
                ulong           logicalClusterNumberMFT;
                ulong           logicalClusterNumberMFTmir;
                uint            clustersPerMFTRecord;
                uint            clustersPerIndexBuffer;
                ulong           volumeSerialNumber;
                ubyte[4]        _unused3;
        }
}

// NTFS boot sector, embedding the ulong-based NTFS_BootSector_BPB.
// Every field is declared inside an align(1) block. If the embedded BPB
// occupies its unpadded 73 bytes, the field sizes add up to 512 bytes
// (3+8+73+426+2), which is the number of bytes main reads into this struct.
struct NTFS_BootSector {
        align (1) {
                ubyte[3]                _unused1;
                ubyte[8]                oemID;
                NTFS_BootSector_BPB     bpb;
                ubyte[426]              _unused2;
                ushort                  signature;
        }
}

// NTFS BIOS parameter block 2 (control variant).
// Same field names, order and sizes as NTFS_BootSector_BPB, except that the
// four 64-bit values are declared as ubyte[8] instead of ulong. The unpadded
// size is likewise 73 bytes (2+1+7+1+18+8+8+8+4+4+8+4).
struct NTFS_BootSector_BPB2 {
        align (1) {
                ushort          bytesPerSector;
                ubyte           sectorsPerCluster;
                ubyte[7]        _unused1;
                ubyte           mediaDescriptor;
                ubyte[18]       _unused2;
                // Raw 8-byte arrays in place of the ulong fields.
                ubyte[8]        totalSectors;
                ubyte[8]        logicalClusterNumberMFT;
                ubyte[8]        logicalClusterNumberMFTmir;
                uint            clustersPerMFTRecord;
                uint            clustersPerIndexBuffer;
                ubyte[8]        volumeSerialNumber;
                ubyte[4]        _unused3;
        }
}

// NTFS boot sector 2 (control variant), embedding NTFS_BootSector_BPB2.
// Field-for-field the same as NTFS_BootSector apart from the BPB type; the
// unpadded field sizes add up to 512 bytes (3+8+73+426+2).
struct NTFS_BootSector2 {
        align (1) {
                ubyte[3]                _unused1;
                ubyte[8]                oemID;
                NTFS_BootSector_BPB2    bpb;
                ubyte[426]              _unused2;
                ushort                  signature;
        }
}

// Test driver: reads the first 512 bytes of the file named on the command
// line twice, once into each boot-sector struct variant, so the two can be
// compared in a debugger.
// Returns -1 (after printing a usage line) unless exactly one argument is
// given, and 0 otherwise.
int main(char[][] args) {
        // args[0] is the program name, so exactly one user argument is expected.
        if(args.length != 2) {
                writeln("Usage: bug INPUT_FILE");
                return -1;
        }
        
        Stream                  stream;
        NTFS_BootSector         bootSector;
        NTFS_BootSector2        bootSector2;
        
        // NOTE(review): the stream is never explicitly closed in this function.
        stream = new BufferedFile(to!string(args[1]));
        
        // Read boot sector
        // NOTE(review): 512 is hard-coded rather than taken from the struct's
        // size; this presumes the struct really occupies 512 bytes - confirm.
        stream.seek(0, SeekPos.Set);
        stream.readExact(cast(ubyte*)&bootSector, 512);
        
        // Read boot sector again
        // Rewind to offset 0 so the second struct receives the same bytes.
        stream.seek(0, SeekPos.Set);
        stream.readExact(cast(ubyte*)&bootSector2, 512);
        
        return 0;
}
---

And here are the results viewed with gdb (boot code cut for brevity):
---
(gdb) print /x bootSector
$2 = {
  _unused1 = {0xeb, 0x52, 0x90},
  oemID = {0x4e, 0x54, 0x46, 0x53, 0x20, 0x20, 0x20, 0x20},
  bpb = {
    bytesPerSector = 0x200,
    sectorsPerCluster = 0x8,
    _unused1 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
    mediaDescriptor = 0xf8,
_unused2 = {0x0, 0x0, 0x3f, 0x0, 0xff, 0x0, 0x0, 0x28, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x80, 0x0, 0x80, 0x0},
    totalSectors = 0xc00000000000006,
    logicalClusterNumberMFT = 0x20000000000,
    logicalClusterNumberMFTmir = 0xf60000000000,
    clustersPerMFTRecord = 0x100,
    clustersPerIndexBuffer = 0x451adf00,
    volumeSerialNumber = 0x96a04533a0,
    _unused3 = {0x0, 0x0, 0x0, 0x0}
  },
  _unused2 = {0xfa...},
  signature = 0xaa55
}
(gdb) print /x bootSector2
$3 = {
  _unused1 = {0xeb, 0x52, 0x90},
  oemID = {0x4e, 0x54, 0x46, 0x53, 0x20, 0x20, 0x20, 0x20},
  bpb = {
    bytesPerSector = 0x200,
    sectorsPerCluster = 0x8,
    _unused1 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
    mediaDescriptor = 0xf8,
_unused2 = {0x0, 0x0, 0x3f, 0x0, 0xff, 0x0, 0x0, 0x28, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x80, 0x0, 0x80, 0x0},
    totalSectors = {0xd0, 0x14, 0xe0, 0x6, 0x0, 0x0, 0x0, 0x0},
logicalClusterNumberMFT = {0x0, 0x0, 0xc, 0x0, 0x0, 0x0, 0x0, 0x0}, logicalClusterNumberMFTmir = {0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
    clustersPerMFTRecord = 0x100,
    clustersPerIndexBuffer = 0x451adf00,
volumeSerialNumber = {0xdf, 0x1a, 0x45, 0xa0, 0x33, 0x45, 0xa0, 0x96},
    _unused3 = {0x0, 0x0, 0x0, 0x0}
  },
  _unused2 = {0xfa...},
  signature = 0xaa55
}
---

Even though both structures have exactly the same memory layout in theory, the values of totalSectors, logicalClusterNumberMFT, logicalClusterNumberMFTmir and volumeSerialNumber are different (shifted by 3 bytes).

Worse, when I hex-edit the file so that byte 0x52 equals 0xFF (_unused3 becomes {0x0, 0x0, 0xff, 0x0}), this is what happens:

---
(gdb) print /x bootSector
$4 = {
  _unused1 = {0xeb, 0x52, 0x90},
  oemID = {0x4e, 0x54, 0x46, 0x53, 0x20, 0x20, 0x20, 0x20},
  bpb = {
    bytesPerSector = 0x200,
    sectorsPerCluster = 0x8,
    _unused1 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
    mediaDescriptor = 0xf8,
_unused2 = {0x0, 0x0, 0x3f, 0x0, 0xff, 0x0, 0x0, 0x28, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x80, 0x0, 0x80, 0x0},
    totalSectors = 0xc00000000000006,
    logicalClusterNumberMFT = 0x20000000000,
    logicalClusterNumberMFTmir = 0xf60000000000,
    clustersPerMFTRecord = 0x100,
    clustersPerIndexBuffer = 0x451adf00,
    volumeSerialNumber = 0xff000096a04533a0,
    _unused3 = {0x0, 0x0, 0xff, 0x0}
  },
  _unused2 = {0xfa...},
  signature = 0xaa55
}
(gdb) print /x bootSector2
$5 = {
  _unused1 = {0xeb, 0x52, 0x90},
  oemID = {0x4e, 0x54, 0x46, 0x53, 0x20, 0x20, 0x20, 0x20},
  bpb = {
    bytesPerSector = 0x200,
    sectorsPerCluster = 0x8,
    _unused1 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
    mediaDescriptor = 0xf8,
_unused2 = {0x0, 0x0, 0x3f, 0x0, 0xff, 0x0, 0x0, 0x28, 0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x80, 0x0, 0x80, 0x0},
    totalSectors = {0xd0, 0x14, 0xe0, 0x6, 0x0, 0x0, 0x0, 0x0},
logicalClusterNumberMFT = {0x0, 0x0, 0xc, 0x0, 0x0, 0x0, 0x0, 0x0}, logicalClusterNumberMFTmir = {0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
    clustersPerMFTRecord = 0x100,
    clustersPerIndexBuffer = 0x451adf00,
volumeSerialNumber = {0xdf, 0x1a, 0x45, 0xa0, 0x33, 0x45, 0xa0, 0x96},
    _unused3 = {0x0, 0x0, 0xff, 0x0}
  },
  _unused2 = {0xfa...},
  signature = 0xaa55
}
---

In the bootSector struct, the volumeSerialNumber field "bled" onto _unused3. This is not limited to gdb; the values are also wrong when I try to use them in the D code...

I'm using gdc (Debian 4.6.3-2) 4.6.3 and gdb (GDB) 7.4.1-debian.

I thoroughly checked everything, and I'm all out of rational explanations. I'm terribly sorry in advance if I missed something totally obvious :-)

Reply via email to