unit uWavZ48;

{****************************************************************************

Targets: Win32
Compile: Delphi 7.00

uWavZ48 contains the Z48 wave file structure for yaape

Version: History:
-------- --------
00.01.00 13.09.2005 - Created

****************************************************************************}

{
CREDITS
=======
This documentation draws on information on the WAVE and RIFF
format from various sources found on the internet.

The Z4/8 .wav file structure differs in a few small ways
so that the impression is of errors and oversights rather
than a deliberate non-standard format.

This analysis was originally made for my yaape program
(Yet Another Akai Program Editor) but I felt to make the effort
worthwhile I would release this to the public domain in the
interests of moving fellow owners of this remarkable and possibly
last-of-a-kind hardware rack sampler to also produce software
for it.

Yaape (and hence this page of code) is written in Borland
Pascal. To understand the structure, some tips are in order.

1. Pascal works from the bottom up. The top-level structure is
the record type trRiffHead at the bottom of the type declarations.
All chunks are in the form of record types.

2. The data types are 8-bit, 16-bit and 32-bit, in
signed and unsigned forms. They are denoted by hungarian notation as follows:

Byte     - Hungarian prefix: ubXXX - 8 bits unsigned, 0..255
Shortint - Hungarian prefix: sbXXX - 8 bits signed, -128..127
Word     - Hungarian prefix: uwXXX - 16 bits unsigned, 0..65535
Smallint - Hungarian prefix: swXXX - 16 bits signed, -32768..32767
Longword - Hungarian prefix: ulXXX - 32 bits unsigned, 0..4294967295
Longint  - Hungarian prefix: slXXX - 32 bits signed, -2147483648..2147483647

There is also an ID type which is a string of 4 8-bit characters.

...and the main component of structure is the Record, comparable
to a C/C++ "struct".

3. As I analyzed the various sections, I found it easier just to lay
out a string of numbered bytes, eg. ubSmplCk12, ubSmplCk13... and to
give them their proper name as their various functions were
identified. There are, therefore, many numbered bytes which
either have not been identified yet, or which may be simply
unused. Behind each such byte is the value which, by observation,
it seems to normally contain.

4. The sections always begin with a 4 character tag (padded
with blanks if less than 4 chars), followed by a longword
indicating the number of bytes remaining which belong to this
tag. There are exceptions, however, with some secondary ID types,
or, sometimes, no ID at all.

5. As an aid to understanding how the parts fit together, I have
included a sample parser with this reference as a starting point.

*****

This is a work in progress and probably contains many errors.
I make no claim as to its correctness. I accept no responsibility
for any damages arising from its use. If you do not agree
to this, do not use this reference.
}

interface

{$align off}

uses
  Classes;


type

  //Four-character chunk tag, e.g. 'RIFF' or 'fmt ' (padded with blanks
  // when the name is shorter than four characters).
  tID = array[0..3] of char;
  //32-bit unsigned length field that follows a chunk tag.
  tLen = longword;

  (* Data chunk *)
  tDataID = tID; // 'data'
  tDataLen = tLen; // Size of the sample data in bytes.
  //One 16-bit sample. A separate record type would be needed for every
  // other sample width that is to be supported.
  trData16Ck = record
    swSample :smallint; //signed word
  end;
  //Array of trData16Ck follows immediately after the data header.


  (* Cue Points *)
  //There is one cue point per region.
  tCueID = tID; // 'cue '
  //Length field that follows the 'cue ' tag. The number of cue points is
  // NOT stored here; it is the separate trCueCk.PointCount field below.
  // NOTE(review): by RIFF convention this is the chunk size in bytes - confirm.
  tCueLen = tLen;
  trCueCk = record
    //Number of cuepoints and, by implication, the number of
    // items in every array associated with cuepoints.
    PointCount :longword;
  end;
  //Array of trCuePoint follows immediately after the cue points header.

  //One of an array of cuepoints in the cue point chunk.
  trCuePoint = record // 24 bytes
    //Identifies this CuePoint structure with other structures with the same ID.
    ulCuePointID :longword;

    //Specifies the position of the cue point within
    // the "play order" (as determined by the Playlist chunk).
    ulStart :longword; // Region starts at this position.
    AssociatedChunkID :tID; // 'data'

    //ulChunkStart and ulBlockStart are always 0 because
    // this is an uncompressed .wav with only one 'data' chunk.
    ulChunkStart :longword; // 0

    //Byte offset of the start of the block containing the start position.
    // This offset is relative to the start of the waveform data.
    ulBlockStart :longword; // 0

    //Sample offset of the CuePoint relative to the start of this block.
    ulSampleOffset :longword; // seems always to be identical to ulStart.
  end;
  (* end Cue Points *)


  (* Associated Data List *)
  tListID = tID; // 'LIST'
  //Length of the list chunk in bytes (the sample parser uses it as a byte
  // count when skipping a non-AKAI list). On the Z4/8 the stored value
  // appears to be 4 bytes short - see the remarks in trLtxtCk.
  tListLen = tLen;
  trListCk = record
    TypeID :tID; // Z4/8 always 'adtl'
  end;
  //Array of sub-chunks of type TypeID follows immediately after the
  // associated data list header.

  //The associated data list contains one list per region;
  // corresponding to one per cue point.
  //Size: 20 bytes of fields; 28 bytes once the LtxtID and LtxtLen header
  // fields of the enclosing trAdtlCk are counted.
  trLtxtCk = record
    ulIdentifier :longword; //Corresponds to same ID in CuePoints.

    //Note: +1 to what is displayed on Z4/8!
    ulLength :longword; // Length of this region (really 1-based last byte).
    Purpose :tID; // 'rgn '
    uwCountry :word; // 0
    uwLanguage :word; // 0

    //The official definition of a labelled text chunk has
    // the following two words at this point...
    uwDialect :word; // 0
    uwCodePage :word; // 0

    //...but AKAI always put 0 in them, except for the last
    // instance, where it contains the ID of the 'data' chunk!
    // Looking at the value in ListLen we find it does not include
    // the final 4 bytes of the last LtxtCk, so technically this is
    // correct. Nevertheless, it is a somewhat odd construct and I
    // speculate the Z4/8 forgets the 4 bytes of trListCk.TypeID.
    // The spec states that TypeID *is* included in the ListLen!
    // Not serious, but needs awkward correction during reading
    // (see sample parser code) and writing.
    // Typical of a hard-to-find memory leak.
  end;

  tAdtlID = tID; // 'adtl'
  tAdtlLen = tLen; // number of trAdtlCk chunks (not used by the sample parser)
  //One labelled-text sub-chunk of the associated data list: the sub-chunk
  // tag and length followed by its trLtxtCk payload (28 bytes in total).
  trAdtlCk = record
    LtxtID :tID; // spec: 'labl', 'note', 'ltxt' : Z4/8 only 'ltxt'
    LtxtLen :tLen;
    rLtxtCk :trLtxtCk;
  end;
  //Array of trAdtlCk, one entry per cue point.
  (* end Associated Data List *)


  (* Fact chunk *)
  tFactID = tID; // 'fact'
  tFactLen = tLen; // 4
  trFactCk = record // 4 bytes
    //Number of sample frames, including stereo and more channels.
    ulTotalSampleCount :longword;
  end;


  tLoopID = tID; // 'loop'
  //One loop description inside the 'smpl' chunk.
  //Size: six 4-byte fields = 24 bytes, which matches the difference between
  // the 'smpl' lengths with and without a loop (78 - 54).
  trSampleLoop = record // 24 bytes
    //Spec states this is ID associating this loop with other lists.
    //  Z4/8 has the string 'loop' in it!
    //ulSampleLoopID :longword; // spec.
    SampleLoopID :tID; // 'loop' - Special in Z4/8.

    ulDirection :longword; // 0 = forward 1 = alternating
    ulStart :longword; // Start point of loop.
    ulEnd :longword; // End point of loop.

    //"Fraction" is the 3 decimal places of the loop start value.
    // It increments in steps alternating by 66 and 65, backwards(!)
    // as in:
    // .000=0; .999=66; .998=131(+65); .997=197(+66); .996=262(+65) ...
    // I cannot find any rationale behind this other than the
    // observation that at .001=65470 so +66=65536 (1 over FFFF).
    ulFraction :longword;
    ulPlayCount :longword; // 0 = HOLD, else 1-9999 times.
  end;


  //AKAI-specific sampler data stored at the end of the 'smpl' chunk.
  // Its length is announced by trSmplCk.ulSampleDataLen.
  trSampleData = record // 18 bytes
    ubSmplDataCk01 :byte; // 2
    ubSmplDataCk02 :byte; // 0

    // The actual MIDI root note for this sample.
    // See also trSmplCk.ulMIDIUnityNote.
    ubOriginalNote :byte;
    sbCoarseTune :shortint; // Interacts with trSmplCk.ulMIDIUnityNote
    sbFineTune :shortint;
    sbLoopTune :shortint;

    // 0 = no loop; 1 = one shot; 2 = loop in release; 3 = loop until release
    // If = 0, trSmplCk.ulSampleLoops = 0 and the Sample Array will be empty.
    // If = 1, you might be tempted to think this will be the case too...
    ubLoopType :byte;

    ubSmplDataCk08 :byte; // 0
    ubSmplDataCk09 :byte; // 0
    ubSmplDataCk10 :byte; // 1

    ulSampleStartMarker :longword;

    // 0-based, 1 less than trFactCk.ulTotalSampleCount
    ulLastSampleNumber :longword;
  end;


  (* Smpl chunk *)
  tSmplID = tID; // 'smpl'
  //With loop = 78 (36 header + 24 loop + 18 sample data).
  // Without loop (no SampleLoop) = 54 (36 header + 18 sample data).
  tSmplLen = tLen;
  trSmplCk = record // 36 bytes
    ubManufacturerID1 :byte; // MIDI association ID for AKAI; = 71
    ubManufacturerID2 :byte; // 0
    ubManufacturerID3 :byte; // 0
    ubManufacturerByteCount :byte; // 1 or 3, depending on form.

    ulProductID :longword; // 94 for Z8. Same for Z4?

    //Period of one sample in nanoseconds:
    // (1 / trFmtCk.ulSamplesPerSec) * 1,000,000,000
    // 44100 = 22675
    ulSamplePeriod :longword;

    // trSampleData.ubOriginalNote + trSampleData.sbCoarseTune
    ulMIDIUnityNote :longword;
    ulMIDIPitchFraction :longword; // 0
    ulSMPTEFormat :longword; // 0
    ulSMPTEOffset :longword; // 0

    //Number of trSampleLoop instances. Z4/8 only 0 or 1.
    ulSampleLoops :longword;

    //Length of the trSampleData block, = 18.
    // In the file the trSampleLoop entries (if there are any) sit between
    // this field and the sample data itself.
    ulSampleDataLen :longword;
  end;
  (* end Smpl chunk *)


  (* Fmt chunk *)
  tFmtID = tID; // 'fmt '
  tFmtLen = tLen; // 18
  trFmtCk = record // 18 bytes
    uwFormatTag :word; //1 = WAVE_FORMAT_PCM, no other tag legal.
    uwChannels :word; //1 = mono, 2 = stereo etc.
    ulSamplesPerSec :longword; //44100, 48000, 96000

    //How fast must the bytes be read out?
    // For example, in mono 16-bit wave files, this will be 88200
    // (2 bytes * 44100). Stereo will double again, at 176400.
    ulAvgBytesPerSec :longword;

    //Alignment in the 'data' chunk. Naturally, for 16 bits,
    // there must always be an even number of 8-bit bytes. And
    // for stereo 16 bits, each sample needs 4 bytes. So:
    // uwChannels * (uwBitsPerSample / 8)
    uwBlockAlign :word;

    uwBitsPerSample :word; // 16 for typical 16-bit samples.

    //The Z8 has a length of 18 for the 'fmt ' chunk, and these
    // two additional bytes are by experience always 0.
    // This is odd, because SoundFont puts out a length for this chunk
    // of only 16 and these two bytes do not exist. This conforms to
    // the WAVE_FORMAT_PCM spec. The Z4/8 adds them for no reason I
    // can discern (which doesn't mean there isn't a reason).
    // Overlooked or reserved for some special feature?
    ubFmtCk17 :byte;
    ubFmtCk18 :byte;
  end;


  //RIFF file header: the first 12 bytes of the file.
  trRiffHead = record
    RiffID :tID; // 'RIFF'
    RiffLen :tLen; // Length field following the 'RIFF' tag.

    //RIFF file type identifier
    WaveID :tID; // 'WAVE'
  end;

//Parse results. ParseAkaiWavFile fills these unit-level variables with the
// contents of the file it reads; one group of variables per chunk, listed
// in the order the parser handles them.
var
  //RIFF header.
  rRiffHead :trRiffHead;

  //'fmt ' chunk.
  FmtID :tFmtID;
  FmtLen :tFmtLen;
  rFmtCk :trFmtCk;

  //'fact' chunk.
  FactID :tFactID;
  FactLen :tFactLen;
  rFactCk :trFactCk;

  //'smpl' chunk header.
  SmplID :tSmplID;
  SmplLen :tSmplLen;
  rSmplCk :trSmplCk;

  //Trailing parts of the 'smpl' chunk: the AKAI sample data and
  // rSmplCk.ulSampleLoops loop entries.
  rSampleData :trSampleData;
  SampleLoopArray :array of trSampleLoop;

  //'cue ' chunk: header plus rCueCk.PointCount cue points.
  CueID :tCueID;
  CueLen :tCueLen;
  rCueCk :trCueCk;
  CuePointArray :array of trCuePoint;

  //'LIST' chunk header.
  ListID :tListID;
  ListLen :tListLen;
  rListCk :trListCk;

  //'adtl' list contents: one entry per cue point.
  AdtlID :tAdtlID;
  AdtlCkArray :array of trAdtlCk;

  //'data' chunk: the 16-bit samples.
  DataID :tDataID;
  DataLen :tDataLen;
  DataArray :array of trData16Ck;

//Four-character tags that the parser compares against the IDs read from
// the file. Tags shorter than four characters are padded with a blank.
const
  fmt = 'fmt ';
  fact = 'fact';
  smpl = 'smpl';
  cue = 'cue ';
  loop = 'loop';
  LIST = 'LIST';
  adtl = 'adtl';
  ltxt = 'ltxt';
  data = 'data';

//Sample parser: reads the file FileName and stores its chunks in the
// unit-level variables of this unit.
procedure ParseAkaiWavFile(FileName :string);


implementation


uses SysUtils;


{
ParseAkaiWavFile
================
Sample parser for AKAI Z4/8 .wav files. Opens FileName, walks through the
RIFF chunks and stores what it finds in the unit-level variables declared
in the interface section (rRiffHead, rFmtCk, rFactCk, rSmplCk, rSampleData,
SampleLoopArray, rCueCk, CuePointArray, rListCk, AdtlCkArray, DataArray and
the accompanying xxxID / xxxLen variables).

Parameters:
  FileName - path of the file to read.

Behaviour:
  - Returns silently when the file is not a RIFF/WAVE file, or as soon as
    the format tag is seen not to be 1 (WAVE_FORMAT_PCM). Because this is
    checked after every chunk, the 'fmt ' chunk must be the first one.
  - Raises the usual stream exceptions (EFOpenError, EReadError) when the
    file cannot be opened or ends prematurely.
  - The file is always closed again, also on early exit or exception.
  - Chunks with an unknown tag are skipped.
  - Length fields taken from the file are never allowed to make a read
    run past the end of the record or array that receives the data.
  - rFmtCk, rCueCk and the dynamic arrays are cleared before parsing so
    that the result of an earlier call cannot leak into this one.
}
procedure ParseAkaiWavFile(FileName :string);
var
  Stream :TFileStream;
  StreamSize :Int64;
  ChunkID :tID;
  ChunkLen :tLen;
  WholeBytes :longword;
  i :integer;

  //Moves the stream cursor Count bytes relative to its current position.
  procedure Skip(Count :Int64);
  begin
    Stream.Seek(Count, soCurrent);
  end;

  //Reads a payload of Declared bytes into Buffer, which can hold BufSize
  // bytes. Bytes that do not fit are skipped instead of being written
  // past the end of Buffer.
  procedure ReadBounded(var Buffer; BufSize, Declared :longword);
  begin
    if Declared > BufSize then begin
      Stream.ReadBuffer(Buffer, BufSize);
      Skip(Int64(Declared) - BufSize);
    end else begin
      Stream.ReadBuffer(Buffer, Declared);
    end;
  end;

begin
  Stream:= TFileStream.Create(FileName, fmOpenRead);
  try
    //Start from a clean state: the format check and the 'adtl' reader
    // below depend on rFmtCk and rCueCk, so values left over from a
    // previously parsed file must not be visible here.
    FillChar(rFmtCk, SizeOf(rFmtCk), 0);
    FillChar(rCueCk, SizeOf(rCueCk), 0);
    SetLength(SampleLoopArray, 0);
    SetLength(CuePointArray, 0);
    SetLength(AdtlCkArray, 0);
    SetLength(DataArray, 0);

    StreamSize:= Stream.Size;

    Stream.ReadBuffer(rRiffHead, SizeOf(rRiffHead));

    //Trap non-compatible files
    if rRiffHead.RiffID <> 'RIFF' then exit;
    if rRiffHead.WaveID <> 'WAVE' then exit;

    while StreamSize > Stream.Position do begin
      Stream.ReadBuffer(ChunkID, SizeOf(ChunkID));

      if ChunkID = fmt then begin
        FmtID:= ChunkID;
        Stream.ReadBuffer(FmtLen, SizeOf(FmtLen));
        //Standard PCM files announce 16 bytes, the Z4/8 18; anything
        // longer than the record is skipped rather than overflowing it.
        ReadBounded(rFmtCk, SizeOf(rFmtCk), FmtLen);
      end;

      //Trap non-compatible files
      if rFmtCk.uwFormatTag <> 1 then exit;

      if ChunkID = fmt then begin
        //Already handled above, before the format check.
      end
      else if ChunkID = fact then begin
        FactID:= ChunkID;
        Stream.ReadBuffer(FactLen, SizeOf(FactLen));
        ReadBounded(rFactCk, SizeOf(rFactCk), FactLen);
      end
      else if ChunkID = smpl then begin
        SmplID:= ChunkID;
        Stream.ReadBuffer(SmplLen, SizeOf(SmplLen));
        Stream.ReadBuffer(rSmplCk, SizeOf(rSmplCk));

        //The Z8 only has one or no loops. For the sake of completeness
        // we prepare for multiple loops anyway.
        SetLength(SampleLoopArray, rSmplCk.ulSampleLoops);
        for i:= 0 to Length(SampleLoopArray) - 1 do begin
          Stream.ReadBuffer(SampleLoopArray[i], SizeOf(trSampleLoop));
        end;

        ReadBounded(rSampleData, SizeOf(rSampleData), rSmplCk.ulSampleDataLen);
      end
      //There is one cuepoint per region.
      else if ChunkID = cue then begin // cue point list
        CueID:= ChunkID;
        Stream.ReadBuffer(CueLen, SizeOf(CueLen));
        Stream.ReadBuffer(rCueCk, SizeOf(rCueCk));
        SetLength(CuePointArray, rCueCk.PointCount);
        for i:= 0 to Length(CuePointArray) - 1 do begin
          Stream.ReadBuffer(CuePointArray[i], SizeOf(trCuePoint));
        end;
      end
      //For the Z4/8, the list can only be of associated data list type.
      //There is one list entry per cuepoint.
      else if ChunkID = LIST then begin
        ListID:= ChunkID;
        Stream.ReadBuffer(ListLen, SizeOf(ListLen));
        Stream.ReadBuffer(rListCk, SizeOf(rListCk));
        if (rListCk.TypeID = adtl) and (rCueCk.PointCount > 0) then begin
          AdtlID:= rListCk.TypeID;
          SetLength(AdtlCkArray, rCueCk.PointCount);
          for i:= 0 to Length(AdtlCkArray) - 1 do begin
            Stream.ReadBuffer(AdtlCkArray[i], SizeOf(trAdtlCk));
          end;
          //Data chunk. The last cue point has 4 bytes cut short
          // because the Z4/8 opsys seems to forget rListCk in
          // ListLen. This is proof positive that the Z8 does not
          // use the uwDialect and uwCodePage fields, which is
          // perhaps not so great a surprise and no loss :)
          // Correct the stream cursor position before continuing.
          Skip(-SizeOf(rListCk));
        end else begin
          //Non-AKAI list chunk, or an 'adtl' list without any cue points
          // to attach it to. Skip over the rest of it; TypeID has
          // already been consumed.
          if rListCk.TypeID = adtl then AdtlID:= rListCk.TypeID;
          if ListLen > longword(SizeOf(rListCk)) then
            Skip(Int64(ListLen) - SizeOf(rListCk));
        end;
      end
      else if ChunkID = data then begin
        DataID:= ChunkID;
        Stream.ReadBuffer(DataLen, SizeOf(DataLen));
        SetLength(DataArray, DataLen div SizeOf(trData16Ck));
        //Only whole samples are read so that an odd DataLen cannot
        // write past the end of the array; an empty array is not indexed.
        WholeBytes:= DataLen - (DataLen mod SizeOf(trData16Ck));
        if Length(DataArray) > 0 then
          Stream.ReadBuffer(DataArray[0], WholeBytes);
        //Step over a trailing partial sample and the RIFF pad byte that
        // follows a chunk of odd length.
        if DataLen > WholeBytes then
          Skip(Int64(DataLen - WholeBytes) + (DataLen and 1));
      end
      else begin
        //Unknown chunk: step over its payload (plus the pad byte after
        // an odd length) so that the next read lands on a chunk tag.
        Stream.ReadBuffer(ChunkLen, SizeOf(ChunkLen));
        Skip(Int64(ChunkLen) + (ChunkLen and 1));
      end;
    end;
  finally
    Stream.Free;
  end;
end;

end.