//******************************************************************************
//* File       : TagMp3.hpp                                                    *
//* Author     : Mahlon R. Smith                                               *
//*              Copyright (c) 2016-2020 Mahlon R. Smith, The Software Samurai *
//*                  GNU GPL copyright notice located in Taggit.hpp            *
//* Date       : 16-Dec-2019                                                   *
//* Version    : (see AppVersion string)                                       *
//*                                                                            *
//* Description: This header defines the metadata fields for MP3 (ID3v2.3.0)   *
//*              audio files. Some ID3v2.4 features are implemented also.      *
//*                                                                            *
//******************************************************************************

//****************
//* Header Files *
//****************


//* Codes used to identify frames which contain text.                      *
//* Important Note: This array is ordered to correspond to enum TagFields, *
//* and MUST remain synched to it.                                         *
//* TEXT_FRAMES counts only the real frame codes; the array holds one      *
//* extra, final element ("TCOUNT") which corresponds to 'tfCOUNT'.        *
//* The bracketed number in each comment is the element's array index.     *
const short TEXT_FRAMES = 39 ;      // number of text frame types
const char* const TextFrameID[TEXT_FRAMES + 1] = 
{
"TIT2",    // [ 0] Title/songname/content description
"TPE1",    // [ 1] Lead performer(s)/Soloist(s)
"TALB",    // [ 2] Album/Movie/Show title
"TRCK",    // [ 3] Track number/Position in set
"TYER",    // [ 4] Year
"TCON",    // [ 5] Content type, Genre
"TPE2",    // [ 6] Band/orchestra/accompaniment
"TPUB",    // [ 7] Publisher
"TCOM",    // [ 8] Composer
"TEXT",    // [ 9] Lyricist/Text writer
"TPE4",    // [10] Interpreted, remixed, or otherwise modified by
"TCOP",    // [11] Copyright message
"TOWN",    // [12] File owner/licensee
"TXXX",    // [13] User defined text information frame
"TDAT",    // [14] Date (DDMM)
"TIME",    // [15] Time (HHMM)
"TRDA",    // [16] Recording dates
"TLEN",    // [17] Length
"TSIZ",    // [18] Size
"TBPM",    // [19] BPM (beats per minute)
"TIT1",    // [20] Content group description
"TIT3",    // [21] Subtitle/Description refinement
"TPE3",    // [22] Conductor/performer refinement
"TPOS",    // [23] Part of a set
"TKEY",    // [24] Initial key
"TLAN",    // [25] Language(s)
"TOPE",    // [26] Original artist(s)/performer(s)
"TOAL",    // [27] Original album/movie/show title
"TOFN",    // [28] Original filename
"TOLY",    // [29] Original lyricist(s)/text writer(s)
"TORY",    // [30] Original release year
"TRSN",    // [31] Internet radio station name
"TRSO",    // [32] Internet radio station owner
"TSRC",    // [33] ISRC (international standard recording code)
"TSEE",    // [34] Software/Hardware and settings used for encoding
"TFLT",    // [35] File type
"TDLY",    // [36] Playlist delay
"TENC",    // [37] Encoded by
"TMED",    // [38] Media type

"TCOUNT",  // [39] == TEXT_FRAMES: corresponds to 'tfCOUNT' i.e. tag not identified
} ;

//* Tag header code for an embedded image.                                     *
//* (a four-character code, in the same form as the TextFrameID[] entries)     *
const char* const IMAGE_TAG = "APIC" ;

//* Tag header code for a Popularimeter.                                       *
const char* const POP_TAG = "POPM" ;

//* Tag header code for a Play Counter.                                        *
const char* const CNT_TAG = "PCNT" ;

//* According to Wikipedia, the following text-encoding markers are valid.     *
//* Note: As of Dec 2016, many media players cannot decode ID3v2.4.            *
//* The marker is the first byte of a text frame's data: txtConv() reads it    *
//* from the raw data and txtEncode() writes it ahead of the encoded text.     *
enum TextEncode : char
{
   ENCODE_ASCII   = 0x00,  // ISO-8859-1 (0x20 - 0xFF plus 0x0A)
                           // (LATIN-1, Identical to ASCII for values smaller than 0x80).
   ENCODE_UTF16   = 0x01,  // UTF-16 encoded with BOM, ID3v2.2 and ID3v2.3.
                           // The byte order is taken from the BOM when decoding;
                           // this application encodes big-endian.
                           // (Formerly UCS-2 (ISO/IEC 10646-1:1993, obsolete))
   ENCODE_UTF16BE = 0x02,  // UTF-16BE (big-endian) encoded without BOM, in ID3v2.4.
   ENCODE_UTF8    = 0x03,  // UTF-8 encoded in ID3v2.4.
   ENCODE_TXTERR           // Invalid text-frame encoding (implicitly the next value, 0x04)
} ;

//* Definitions for encoding and decoding UTF-16 data.                         *
//* A codepoint in the range usxMIN20 - usxMAX20 is written as two 16-bit      *
//* units: an MS unit (usxMSU16 - usxMSU16e) followed by an LS unit            *
//* (usxLSU16 - usxLSU16e). Each unit carries ten bits (usxMASK10) of the      *
//* value. All other encodable codepoints occupy a single 16-bit unit.         *
const UINT usxMIN20  = 0x0010000 ;  // minimum value requiring 20-bit conversion
const UINT usxMAX20  = 0x010FFFF ;  // maximum value for 20-bit UTF-16
const UINT usxMSU16  = 0x000D800 ;  // MS unit mask value
                                    // and beginning of 16-bit reserved sequence
const UINT usxMSU16e = 0x000DBFF ;  // end of MS unit range
const UINT usxLSU16  = 0x000DC00 ;  // LS unit mask value
const UINT usxLSU16e = 0x000DFFF ;  // end of LS unit range
const UINT usxHIGH16 = 0x00E000 ;   // beginning of high-range 16-bit codepoints
const UINT usxMAX16  = 0x00FFFD ;   // maximum value for 16-bit codepoint
const UINT usxMASK10 = 0x0003FF ;   // 10-bit mask


//* Byte-order-mark (BOM) bytes for UTF-16 text.                  *
//* A text frame which begins with ENCODE_UTF16 must continue     *
//* with either "UTF_MSB UTF_LSB" or "UTF_LSB UTF_MSB".           *
//* If it does not, it is not a valid Unicode-16 string.          *
//* The casts make the conversion of the byte values to 'char'    *
//* explicit; the stored bit patterns are 0xFE and 0xFF.          *
const char UTF_MSB = (char)0xFE ;
const char UTF_LSB = (char)0xFF ;

//* A valid Unicode-16 encoded string must be at least this long: *
//*    ENCODE_UTF16   : "01 FF FE xx xx" or "01 FE FF xx xx"      *
//*    ENCODE_UTF16BE : "02 xx xx"                                *
const int  UTF16_MIN = 5 ;       // encoding byte + 2-byte BOM + one 16-bit unit
const int  UTF16BE_MIN = 3 ;     // encoding byte + one 16-bit unit (no BOM)

//* A valid ASCII or UTF-8 encoded string must be at least this long: *
//*             "00 xx"                                               *
const int  ASCII_MIN = 2 ;       // encoding byte + one text byte

//* "Picture Type" descriptions as defined for id3v2 standard.  *
//* These strings are defined in the languages supported by the *
//* Taggit application. See enum AppLang in Taggit.hpp.         *
//* First index : language sub-array (English, Espanol,         *
//*               Zhongwen, Tieng Viet in this file)            *
//* Second index: picture type, ZERO through (PIC_TYPES - 1)    *
const short PIC_TYPES = 21 ;     // number of descriptions per sub-array
const char* const pType[][PIC_TYPES] = 
{
   {  // English
     "Other",
     "32x32 pixels 'file icon' (PNG only)",
     "Other file icon",
     "Cover (front)",
     "Cover (back)",
     "Leaflet page",
     "Media (e.g. label side of CD)",
     "Lead artist/lead performer/soloist",
     "Artist/performer",
     "Conductor",
     "Band/Orchestra",
     "Composer",
     "Lyricist/text writer",
     "Recording Location",
     "During recording",
     "During performance",
     "Movie/video screen capture",
     "A bright coloured fish",
     "Illustration",
     "Band/artist logotype",
     "Publisher/Studio logotype",
   },
   {  // Espanol
     "Otro",
     "32x32 píxeles 'icono de archivo' (sólo formato PNG)",
     "Otro icono de archivo",
     "Arte de la cubierta (del anverso)",
     "Arte de la cubierta (reverso)",
     "Página del folleto",
     "Etiqueta en el medio de grabación",
     "Artista principal/intérprete principal/solista",
     "Artista/Ejecutante",
     "Director de orquesta",
     "Banda musical/Orquesta",
     "Compositor",
     "Letrista/Escritor de texto",
     "Ubicación de la grabación",
     "Fotos durante la grabación",
     "Fotos durante la actuación",
     "Imágenes fijas de película/Captura de pantalla de video",
     "Un pez de colores brillantes",
     "Ilustración",
     "Logotipo del grupo o del artista",
     "Logotipo del editor o del estudio",
   },
   {  // Zhongwen
     "其他",
     "32x32 像素 '文件图标' (只有 PNG)",
     "其他文件图标",
     "封面艺术 (前边的)",
     "封面艺术 (背面)",
     "从传单页面",
     "标记在记录介质上",
     "主唱/主演/独奏家",
     "艺人/演员",
     "乐团指挥",
     "音乐团体/乐队",
     "作曲家",
     "歌词作家/文字作家",
     "录音工作室",
     "录制过程中的照片",
     "表演期间的照片",
     "电影静止图像/视频截图",
     "鲜艳的鱼",
     "插图",
     "团体或艺术家的标识",
     "出版商或工作室标志",
   },
   {  // Tieng Viet
     "Khác",
     "32x32 pixel 'tập tin biểu tượng' (PNG chỉ có một)",
     "Khác tập tin biểu tượng",
     "Bìa trước",
     "Sau trước",   // NOTE(review): entry for "Cover (back)"; wording looks doubtful - confirm with a native speaker
     "Trang từ tờ rơi",
     "Bì mạc (Nhãn đĩa CD)",
     "Nghệ sĩ chính/diễn viên chính/nghệ sĩ độc tấu",
     "Nghệ sĩ / nghệ sĩ biểu diễn",
     "Nhạc trưởng",
     "Ban nhạc/dàn nhạc",
     "Nhà soạn nhạc",
     "Người lyric/nhà văn của văn bản",
     "Vị trí ghi âm",
     "Trong quá trình ghi âm",
     "Trong khi thực hiện",
     "Chụp màn hình của phim hoặc video",
     "Một con cá màu sáng",
     "Hình minh họa",
     "Ban nhạc/biểu tượng của nghệ sĩ",
     "Nhà xuất bản/ghi nhãn hiệu studio",
   },
} ;

//* Map of ISO8859-1, single-byte characters between 0xA0 and 0xFF. These      *
//* characters were the first extension to pure ASCII (0x00-0x7E) and support  *
//* various European languages, especially vowels.                             *
//* The defined characters map directly to UNICODE (UTF-32); however, we       *
//* have created this lookup table to verify the mapping. ISO 8859-1 has       *
//* several variants up to ISO 8859-15, but the MPEG standard references the   *
//* original ISO 8859-1 version.                                               *
//*   0x00         null terminator                                             *
//*   0x01 - 0x09  undefined                                                   *
//*   0x0A         newline                                                     *
//*   0x0B - 0x1F  undefined                                                   *
//*   0x20 - 0x7E  ASCII (ISO 646)                                             *
//*   0x7F - 0x9F  control codes                                               *
//*   0xA0 - 0xFF  additional European characters, ISO 2022 (this table)       *
//*                                                                            *
//* In the internet world, this standard is often confused with the            *
//* Windows(tm) 1252 character set which is similar, but not identical.        *
//*                                                                            *
//* iso8859COUNT == (iso8859LAST - iso8859FIRST + 1)                           *
const short iso8859FIRST = 0x00A0,  // first character in array
            iso8859LAST  = 0x00FF,  // last character in array
            iso8859COUNT = 96 ;     // number of elements in array

#if 0    // NOT CURRENTLY USED
// Programmer's Note: Because we have manually verified that this range of 
// single-byte characters maps directly to UTF-32, we do not need this table; 
// however, we have kept a copy for hysterical purposes.
//* Identity map: element [n] holds the codepoint (iso8859FIRST + n). *
const wchar_t iso_8859_1[iso8859COUNT] = 
{  //             CODE  UNICODE  HTML     COMMENT
   0x00A0,  //     A0   U+00A0   &nbsp;   (non-breaking space)
   0x00A1,  // ¡   A1   U+00A1            (inverted exclamation)
   0x00A2,  // ¢   A2   U+00A2            (cent)
   0x00A3,  // £   A3   U+00A3            (Pound Sterling)
   0x00A4,  // ¤   A4   U+00A4            (currency)
   0x00A5,  // ¥   A5   U+00A5            (Yuan)
   0x00A6,  // ¦   A6   U+00A6            (broken bar)
   0x00A7,  // §   A7   U+00A7            (section)
   0x00A8,  // ¨   A8   U+00A8            (diaeresis)
   0x00A9,  // ©   A9   U+00A9            (copyright)
   0x00AA,  // ª   AA   U+00AA            (feminine ordinal)
   0x00AB,  // «   AB   U+00AB            (left double-angle quote)
   0x00AC,  // ¬   AC   U+00AC            (NOT)
   0x00AD,  // SHY AD   U+00AD   &shy;    (soft hyphen)
   0x00AE,  // ®   AE   U+00AE            (registered)
   0x00AF,  // ¯   AF   U+00AF            (macron)
   0x00B0,  // °   B0   U+00B0            (degree)
   0x00B1,  // ±   B1   U+00B1            (plus/minus)
   0x00B2,  // ²   B2   U+00B2            (superscript 2)
   0x00B3,  // ³   B3   U+00B3            (superscript 3)
   0x00B4,  // ´   B4   U+00B4            (acute)
   0x00B5,  // µ   B5   U+00B5            (micro)
   0x00B6,  // ¶   B6   U+00B6            (pilcrow)
   0x00B7,  // ·   B7   U+00B7            (middle dot)
   0x00B8,  // ¸   B8   U+00B8            (cedilla)
   0x00B9,  // ¹   B9   U+00B9            (superscript 1)
   0x00BA,  // º   BA   U+00BA            (masculine ordinal)
   0x00BB,  // »   BB   U+00BB            (right double-angle quote)
   0x00BC,  // ¼   BC   U+00BC            (one quarter)
   0x00BD,  // ½   BD   U+00BD            (one half)
   0x00BE,  // ¾   BE   U+00BE            (three quarters)
   0x00BF,  // ¿   BF   U+00BF            (inverted question)
   0x00C0,  // À   C0   U+00C0            (A + grave)
   0x00C1,  // Á   C1   U+00C1            (A + acute)
   0x00C2,  // Â   C2   U+00C2            (A + circumflex)
   0x00C3,  // Ã   C3   U+00C3            (A + tilde)
   0x00C4,  // Ä   C4   U+00C4   &Auml;   (A + diaeresis)
   0x00C5,  // Å   C5   U+00C5            (A + ring)
   0x00C6,  // Æ   C6   U+00C6            (AE)
   0x00C7,  // Ç   C7   U+00C7            (C + cedilla)
   0x00C8,  // È   C8   U+00C8            (E + grave)
   0x00C9,  // É   C9   U+00C9            (E + acute)
   0x00CA,  // Ê   CA   U+00CA            (E + circumflex)
   0x00CB,  // Ë   CB   U+00CB   &Euml;   (E + diaeresis)
   0x00CC,  // Ì   CC   U+00CC            (I + grave)
   0x00CD,  // Í   CD   U+00CD            (I + acute)
   0x00CE,  // Î   CE   U+00CE            (I + circumflex)
   0x00CF,  // Ï   CF   U+00CF   &Iuml;   (I + diaeresis)
   0x00D0,  // Ð   D0   U+00D0            (large eth)
   0x00D1,  // Ñ   D1   U+00D1            (N + tilde)
   0x00D2,  // Ò   D2   U+00D2            (O + grave)
   0x00D3,  // Ó   D3   U+00D3            (O + acute)
   0x00D4,  // Ô   D4   U+00D4            (O + circumflex)
   0x00D5,  // Õ   D5   U+00D5            (O + tilde)
   0x00D6,  // Ö   D6   U+00D6   &Ouml;   (O + diaeresis)
   0x00D7,  // ×   D7   U+00D7            (multiply)
   0x00D8,  // Ø   D8   U+00D8            (O with stroke)
   0x00D9,  // Ù   D9   U+00D9            (U + grave)
   0x00DA,  // Ú   DA   U+00DA            (U + acute)
   0x00DB,  // Û   DB   U+00DB            (U + circumflex)
   0x00DC,  // Ü   DC   U+00DC   &Uuml;   (U + diaeresis)
   0x00DD,  // Ý   DD   U+00DD            (Y + acute)
   0x00DE,  // Þ   DE   U+00DE            (upper thorn)
   0x00DF,  // ß   DF   U+00DF            (small sharp s)
   0x00E0,  // à   E0   U+00E0            (a + grave)
   0x00E1,  // á   E1   U+00E1            (a + acute)
   0x00E2,  // â   E2   U+00E2            (a + circumflex)
   0x00E3,  // ã   E3   U+00E3            (a + tilde)
   0x00E4,  // ä   E4   U+00E4   &auml;   (a + diaeresis)
   0x00E5,  // å   E5   U+00E5            (a + ring)
   0x00E6,  // æ   E6   U+00E6            (ae)
   0x00E7,  // ç   E7   U+00E7            (c + cedilla)
   0x00E8,  // è   E8   U+00E8            (e + grave)
   0x00E9,  // é   E9   U+00E9            (e + acute)
   0x00EA,  // ê   EA   U+00EA            (e + circumflex)
   0x00EB,  // ë   EB   U+00EB   &euml;   (e + diaeresis)
   0x00EC,  // ì   EC   U+00EC            (i + grave)
   0x00ED,  // í   ED   U+00ED            (i + acute)
   0x00EE,  // î   EE   U+00EE            (i + circumflex)
   0x00EF,  // ï   EF   U+00EF   &iuml;   (i + diaeresis)
   0x00F0,  // ð   F0   U+00F0            (small eth)
   0x00F1,  // ñ   F1   U+00F1            (n + tilde)
   0x00F2,  // ò   F2   U+00F2            (o + grave)
   0x00F3,  // ó   F3   U+00F3            (o + acute)
   0x00F4,  // ô   F4   U+00F4            (o + circumflex)
   0x00F5,  // õ   F5   U+00F5            (o + tilde)
   0x00F6,  // ö   F6   U+00F6   &ouml;   (o + diaeresis)
   0x00F7,  // ÷   F7   U+00F7            (divide)
   0x00F8,  // ø   F8   U+00F8            (o with stroke)
   0x00F9,  // ù   F9   U+00F9            (u + grave)
   0x00FA,  // ú   FA   U+00FA            (u + acute)
   0x00FB,  // û   FB   U+00FB            (u + circumflex)
   0x00FC,  // ü   FC   U+00FC   &uuml;   (u + diaeresis)
   0x00FD,  // ý   FD   U+00FD            (y acute)
   0x00FE,  // þ   FE   U+00FE            (lower thorn)
   0x00FF,  // ÿ   FF   U+00FF   &yuml;   (y with diaeresis)
} ;   // iso_8859_1
#endif   // NOT CURRENTLY USED

const UINT framehdrCNT = 10 ;    // size of frame header (presumably a byte count - confirm against the reader code)
class id3v2_framehdr
{
   public:

   id3v2_framehdr ()
   {
      //* A new object always starts out with every member cleared *
      this->reset () ;
   }
   ~id3v2_framehdr ( void ) {}      // empty body: the destructor performs no explicit cleanup
   void reset ()
   {
      //* Frame identifier: clear all six characters of the ID field *
      for ( short idx = 0 ; idx < 6 ; ++idx )
         this->frame_id[idx] = NULLCHAR ;

      //* Frame size and the raw flag values *
      this->frame_size   = ZERO ;
      this->encode_flags = ZERO ;
      this->status_flags = ZERO ;

      //* Individual flags *
      this->flag_readonly  = false ;
      this->flag_file_pres = false ;
      this->flag_tag_pres  = false ;
      this->flag_grouped   = false ;
      this->flag_encrypt   = false ;
      this->flag_compress  = false ;

      //* Text encoding and byte order *
      this->encoding   = ENCODE_ASCII ;
      this->big_endian = false ;

      //* Optional header extensions *
      this->decomp   = ZERO ;    // if compressed, Zlib compression is used
      this->crypto   = ZERO ;    // if active, s/b a value greater than 0x80 (see ENCR frame)
      this->group_id = ZERO ;    // if active, s/b a value greater than 0x80 (see GRID frame)
   }

   //* Convert a 4-byte sequence (MSB at offset 0) to an integer value. *
   //* Programmer's Note: This clunky construct avoids the C library's  *
   //* "helpful" automatic sign extension.                              *
   int intConv ( const UCHAR* ucp )
   {
      int i =    (UINT)ucp[3] 
              | ((UINT)ucp[2] << 8)
              | ((UINT)ucp[1] << 16)
              | ((UINT)ucp[0] << 24) ;
      return i ;
   }

   //* Convert a 32-bit integer into a big-endian byte stream.    *
   //* (used for both 'frame-size' and 'decomp')                  *
   short intConv ( int ival, char* obuff )
   {
      short outCnt = ZERO ;      // bytes written (return value)

      //* Emit the most-significant byte first: shifts of 24, 16, 8, 0 bits *
      for ( int shift = 24 ; shift >= 0 ; shift -= 8 )
         obuff[outCnt++] = (char)((ival >> shift) & 0x000000FF) ;

      return outCnt ;
   }

   //* Convert raw byte data to gString (UTF-8) format.           *
   //* -- Assumes that 'frame_size' has been initialized.         *
   //* -- First byte of source indicates encoding:                *
   //*      ENCODE_ASCII or ENCODE_UTF16 (with BOM) or            *
   //*      ENCODE_UTF16BE (no BOM) or ENCODE_UTF8                *
   //* -- ENCODE_UTF16 requires frame_size >= 5 because:          *
   //*        "01 FF FE xx xx" or "01 FE FF xx xx"                *
   //*    is the shortest possible valid UTF-16 string.           *
   //* -- ENCODE_UTF16BE requires frame_size >= 3 because:        *
   //*        "02 xx xx"                                          *
   //*    is the shortest possible UTF16BE string.                *
   //* -- ENCODE_ASCII and ENCODE_UTF8 require frame_size >= 2    *
   //*       because: 0n xx is the shortest possible string       *
   //*                                                            *
   //* Note that if UTF-16 were written as a byte stream as it    *
   //* should have been, then there wouldn't be an issue with     *
   //* 'endian-ness'. However, we must accommodate the encoders   *
   //* who write the data as 16-bit integers on either big-endian *
   //* or little-endian hardware without regard to what they are  *
   //* actually doing.                                            *
   //* Be aware that when we _encode_, we always encode byte-wise *
   //* as big-endian.                                             *
   //*                                                            *
   //* Because the folks at the VLC project and elsewhere have    *
   //* little regard for the standard, we must do some defensive  *
   //* programming:                                               *
   //* a) Verify any claim that the source data are ASCII.        *
   //*    Bytes in the Latin-1 range (A0h-FFh) are passed through *
   //*    by ascConv(); other non-printing bytes become '?'.      *
   //* b) Verify that a UTF-16 string actually is.                *
   //* c) Verify the endian-ness of UTF-16 data.                  *
   //* d) For id3v2.4 (but not id3v2.3) text frames may consist   *
   //*    of multiple strings which are delimited by the null     *
   //*    character (00h(00h)). We believe that this is a huge    *
   //*    mistake and a bone-headed "enhancement." However, since *
   //*    they didn't consult us, we have to parse for it, and if *
   //*    found, we substitute a forward slash '/' to concatenate *
   //*    the strings for display.                                *
   //*         "This is a \0 concatenated string."                *
   //*    becomes:                                                *
   //*         "This is a / concatenated string."                 *
   //* e) If encoding error, then attempt to decode as UTF-8,     *
   //*    which has been routinely used in versions prior to      *
   //*    id3v2.4 even though it was not part of the standard.    *
   //* -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  - *
   //* Input  : rawPtr  : pointer to source byte stream           *
   //*          gsTrg   : (by reference) receives converted text  *
   //*                                                            *
   //* Return: number of wchar_t characters converted             *
   //*         or (-1) if conversion error                        *
   short txtConv ( const char* rawPtr, gString& gsTrg )
   {
      short convChars = ZERO ;         // return value

      //* The first byte of the raw data names the text encoding *
      this->encoding = rawPtr[0] ;

      //* UTF-16 with byte-order mark (BOM) *
      if ( this->encoding == ENCODE_UTF16 )
      {
         //* The BOM bytes are examined only when the frame is long enough  *
         //* to contain them, and the BOM is accepted only as a true pair:  *
         //*   "UTF_MSB UTF_LSB" == big-endian                              *
         //*   "UTF_LSB UTF_MSB" == little-endian (BOM stored backward)     *
         bool bomBE = false, bomLE = false ;
         if ( this->frame_size >= UTF16_MIN )
         {
            bomBE = (rawPtr[1] == UTF_MSB) && (rawPtr[2] == UTF_LSB) ;
            bomLE = (rawPtr[1] == UTF_LSB) && (rawPtr[2] == UTF_MSB) ;
         }
         this->big_endian = !bomLE ;   // big-endian unless the BOM says otherwise

         if ( bomBE || bomLE )         // text begins after encoding byte and BOM
            convChars = this->utf16Conv ( &rawPtr[3], gsTrg ) ;
         else                          // invalid UTF-16 encoding, try UTF-8
            convChars = this->utf8Conv ( &rawPtr[1], gsTrg ) ;
      }

      //* UTF-16 big-endian, no BOM *
      else if ( this->encoding == ENCODE_UTF16BE )
      {
         this->big_endian = true ;
         if ( this->frame_size >= UTF16BE_MIN )
            convChars = this->utf16Conv ( &rawPtr[1], gsTrg ) ;
         else                          // too short for UTF-16, try UTF-8
            convChars = this->utf8Conv ( &rawPtr[1], gsTrg ) ;
      }

      //* Single-byte (ISO 8859-1) text *
      else if ( this->encoding == ENCODE_ASCII )
         convChars = this->ascConv ( &rawPtr[1], gsTrg ) ;

      //* ENCODE_UTF8 or unsupported format *
      else
         convChars = this->utf8Conv ( &rawPtr[1], gsTrg ) ;

      return convChars ;
   }  //* End txtConv() *

   //* Public Method.                                             *
   //* Encode the wchar_t (wide text) array into a form that MP3  *
   //* text frames can understand.                                *
   //*  Note that we always include the NULL terminator to the    *
   //*  text data. The standard neither demands nor forbids it.   *
   //*                                                            *
   //* Input  : enc   : indicates the type of encoding:           *
   //*                   a) ENCODE_ASCII                          *
   //*                   b) ENCODE_UTF8                           *
   //*                   c) ENCODE_UTF16 (big-endian with BOM)    *
   //*                   d) ENCODE_UTF16BE (big-endian, no BOM)   *
   //*          idIndx: index into the TextFrameID[] array (above)*
   //*          fData : receives the encoded data                 *
   //*          src   : wchar_t-encoded source text data          *
   //*                                                            *
   //* Returns: number of bytes in formatted output               *
   //*          or ZERO if encoding error                         *
   short txtEncode ( TextEncode enc, short idIndx, char* fData, const gString& src )
   {
      //* Initialize the data members ('frame_size' becomes ZERO) *
      this->reset () ;

      //* Caller's choice of encoding (validate).                  *
      //* An unrecognized value is replaced by ENCODE_UTF16, and   *
      //* the validated value is the one used for ALL later steps. *
      TextEncode useEnc = ENCODE_UTF16 ;
      if ( (enc == ENCODE_ASCII) || (enc == ENCODE_UTF16BE) || (enc == ENCODE_UTF8) )
         useEnc = enc ;
      this->encoding = useEnc ;
      this->big_endian = true ;           // 'true' but ignored

      //* Validate the frame ID. For an invalid index nothing is   *
      //* written and the ZERO 'frame_size' reports the error.     *
      if ( (idIndx >= ZERO) && (idIndx < TEXT_FRAMES) )
      {
         gString gsid( TextFrameID[idIndx] ) ; // 4-byte Tnnn frame ID
         gsid.copy( this->frame_id, 6 ) ;

         //* Encode the text data.                                       *
         //* -- For ASCII and UTF-8, simply copy the source to target.   *
         //*    'frame_size' == number of text bytes + encoding byte     *
         //* -- For UTF-16 variants, convert from wchar_t to 16-bit data.*
         //*    'frame_size' is the value returned by utf16Encode()      *
         if ( (useEnc == ENCODE_ASCII) || (useEnc == ENCODE_UTF8) )
         {
            fData[0] = this->encoding ;                  // encoding byte
            src.copy( &fData[1], src.utfbytes() ) ;      // text follows it
            this->frame_size = src.utfbytes() + 1 ;
         }
         else
            this->frame_size = this->utf16Encode ( useEnc, fData, src ) ;
      }
      return (short)this->frame_size ;

   }  //* End txtEncode() *

   #if 0    // FOR DEBUGGING ONLY
   //* Encode source data as UTF-16, little-endian.               *
   //* Note that for debugging purposes, we have implemented code *
   //* that can encode in a little-endian format, but this option *
   //* is not useful in production mode.                          *
   //* Note that only 'frame_size' member is initialized.         *
   short utf16leEncode ( char* fText, const gString& src )
   {
      short fIndex = ZERO ;   // index into output array

      //* Output begins with the encoding byte and a little-endian BOM *
      fText[fIndex++] = ENCODE_UTF16 ;    // type of encoding
      fText[fIndex++] = (char)0x0FF ;     // LSB of Byte-Order-Mark
      fText[fIndex++] = (char)0x0FE ;     // MSB of Byte-Order-Mark

      //* Establish the source array of wchar_t (wide) characters *
      short wcnt ;   // number of source characters (including the null terminator)
      const wchar_t *wstr = src.gstr( wcnt ) ;
      UINT cx ;      // source character

      for ( short wIndex = ZERO ; wIndex < wcnt ; ++wIndex )
      {
         cx = wstr[wIndex] ;     // get a character from the input stream

         //* If the character can be encoded with a single, 16-bit value *
         if ( ((cx >= ZERO) && (cx < usxMSU16)) || 
              ((cx >= usxHIGH16) && (cx <= usxMAX16)) )
         {  //* Encode the bytes in little-endian order *
            fText[fIndex++] = cx & 0x000000FF ;
            fText[fIndex++] = (cx >> 8) & 0x000000FF ;
         }

         //* Else, character requires a pair of 16-bit values *
         else
         {
            UINT msUnit = ZERO, lsUnit = ZERO ;
            if ( (cx >= usxMIN20) && (cx <= usxMAX20) )
            {
               //* MS unit carries the upper ten bits, LS unit the lower ten. *
               //* Units are written MS unit first, each unit LSB first.      *
               msUnit = ((cx - usxMIN20) >> 10) | usxMSU16 ;
               lsUnit = (cx & usxMASK10) | usxLSU16 ;
               fText[fIndex++] = msUnit & 0x000000FF ;
               fText[fIndex++] = (msUnit >> 8) & 0x000000FF ;
               fText[fIndex++] = lsUnit & 0x000000FF ;
               fText[fIndex++] = (lsUnit >> 8) & 0x000000FF ;
            }
            //* Character cannot be encoded as UTF-16.          *
            //* Encode a question mark character, LSB first     *
            //* like every other unit in this output stream.    *
            else
            {
               cx = L'?' ;
               fText[fIndex++] = cx & 0x000000FF ;
               fText[fIndex++] = (cx >> 8) & 0x000000FF ;
            }
         }
      }
      //* Only the 'frame_size' member is initialized by this method *
      return ( this->frame_size = fIndex ) ;

   }  //* End utf16leEncode() *
   #endif   // FOR DEBUGGING ONLY

   //***************************************************************
   //* Methods that require controlled setup sequences are private.*
   //***************************************************************
   private:

   //* Private Method.                                            *
   //* Convert a raw ISO 8859-1 string to gString format.         *
   //* -- Assumes that 'frame_size' has been initialized.         *
   //* -- ISO 8859-1 is a group of single-byte characters.        *
   //* -- Note that the input strings are not always terminated.  *
   //* -- Note that Linux systems do not use the "Latin-1"        *
   //*    extensions of ISO8859-1 by default, so we must match    *
   //*    the Danish, German, Spanish etc. special alphabetical   *
   //*    characters (A0-FFh) explicitly.                         *
   short ascConv ( const char* rawPtr, gString& gsTrg )
   {
      gsTrg.clear() ;                        // initialize the target buffer
      int rawBytes = this->frame_size - 1 ;  // number of bytes in raw string
      wchar_t wc ;                           // 32-bit character
      for ( short ti = ZERO ; ti < rawBytes ; ++ti )
      {
         //* Printing characters of the ISO 8859-1 standard *
         if (   (((UCHAR)(rawPtr[ti]) >= 0x20) && (((UCHAR)rawPtr[ti]) <= 0x7E))
             || (rawPtr[ti] == '\n')
             || ((UCHAR)(rawPtr[ti]) >= iso8859FIRST && (UCHAR)(rawPtr[ti]) <= iso8859LAST)
            )
         {
            wc = (wchar_t)((UCHAR)(rawPtr[ti])) ;
            gsTrg.append( wc ) ;
         }
         //* If a null terminator is encountered BEFORE end-of-text,         *
         //* concatenate the separate strings in the field. (see note above) *
         else if ( (rawPtr[ti] == NULLCHAR) && (ti < (rawBytes - 1)) && (rawPtr[ti - 1] != ' ') )
         {
            gsTrg.append( L'/' ) ;
         }
         //* Control character OR a character not defined in ISO 8859-1.*
         else if ( rawPtr[ti] != NULLCHAR ) { wc = L'?' ; gsTrg.append( wc ) ; }
      }
      return ( gsTrg.gschars() ) ;
   }  //* End ascConv() *

   //* Private Method.                                            *
   //* Convert an (assumed) UTF-8 string to gString format.       *
   //* Note that UTF-8 is not supported by id3v2.3 but is         *
   //* supported by id3.v2.4.                                     *
   //* -- Assumes that 'frame_size' has been initialized.         *
   //* -- Assumes that 'rawPtr' points to head of text (not       *
   //*    encoding byte)                                          *
   //* -- The string may or may not be terminated.                *
   //* -- Strings _may be_ concatenated (id3v2.4 only).           *
   //* -- Returns number of wchar_t (wide) characters created.    *

   short utf8Conv ( const char* rawPtr, gString& gsTrg )
   {
      char cbuff[this->frame_size] ;
      int ti,                                // source index
          rawBytes = this->frame_size - 1 ;  // number of bytes in raw string
      gsTrg.clear() ;                        // initialize the target buffer
      for ( ti = ZERO ; ti <= rawBytes ; ++ti )
      {
         if ( rawPtr[ti] != '\0' )
            cbuff[ti] = rawPtr[ti] ;
         else           // null terminator encountered before end-of-text
         {              // concatenate the strings (see note above)
            if ( ti < (rawBytes - 1) )
               cbuff[ti] = '/' ;
            else        // end of text
               break ;
         }
      }
      cbuff[ti] = '\0' ;                     // be sure string is terminated
      gsTrg = cbuff ;
      return ( gsTrg.gschars() ) ;
   }  //* End utf8Conv() *

   //* Private Method:                                            *
   //* Convert a Unicode-16 string (big-endian or little-endian)  *
   //* to gString format.                                         *
   //* -- Assumes that 'frame_size' has been initialized.         *
   //* -- Unicode-16 strings are generally not terminated (except *
   //*    for empty strings).                                     *
   //* -- See additional information on decoding UTF-16 in the    *
   //*    mptDecodetextFrame() method header.                     *
   //* -- Note that because the wchar_t type is a signed integer, *
   //*    the compiler wants to sign-extend. Don't allow this.    *
   //* -- Returns number of wchar_t (wide) characters created.    *

   short utf16Conv ( const char* rawPtr, gString& gsTrg )
   {
      //* Offsets of the most- and least-significant byte inside each     *
      //* 16-bit unit, selected once from the 'big_endian' member.        *
      const int hiByte = this->big_endian ? 0 : 1 ;
      const int loByte = this->big_endian ? 1 : 0 ;

      int ti = ZERO ;                        // source index
      gsTrg.clear() ;                        // initialize the target buffer

      //* Bytes of text: 'frame_size' less the encoding byte and, for     *
      //* ENCODE_UTF16, less the two-byte byte-order-mark.                *
      int rawBytes = this->frame_size
                     - ((this->encoding == ENCODE_UTF16) ? 3 : 1) ;
      UINT msUnit, lsUnit,                   // for converting unit pairs
           cx ;                              // decoded character

      //* A unit is two bytes; a trailing odd byte is ignored rather than *
      //* being paired with a byte beyond the end of the text.            *
      while ( rawBytes >= 2 )
      {
         //* Masking after the cast discards any sign-extension of 'char'. *
         cx =   ((UINT(rawPtr[ti + hiByte]) << 8) & 0x0000FF00)
              |  (UINT(rawPtr[ti + loByte])       & 0x000000FF) ;
         ti += 2 ;
         rawBytes -= 2 ;

         //* Character fully represented by 16 bits (most characters are) *
         if ( (cx < usxMSU16) || ((cx >= usxHIGH16) && (cx <= usxMAX16)) )
         {
            if ( cx != 0 )
               gsTrg.append( cx ) ;
            else if ( rawBytes > ZERO )      // interim NULLCHAR found
            {
               gsTrg.append( L'/' ) ;        // string concatenation (id3v2.4)
               //* The following string may carry its own byte-order-mark. *
               //* If so, step over it.                                    *
               if ( (rawBytes >= 2) &&
                    ((rawPtr[ti] == UTF_MSB && rawPtr[ti + 1] == UTF_LSB)
                     ||
                     (rawPtr[ti] == UTF_LSB && rawPtr[ti + 1] == UTF_MSB)) )
               {
                  rawBytes -= 2 ;
                  ti += 2 ;
               }
            }
         }

         //* Character is represented by a pair of 16-bit units           *
         //* (20 bits actually used), most-significant unit first.        *
         else
         {
            msUnit = cx ;
            cx = L'?' ;                      // substituted unless the pair validates

            //* Fetch the second unit only if it lies inside the text.    *
            if ( rawBytes >= 2 )
            {
               lsUnit =   ((UINT(rawPtr[ti + hiByte]) << 8) & 0x0000FF00)
                        |  (UINT(rawPtr[ti + loByte])       & 0x000000FF) ;
               ti += 2 ;
               rawBytes -= 2 ;

               //* Validate BOTH units against their own ranges. *
               if ( ((msUnit >= usxMSU16) && (msUnit <= usxMSU16e))
                    &&
                    ((lsUnit >= usxLSU16) && (lsUnit <= usxLSU16e)) )
               {
                  //* The result is at least usxMIN20, so (provided that   *
                  //* constant is non-zero) it can never be mistaken for a *
                  //* null separator.                                      *
                  cx = usxMIN20 + ((msUnit & usxMASK10) << 10) ;
                  cx |= (lsUnit & usxMASK10) ;
               }
            }
            gsTrg.append( cx ) ;             // add the character to output buffer
         }
      }
      return ( gsTrg.gschars() ) ;

   }  //* End utf16Conv() *

   //* Private Method.                                            *
   //* Convert the wchar_t string to a UTF-16-encoded             *
   //* (big-endian) byte stream.                                  *
   //*                                                            *
   //* Input  : enc   : indicates the type of encoding:           *
   //*                   a) ENCODE_UTF16 (big-endian with BOM)    *
   //*                   b) ENCODE_UTF16BE (big-endian, no BOM)   *
   //*          fText : receives the encoded data                 *
   //*          src   : wchar_t-encoded source text data          *
   //*                                                            *
   //* Returns: number of bytes in formatted output               *
   //*          or (-1) if encoding error                         *
   //* -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  -  - *
   //* Note: UTF-16 may be encoded on either big-endian or        *
   //* little-endian hardware; however, we encode the UTF-16 as   *
   //* a byte stream, so our output is always encoded as          *
   //* big-endian.                                                *
   short utf16Encode ( TextEncode enc, char* fText, const gString& src )
   {
      //* Notes on the contract as implemented:                           *
      //* -- 'fText' receives: encoding byte, optional BOM, then the      *
      //*    big-endian UTF-16 units for every source character returned  *
      //*    by gstr(), which counts the null terminator.                 *
      //* -- The byte count is also stored in 'frame_size'.               *
      //* -- No error value is ever returned; a character that cannot be  *
      //*    represented is replaced by a question mark.                  *
      //* -- 'fText' must be large enough; no bounds check is possible.   *
      short fIndex = ZERO ;   // index into output array

      if ( enc == ENCODE_UTF16 )
      {
         fText[fIndex++] = ENCODE_UTF16 ;    // type of encoding
         fText[fIndex++] = (char)0x0FE ;     // MSB of Byte-Order-Mark
         fText[fIndex++] = (char)0x0FF ;     // LSB of Byte-Order-Mark
      }
      else
         fText[fIndex++] = ENCODE_UTF16BE ;  // type of encoding

      //* Establish the source array of wchar_t (wide) characters *
      short wcnt ;   // number of source characters (including the null terminator)
      const wchar_t *wstr = src.gstr( wcnt ) ;
      UINT cx ;      // source character

      for ( short wIndex = ZERO ; wIndex < wcnt ; ++wIndex )
      {
         cx = wstr[wIndex] ;     // get a character from the input stream

         //* Character can be encoded with a single, 16-bit value *
         if ( (cx < usxMSU16) || ((cx >= usxHIGH16) && (cx <= usxMAX16)) )
         {  //* Encode the bytes in big-endian order *
            fText[fIndex++] = (cx >> 8) & 0x000000FF ;
            fText[fIndex++] = cx & 0x000000FF ;
         }

         //* Character requires a pair of 16-bit values *
         else if ( (cx >= usxMIN20) && (cx <= usxMAX20) )
         {
            const UINT msUnit = ((cx - usxMIN20) >> 10) | usxMSU16 ;
            const UINT lsUnit = (cx & usxMASK10) | usxLSU16 ;
            fText[fIndex++] = (msUnit >> 8) & 0x000000FF ;
            fText[fIndex++] = msUnit & 0x000000FF ;
            fText[fIndex++] = (lsUnit >> 8) & 0x000000FF ;
            fText[fIndex++] = lsUnit & 0x000000FF ;
         }

         //* Character cannot be encoded as UTF-16.                 *
         //* Actually emit the question mark so that the character  *
         //* is replaced rather than silently dropped.              *
         else
         {
            cx = L'?' ;
            fText[fIndex++] = (cx >> 8) & 0x000000FF ;
            fText[fIndex++] = cx & 0x000000FF ;
         }
      }
      return ( this->frame_size = fIndex ) ;

   }  //* End utf16Encode() *


   //********************************
   //* All data members are public. *
   //********************************
   public:
   char  frame_id[6] ;     // S/B one of the 4-character names defined by the standard
   int   frame_size ;      // size of the frame in bytes
                           // (for text frames the conversion methods treat this count
                           //  as including the one-byte text-encoding marker)
   char  status_flags ;    // status bits %abc0.0000
   char  encode_flags ;    // encoding flags %ijk0.0000
   bool  flag_tag_pres ;   // if 'true',  preserve frame if tag is modified
                           // if 'false', discard frame if tag is modified
   bool  flag_file_pres ;  // if 'true',  preserve frame if non-tag data modified
                           // if 'false', discard frame if non-tag data is modified
   bool  flag_readonly ;   // if 'true',  frame is read-only unless software knows what it's doing
   bool  flag_compress ;   // if 'true',  frame is compressed
                           //    (4 bytes "decompressed size" added to frame header)
                           // if 'false', frame is not compressed
   bool  flag_encrypt ;    // if 'true',  frame is encrypted
                           //    (1 byte "encryption type" added to frame header)
                           // if 'false', frame is not encrypted
   bool  flag_grouped ;    // if 'true',  frame is a member of a frame group
                           //    (1 byte "group identifier" added to frame header)
                           // if 'false', frame is not grouped
   char  encoding ;        // for text frames only: text encoding
                           // 00h == ISO8859-1 (ASCII, i.e. Latin-1: 0x20 - 0xFF)
                           // 01h == UTF-16 (with BOM)
                           // 02h == UTF-16 big-endian (no BOM) (id3v2.4 only)
                           // 03h == UTF-8 encoding (id3v2.4 only)
   bool  big_endian ;      // for UTF-16 text frames only
                           // if 'true', big-endian Unicode-16
                           // if 'false', little-endian Unicode-16
   int   decomp ;          // for compressed frames only: expanded size
   char  crypto ;          // for encrypted frames only: type of encryption
   char  group_id ;        // for grouped frames only: group ID

} ;   // id3v2_framehdr

//* Flag byte for id3v2_taghdr class *
//* Four bits are defined: Bits 7-4  *
//*  bit7 unsynch
//*  bit6 extended header follows
//*  bit5 experimental tag
//*  bit4 tag footer terminates the tag (defined in id3v2.4)
//*       (bits 6 and 4 are mutually exclusive)
//*  bits 3-0 currently undefined (always reset)
// Programmer's Note: We could have the same functionality 
// using a union with bit fields, but this is more fun.
class taghdrFlags
{
   public:
   ~taghdrFlags ( void ) {}         // destructor
   taghdrFlags ( void )             // default constructor
   { this->reset () ; }
   taghdrFlags ( UCHAR init )       // initialization constructor
   { this->reset () ; this->Flags = init ; }
   void reset ( void )
   { this->Flags = ZERO ; }
   UCHAR setflags ( UCHAR reinit )  // Initialize all flags (requires intelligence)
   { return ( (UCHAR)((this->Flags = UINT(reinit & 0x000000F0))) ) ; }
   UCHAR getflags ( void ) const    // Returns flag byte
   { return (UCHAR)(this->Flags) ; }

   bool unsynch ( void ) const      // Bit7 - unsynch
   { return bool(this->Flags & 0x080) ; }
   void unsynch ( bool set )
   { 
      if ( set ) this->Flags |= 0x080 ;
      else       this->Flags &= 0x07F ;
   }

   bool exthdr ( void ) const       // Bit6 - exthdr
   { return bool(this->Flags & 0x040) ; }
   void exthdr ( bool set )
   { 
      if ( set ) this->Flags |= 0x040 ;
      else       this->Flags &= 0x0BF ;
   }

   bool exper ( void ) const        // Bit5 - exper
   { return bool(this->Flags & 0x020) ; }
   void exper ( bool set )
   { 
      if ( set ) this->Flags |= 0x020 ;
      else       this->Flags &= 0x0DF ;
   }

   bool footer ( void ) const       // Bit4 - footer
   { return bool(this->Flags & 0x010) ; }
   void footer ( bool set )
   { 
      if ( set ) this->Flags |= 0x010 ;
      else       this->Flags &= 0x0EF ;
   }

   //* Data Members *
   private:
   UINT Flags ;
} ;

//* Decoded data from an ID3v2.x Tag Header.           *
//* The tag header lives at the top of every MP3 file. *
const UINT taghdrCNT = 10 ;   // size of tag header (bytes); the tag footer is the same size
const UINT exthdrCNT = 6 ;    // size of extended header (bytes, if present)
                              // NOTE(review): presumably the size without the optional CRC
class id3v2_taghdr
{
   public:
   id3v2_taghdr ( void )
   { this->reset () ; }
   ~id3v2_taghdr ( void ) {}

   //* Return every data member to its cleared state.                    *
   //* All members are initialized here, including the unused 'spare',   *
   //* so that copying the object never reads an indeterminate value.    *
   void reset ( void )
   {
      this->file_id[0] = this->file_id[1] = this->file_id[2] = this->file_id[3] = 
      this->major = this->rev = ZERO ;
      this->flags.reset() ;
      this->tag_size = ZERO ;
      this->exBytes = this->padBytes = this->tagCRC = ZERO ;
      this->exFlags = ZERO ;
      this->crcFlag = this->spare = false ;
   }

   //* Initialize the identifier and version fields for an id3v2.3.0 tag.*
   //* Other members are not modified.                                   *
   void setHeader ( void )
   {
      this->file_id[0] = 'I' ;
      this->file_id[1] = 'D' ;
      this->file_id[2] = '3' ;
      this->file_id[3] = NULLCHAR ;
      this->major      = 3 ;     // id3v2.3.0
      this->rev        = 0 ;
   }

   //* Convert a 4-byte sequence (MSB at offset 0) to an integer value.  *
   //* Integer data are stored MSBit first in the bytes and MSByte first *
   //* in multi-byte integers, i.e. big-endian.                          *
   //* Programmer's Note: This clunky construct avoids the C library's   *
   //* "helpful" automatic sign extension.                               *
   //* Input  : ucp : pointer to (at least) four source bytes            *
   //* Returns: decoded 32-bit value                                     *
   int intConv ( const UCHAR* ucp ) const
   {
      int i =    (UINT)ucp[3] 
              | ((UINT)ucp[2] << 8)
              | ((UINT)ucp[1] << 16)
              | ((UINT)ucp[0] << 24) ;
      return i ;
   }

   //* Convert a 32-bit integer into a 4-byte, big-endian binary stream. *
   //* Input  : val : value to be encoded                                *
   //*          ucp : receives four bytes                                *
   //* Returns: number of bytes written (4)                              *
   int intConv ( int val, UCHAR* ucp ) const
   {
      int i = ZERO ;
      ucp[i++] = (UCHAR)((val >> 24) & 0x000000FF) ;
      ucp[i++] = (UCHAR)((val >> 16) & 0x000000FF) ;
      ucp[i++] = (UCHAR)((val >>  8) & 0x000000FF) ;
      ucp[i++] = (UCHAR)((val      ) & 0x000000FF) ;
      return i ;
   }

   //* Convert a 2-byte sequence (MSB at offset 0) to a short integer.   *
   //* Input  : ucp : pointer to (at least) two source bytes             *
   //* Returns: decoded 16-bit value                                     *
   short intConv16 ( const UCHAR* ucp ) const
   {
      short s =    (short)ucp[1]
                | ((short)ucp[0] << 8) ;
      return s ;
   }

   //* Convert a 16-bit integer into a 2-byte, big-endian binary stream. *
   //* Input  : val : value to be encoded                                *
   //*          ucp : receives two bytes                                 *
   //* Returns: number of bytes written (2)                              *
   int intConv ( short val, UCHAR* ucp ) const
   {
      int i = ZERO ;
      ucp[i++] = (UCHAR)((val >>  8) & 0x00FF) ;
      ucp[i++] = (UCHAR)((val      ) & 0x00FF) ;
      return i ;
   }

   //* Decode the formatted tag size, a 28-bit value.                    *
   //* Four(4) raw hex bytes with bit7 of each byte reset.               *
   //* Example: $00 00 02 01 == 257 byte (256 + 1) tag length.           *
   //* The decoded value is stored in 'tag_size' and returned.           *
   //* If bit7 is set in any of the four bytes, the size is not decoded  *
   //* and 'tag_size' (and the return value) is ZERO.                    *
   int decodeTagSize ( const char* rawPtr )
   {
      this->tag_size = ZERO ;
      if ( !(rawPtr[0] & 0x80) && !(rawPtr[1] & 0x80) &&
           !(rawPtr[2] & 0x80) && !(rawPtr[3] & 0x80) )
      {
         this->tag_size =    int(rawPtr[3])
                          + (int(rawPtr[2]) << 7)
                          + (int(rawPtr[1]) << 14)
                          + (int(rawPtr[0]) << 21) ;
      }
      return this->tag_size ;
   }

   //* Convert the integer tag size (byte count) to a byte stream:       *
   //* four bytes, seven significant bits each, most-significant first.  *
   //* Returns: number of bytes written (4)                              *
   int encodeTagSize ( char* obuff ) const
   {
      const int bMASK = 0x0000007F ;
      short indx = ZERO ;
      obuff[indx++] = (this->tag_size >> 21) & bMASK ;
      obuff[indx++] = (this->tag_size >> 14) & bMASK ;
      obuff[indx++] = (this->tag_size >>  7) & bMASK ;
      obuff[indx++] = this->tag_size & bMASK ;
      return indx ;
   }  //* End encodeTagSize() *

   char  file_id[4] ;      // S/B "ID3", if not, then no metadata
   char  major ;           // ID3 major version (hex)
   char  rev ;             // ID3 revision (hex)
   taghdrFlags flags ;     // flag byte

   //* Size of the tag record EXCLUDING the 10-byte tag header *
   int   tag_size ;

   //* Extended Header (if present) *
   int   exBytes ;         // (32-bit int) (indicated header size excludes this value)
                           // currently: six(6) bytes without CRC or
                           //            ten(10) bytes with CRC 
   int   padBytes ;        // bytes of reserved tag space i.e. the padding
   int   tagCRC ;          // 32-bit CRC value (if CRC used)
                           // Note: CRC is calculated over the range from AFTER the 
                           // extended header to the beginning of the padding i.e.
                           // only the actual frame data (not headers and not padding)
   short exFlags ;         // (all except MSB currently unused)
   bool  crcFlag ;         // true if CRC is used
   bool  spare ;           // (unused)

} ;   // id3v2_taghdr


//* From the ID3v2.4 specification: section 3.4:                               *
//* "To speed up the process of locating an ID3v2 tag when searching from      *
//*  the end of a file, a footer can be added to the tag. It is REQUIRED       *
//*  to add a footer to an appended tag, i.e. a tag located after all          *
//*  audio data. The footer is a copy of the header, but with a different      *
//*  identifier."                                                              *
//*                  ID3v2 identifier           "3DI"                          *
//*                  ID3v2 version              $04 00                         *
//*                  ID3v2 flags                %abcd0000                      *
//*                  ID3v2 size             4 * %0xxxxxxx                      *
//*  a) The size of the footer is the same as the header: taghdrCNT (10 bytes).*
//*  b) A tag footer is the same as a tag header except with a different       *
//*     identifier ("3DI" vs. "ID3").                                          *
//*  c) The 'size' is the same 28-bit format as the header                     *
//*  d) There must be no tag padding if a footer is present.                   *
//*  e) If a "SEEK" frame is found in the prepended tag, then use it to scan   *
//*     for additional tag(s).                                                 *
//*  f) If the continuation flag is set in the extended header, it indicates   *
//*     a continuation of the tag data.                                        *
//*                                                                            *
//*                                                                            *
//*                                                                            *

class id3v2_tagfooter
{
   public:
   id3v2_tagfooter ( void )  { this->reset() ; }
   ~id3v2_tagfooter ( void ) {}

   //* Clear the identifier, version, flags and size. *
   void reset ( void )
   {
      for ( short idx = ZERO ; idx < 4 ; ++idx )
         this->foot_id[idx] = ZERO ;
      this->major = ZERO ;
      this->rev   = ZERO ;
      this->tag_size = ZERO ;
      this->flags.reset() ;
   }

   //* Write the footer identifier "3DI" (null terminated). *
   void setFootID ( void )
   {
      char* idPtr = this->foot_id ;
      *idPtr++ = '3' ;
      *idPtr++ = 'D' ;
      *idPtr++ = 'I' ;
      *idPtr   = NULLCHAR ;
   }

   //* Decode the formatted tag size: four bytes, seven significant *
   //* bits each, most-significant byte first. The result is stored *
   //* in 'tag_size' and returned. If bit7 is set in any byte, the  *
   //* result is ZERO.                                              *
   int decodeTagSize ( const char* rawPtr )
   {
      int decoded = ZERO ;
      const bool bit7Set = 
         ( ((rawPtr[0] | rawPtr[1] | rawPtr[2] | rawPtr[3]) & 0x80) != 0 ) ;
      if ( ! bit7Set )
      {
         for ( short idx = ZERO ; idx < 4 ; ++idx )
            decoded = (decoded << 7) + int(rawPtr[idx]) ;
      }
      this->tag_size = decoded ;
      return this->tag_size ;
   }

   //* Convert the integer tag size (byte count) to a four-byte     *
   //* stream, seven bits per byte. Returns number of bytes written.*
   int encodeTagSize ( char* obuff ) const
   {
      short indx = ZERO ;
      for ( int shift = 21 ; shift >= 0 ; shift -= 7 )
         obuff[indx++] = (this->tag_size >> shift) & 0x0000007F ;
      return indx ;
   }  //* End encodeTagSize() *

   char  foot_id[4] ;      // S/B "3DI"
   char  major ;           // ID3 major version (hex)
   char  rev ;             // ID3 revision (hex)
   taghdrFlags flags ;     // flag byte

   //* Size of the tag record EXCLUDING the 10-byte tag header *
   //* -- four(4) raw hex bytes with bit7 of each byte ignored.*
   //*    Thus $00 00 02 01 == 257 byte (256 + 1) tag length.  *
   //* -- This value is the DECODED tag size.                  *
   int   tag_size ;
} ;   // id3v2_tagfooter

//* From the ID3v2.4 specification: section 4.15                               *
//* Data contained in an 'APIC' image frame.                                   *
//* <Header for 'Attached picture', ID: "APIC">                                *
//* Text encoding   $xx                                                        *
//* MIME type       <text string> $00                                          *
//* Picture type    $xx                                                        *
//* Description     <text string according to encoding> $00 (00)               *
//* Picture data    <binary data>                                              *
const short imgMAX_DESC = 256 ;        // Max length of image description string
                                       // (passed to gString::limitChars() by id3v2_image::txtEncode())
const char* const mimeJPG = "image/jpeg" ;   // MIME type for JPEG images
const char* const mimePNG = "image/png" ;    // MIME type for PNG images
const char* const mimeUnk = "image/" ;       // MIME type for unknown image formats
const char* const mimeLnk = "-->" ;          // MIME type external link

class id3v2_image
{
   public:
   ~id3v2_image ( void ) {}
   id3v2_image ( void )
   {
      this->reset() ;
   }

   //* Return all members to their default (empty) state.          *
   //* NOTE: If 'picPath' points to a temp file, the reset orphans *
   //*       the file.                                             *
   void reset ( void )
   {
      this->picPath[0] = this->txtDesc[0] = 
      this->mimType[0] = this->picExpl[0] = NULLCHAR ;
      this->picSize = picEncSize = ZERO ;
      this->picType = 0x00 ;           // default to "Other"
      this->url = false ;              // MIME type does not contain a URL
      this->encoding = ENCODE_ASCII ;  // default to ASCII
   }

   //* Convert raw byte data to gString (UTF-8) format and store   *
   //* the result in the 'txtDesc' member. The encoding actually   *
   //* used is stored in the 'encoding' member.                    *
   //*                                                            *
   //* Programmer's Note: For the image description, we take a    *
   //* shortcut in decoding ISO8859-1 text. If the data contain   *
   //* "Latin-1" extension characters, we will incorrectly decode *
   //* them because we assume pure ASCII or UTF-8 encoding.       *
   //* For full decoding of ISO8859-1 characters, see the         *
   //* id3v2_framehdr class definition.                           *
   //*                                                            *
   //* Input  : rawPtr  : pointer to source byte stream           *
   //*                    The string MUST be null terminated      *
   //*                    (two null bytes for UTF-16); the length *
   //*                    of the source is not otherwise known.   *
   //*          enc     : contains the text encoding type.        *
   //*                    An unrecognized value is handled as     *
   //*                    ENCODE_UTF8.                            *
   //*                                                            *
   //* Return: number of wchar_t characters converted, as         *
   //*         reported by gString::gschars()                     *
   //*         (no error value is currently generated)            *
   short txtConv ( const char* rawPtr, TextEncode enc )
   {
      short convChars = ZERO ;         // return value

      //* Range check and set text encoding *
      if ( (enc == ENCODE_ASCII) || (enc == ENCODE_UTF16) || 
           (enc == ENCODE_UTF16BE) || (enc == ENCODE_UTF8) )
         this->encoding = enc ;
      else
         this->encoding = ENCODE_UTF8 ;

      //* If ASCII encoding was specified, verify that the *
      //* data are actually ASCII. If not, assume UTF-8.   *
      if ( this->encoding == ENCODE_ASCII )
      {
         gString gs( rawPtr ) ;
         if ( !(gs.isASCII()) )
            this->encoding = ENCODE_UTF8 ;
      }

      if ( (this->encoding == ENCODE_UTF16) || (this->encoding == ENCODE_UTF16BE) )
      {
         // Programmer's Note: If the text encoding byte is wrong or if the BOM
         // is missing or otherwise invalid, we may produce garbage output, but
         // we rely on the presence of the null terminator to stop the madness.
         int ti = ZERO ;               // source index
         gString gs ;                  // temp buffer
         UINT msUnit, lsUnit,          // for converting unit pairs
              cx ;                     // undecoded 16-bit character
         bool big_endian = true ;      // assume big-endian input
         if ( this->encoding == ENCODE_UTF16 )
         {
            //* Check the byte-order mark.        *
            //* If the BOM was stored backward,   *
            //* then it is little-endian encoding.*
            if ( rawPtr[0] == UTF_LSB )
               big_endian = false ;
            if ( (rawPtr[0] == UTF_MSB || rawPtr[0] == UTF_LSB) &&
                 (rawPtr[1] == UTF_MSB || rawPtr[1] == UTF_LSB) )
               ti += 2 ;
         }
         do
         {
            //* Masking after the cast discards sign-extension of 'char' *
            if ( big_endian )
            {
               cx = ((UINT(rawPtr[ti++]) << 8) & 0x0000FF00) ;
               cx |= (UINT(rawPtr[ti++]) & 0x000000FF) ;
            }
            else
            {
               cx = (UINT(rawPtr[ti++]) & 0x000000FF) ;
               cx |= ((UINT(rawPtr[ti++]) << 8) & 0x0000FF00) ;
            }

            //* If the character is fully represented by 16 bits *
            //* (most characters are)                            *
            if ( (cx < usxMSU16) || 
                 ((cx >= usxHIGH16) && (cx <= usxMAX16)) )
            {
               if ( cx != ZERO )    // add the character to output buffer
                  gs.append( cx ) ;
            }
            //* Character is represented by 32 bits    *
            //* (20 bits actually used), 16 MSBs first.*
            else
            {
               msUnit = cx ;
               lsUnit = ZERO ;
               if ( big_endian )
               {
                  lsUnit = ((UINT(rawPtr[ti++]) << 8) & 0x0000FF00) ;
                  lsUnit |= (UINT(rawPtr[ti++]) & 0x000000FF) ;
               }
               else     // (little-endian)
               {
                  lsUnit = (UINT(rawPtr[ti++]) & 0x000000FF) ;
                  lsUnit |= ((UINT(rawPtr[ti++]) << 8) & 0x0000FF00) ;
               }
   
               //* Validate BOTH units against their own ranges *
               if ( ((msUnit >= usxMSU16) && (msUnit <= usxMSU16e))
                    &&
                    ((lsUnit >= usxLSU16) && (lsUnit <= usxLSU16e)) )
               {
                  cx = usxMIN20 + ((msUnit & usxMASK10) << 10) ;
                  cx |= (lsUnit & usxMASK10) ;
                  gs.append( cx ) ;
               }
               //* Unpaired unit was followed directly by the null   *
               //* terminator, which has now been consumed: replace  *
               //* the bad unit and stop, rather than scanning past  *
               //* the end of the source string.                     *
               else if ( lsUnit == ZERO )
               {
                  cx = L'?' ;
                  gs.append( cx ) ;
                  cx = ZERO ;       // terminates the loop
               }
               else                 // invalid UTF-16 codepoint
               {
                  cx = L'?' ;
                  gs.append( cx ) ;
               }
            }
         }
         while ( cx != ZERO ) ;
         convChars = gs.gschars() ;
         gs.copy( this->txtDesc, gsMAXBYTES ) ;
      }

      else if ( (this->encoding == ENCODE_ASCII) || (this->encoding == ENCODE_UTF8) )
      {
         gString gs( rawPtr ) ;
         convChars = gs.gschars() ;
         gs.copy( this->txtDesc, gsMAXBYTES ) ;
      }
      return convChars ;
   }  //* End txtConv() *

   //* Public Method.                                             *
   //* Encode the wchar_t (wide text) array into a form that MP3  *
   //* text frames can understand.                                *
   //*  Note that we always include the NULL terminator to the    *
   //*  text data. The standard neither demands nor forbids it.   *
   //*  Note also that the text-encoding byte itself is NOT       *
   //*  written here; only the (optional) BOM and the text.       *
   //*                                                            *
   //* Input  : enc   : indicates the type of encoding:           *
   //*                   a) ENCODE_ASCII                          *
   //*                   b) ENCODE_UTF8                           *
   //*                   c) ENCODE_UTF16 (big-endian with BOM)    *
   //*                   d) ENCODE_UTF16BE (big-endian, no BOM)   *
   //*                  An unrecognized value is handled as       *
   //*                  ENCODE_UTF16.                             *
   //*          fData : receives the encoded data                 *
   //*                  (caller must supply sufficient space)     *
   //*          src   : wchar_t-encoded source text data          *
   //*                                                            *
   //* Returns: number of bytes in formatted output               *
   short txtEncode ( TextEncode enc, char* fData, const gString& src )
   {
      gString srcx = src ;
      //* Clamp the description to imgMAX_DESC characters.         *
      //* NOTE(review): the original comment cited a 64-character  *
      //* limit from the standard, but imgMAX_DESC is larger.      *
      srcx.limitChars( imgMAX_DESC ) ;
      short fIndex = ZERO ;

      //* Caller's choice of encoding (validate) *
      if ( !(enc == ENCODE_ASCII)   && !(enc == ENCODE_UTF16) && 
           !(enc == ENCODE_UTF16BE) && !(enc == ENCODE_UTF8) )
         enc = ENCODE_UTF16 ;

      //* Select the output format from the validated 'enc' argument, *
      //* so that the format and the BOM decision below always agree. *
      if ( (enc == ENCODE_ASCII) || (enc == ENCODE_UTF8) )
      {
         srcx.copy( &fData[fIndex], srcx.utfbytes() ) ;
         fIndex = srcx.utfbytes() ;
      }
      else
      {
         if ( enc == ENCODE_UTF16 )
         {
            fData[fIndex++] = (char)0x0FE ;     // MSB of Byte-Order-Mark
            fData[fIndex++] = (char)0x0FF ;     // LSB of Byte-Order-Mark
         }
         short wcnt ;   // number of source chars (incl. null)
         const wchar_t *wstr = srcx.gstr( wcnt ) ;
         UINT cx ;      // source character

         for ( short wIndex = ZERO ; wIndex < wcnt ; ++wIndex )
         {
            cx = wstr[wIndex] ;     // get a character from the input stream

            //* Character requires a pair of 16-bit values *
            if ( !((cx < usxMSU16) || ((cx >= usxHIGH16) && (cx <= usxMAX16)))
                 && (cx >= usxMIN20) && (cx <= usxMAX20) )
            {
               const UINT msUnit = ((cx - usxMIN20) >> 10) | usxMSU16 ;
               const UINT lsUnit = (cx & usxMASK10) | usxLSU16 ;
               fData[fIndex++] = (msUnit >> 8) & 0x000000FF ;
               fData[fIndex++] = msUnit & 0x000000FF ;
               fData[fIndex++] = (lsUnit >> 8) & 0x000000FF ;
               fData[fIndex++] = lsUnit & 0x000000FF ;
            }

            //* Character can be encoded with a single, 16-bit value, *
            //* or cannot be encoded as UTF-16 at all, in which case  *
            //* a question mark is encoded in its place.              *
            else
            {
               if ( !((cx < usxMSU16) || ((cx >= usxHIGH16) && (cx <= usxMAX16))) )
                  cx = L'?' ;
               //* Encode the bytes in big-endian order *
               fData[fIndex++] = (cx >> 8) & 0x000000FF ;
               fData[fIndex++] = cx & 0x000000FF ;
            }
         }     // for(;;)
      }
      return fIndex ;

   }  // txtEncode()

   //* Convert the image info to a byte stream:        *
   //*   text-encoding byte                            *
   //*   MIME type (null terminated)                   *
   //*   picture-type byte                             *
   //*   description (encoded by txtEncode())          *
   //* The 'encoding' member is set to ENCODE_ASCII if *
   //* the description is pure ASCII, otherwise to     *
   //* ENCODE_UTF16.                                   *
   //* Note that caller will append the binary image   *
   //* data at the returned insertion point.           *
   //*                                                 *
   //* Input  : obuff : receives the encoded data      *
   //*                  (caller must supply sufficient *
   //*                  space)                         *
   //* Returns: number of bytes written, i.e. index at *
   //*          which the image data are to be written *
   short encode ( char* obuff )
   {
      short oindx = ZERO ;
      gString gs( this->txtDesc ) ;
      this->encoding = (gs.isASCII()) ? ENCODE_ASCII : ENCODE_UTF16 ;
      obuff[oindx++] = this->encoding ;
      for ( short i = ZERO ; i < gsMAXBYTES ; ++i )
      {
         obuff[oindx++] = this->mimType[i] ;
         if ( this->mimType[i] == NULLCHAR )    // terminator has been copied
            break ;
      }
      obuff[oindx++] = this->picType ;
      oindx += this->txtEncode( this->encoding, &obuff[oindx], gs ) ;

      return oindx ;
   }

   //***********************
   //* Public Data Members *
   //***********************
   char picPath[gsMAXBYTES] ;       // path/filename of external image file
   char txtDesc[gsMAXBYTES] ;       // text description of image (optional)
   char mimType[gsMAXBYTES] ;       // MIME type: generally "image/jpeg" or 
                                    //   "image/png" or "image/" (default)
                                    //   or possibly "-->url..."
   char picExpl[gsMAXBYTES] ;       // explanation of 'picType'. Note: Array of
                                    // pre-defined description strings: pType[][x]
   int  picSize ;                   // image size in bytes
   int  picEncSize ;                // size of ENCODED image in bytes (not used for MP3 and ASF)
                                    // for OGG images: size of Base64-encoded image data
   UCHAR picType ;                  // picture type code
   bool  url ;                      // 'true' if mimType contains a URL
   TextEncode encoding ;            // text encoding (member of enum TextEncode)

} ;   // id3v2_image

//* MPEG Audio Frame Header:                                                   *
//* Used to identify MP3 files when the file contains no metadata.             *
//* The header consists of a bitfield which can be thought of as               *
//* a 32-bit big-endian integer:                                               *
//* bits 31-21       11 bit synch word                                         *
//* bits 20-19       MPEG version number                                       *
//*                  11b == MPEG1, 10b == MPEG2, 00b == reserved               *
//* bits 18-17       Layer Index                                               *
//*                  11b == layer1, 10b == layer2, 01b == layer-3, 00b reserved*
//* bit  16          CRC protection flag: 1b == no CRC, 0b == with CRC         *
//* bits 15-12       Bit Rate in kilobits per second: 0000b, OR 32-320 in      *
//*                  multiples of 8: 0001b == 32, 0010b == 40, 0011b == 48 etc.*
//* bits 11-10       Sampling Rate Index                                       *
//*                  00b == 44100Hz, 01b == 48000Hz, 10b == 32000Hz, 11b res.  *
//* bit  9           Padding Bit: set == with padding, reset == no padding     *
//* bit  8           Private (informational only)                              *
//* bit  7-6         Channel Mode: 00b = stereo,  01b == joint stereo,         *
//*                                10b == 2 mono, 11b == 1 mono channel        *
//* bit  5-4         Joint-stereo mode extension                               *
//* bit  3           Copyright bit (informational only)                        *
//* bit  2           Original bit (informational only)                         *
//* bit  1-0         Emphasis (equalization indicator)                         *
//*                  00b == none,     01b == 50/15 ms                          *
//*                  10b == reserved, 11b == CCITT J.17                        *
//*                                                                            *
//* The start-of-frame may be identified by the 16-bit combination of          *
//* synch word/version/layer/crc as either 0xFFFB or 0xFFFA depending on the   *
//* state of the CRC flag.                                                     *
//* Note that this is not a definitive test of whether the file actually is    *
//* MPEG1, layer-3. For a definitive test, it would be necessary to locate,    *
//* decode and verify the header of at least one more audio frame. See below   *
//* for calculation of the current frame's size.                               *
//*                                                                            *
//* Note that the playback time (duration) of the audio is calculated as:      *
//*             seconds == File Size / Bitrate * 8                             *
//******************************************************************************

class id3v2_audioframe
{
   public:
   ~id3v2_audioframe ( void ) {}                   // destructor
   // NOTE: there is no default constructor.

   id3v2_audioframe ( const char* ibuff = NULL )   // initialization constructor
   {
      this->reset () ;
      if ( ibuff != NULL )
         this->decode ( ibuff ) ;
   }

   void reset ( void )                             // reset all data members
   {
      this->frame_size = ZERO ;
      this->synch    = this->version = this->layer  = this->bitrate = 
      this->samprate = this->channel = this->jsmode = this->emphasis = ZERO ;
      this->crc = this->padding = this->infobit = this->copyright =
      this->original = this->valid = false ;
   }

   bool decode ( const char* ibuff )
   {
      this->reset () ;
      this->synch     = ((((USHORT)ibuff[0] << 3) & 0x07F8)          // bits 21-31
                         | ((((USHORT)ibuff[1] >> 5) & 0x0007))) ; 
      this->version   = (((USHORT)ibuff[1] >> 3) & 0x0003) ;         // bits 19-20
      this->layer     = (((USHORT)ibuff[1] >> 1) & 0x0003) ;         // bits 17-18
      this->crc       = (bool)(ibuff[1] & 0x01) ;                    // bit  16
      this->bitrate   = (((USHORT)(ibuff[2] >> 4)) & 0x000F) ;       // bits 12-15
      this->samprate  = (USHORT)ibuff[2] & 0x03 ;           // bits 10-11
      this->padding   = (bool)(ibuff[3] & 0x02) ;           // bit  9
      this->infobit   = (bool)(ibuff[3] & 0x01) ;           // bit  8
      this->channel   = ((USHORT)ibuff[3] >> 6) & 0x0003 ;  // bits 6-7
      this->jsmode    = ((USHORT)ibuff[3] >> 4) & 0x0003 ;  // bits 4-5
      this->copyright = (bool)(ibuff[3] & 0x08) ;           // bit  3
      this->original  = (bool)(ibuff[3] & 0x04) ;           // bit  2
      this->emphasis  = (USHORT)ibuff[3] & 0x0003 ;         // bits 0-1

      if ( (this->synch == 0x07FF)   &&      // synch bits set
           (this->version == 0x0003) &&      // version 1
           (this->layer == 0x00001)          // layer-3
         )
      {
         this->valid = true ;

         //* The size of the audio frame is calculated as:                     *
         //* ((Samples Per Frame / 8 * Bitrate) / Sampling Rate) + PaddingSize *
         //* This is actually only an approximation because it is subject to   *
         //* rounding errors. Therefore, to find the next frame step ahead by  *
         //* this many bytes and begin scanning the input stream for the next  *
         //* synch word/version/layer/crc: 0xFFFB or 0xFFFA                    *
         double spf  = 1152.0,   // magic number from layer-3 specification
                brt  = ZERO,     // bitrate (see below)
                srt  = ZERO,     // sample rate (see below)
                slot = 1.0 ;     // slot size, always 1 byte for layer-3
         switch ( this->bitrate )
         {
            case  1:   brt = 32000 ;     break ;
            case  2:   brt = 40000 ;     break ;
            case  3:   brt = 48000 ;     break ;
            case  4:   brt = 56000 ;     break ;
            case  5:   brt = 64000 ;     break ;
            case  6:   brt = 80000 ;     break ;
            case  7:   brt = 96000 ;     break ;
            case  8:   brt = 112000 ;    break ;
            case 10:   brt = 160000 ;    break ;
            case 11:   brt = 192000 ;    break ;
            case 12:   brt = 224000 ;    break ;
            case 13:   brt = 256000 ;    break ;
            case 14:   brt = 320000 ;    break ;
            case  9:
            case  0:    // constant bitrate (will generate wrong answer)
            case 15:    // reserved (should not happen)
            default:    // most common bitrate
               brt = 128000 ; break ;
         } ;
         switch ( this->samprate )
         {
            case 1:  srt = 48000.0 ;  break ;
            case 2:  srt = 32000.0 ;  break ;
            case 3:     // reserved (should not happen)
            case 0:
            default:    // (one chance in 3 of being correct)
               srt = 44100.0 ;  break ;
         } ;
         this->frame_size = (UINT)(((spf / 8.0 * brt) / srt)
                             + (this->padding ? slot : ZERO)) ;
      }

      return this->valid ;
   }

   //** Data Members *
   UINT   frame_size ;        // number of byte contained in the frame
   USHORT synch ;             // synch word allows for synchronization of frame
   USHORT version ;           // MPEG version number
   USHORT layer ;             // layer index
   USHORT bitrate ;           // bit rate
   USHORT samprate ;          // sampling rate index
   USHORT channel ;           // channel mode
   USHORT jsmode ;            // joint-stereo mode extension
   USHORT emphasis ;          // emphasis code
   bool   crc ;               // CRC flag (reset == with CRC)
   bool   padding ;           // padding bit
   bool   infobit ;           // informational bit
   bool   copyright ;         // copyright bit
   bool   original ;          // original bit
   bool   valid ;             // 'true' if valid header
} ;   // id3v2_audioframe
