NO

Author Topic: Problem with C11 unicode support  (Read 14154 times)

CommonTater

  • Guest
Re: Problem with C11 unicode support
« Reply #15 on: July 08, 2012, 05:12:54 PM »
Hi Pelle.... Like you I use the API's unicode functions, not those in C and have had no problems at all with it. 

I think the big problem with Unicode support is that it's constantly changing and almost impossible to keep up with.  If they would settle on one standard (I'd recommend UTF-8) and develop it fully things would get a lot easier than having this huge proliferation of standards that are obviously implemented differently from one platform to the next.  This was supposed to enable better interchange... I think it's made it worse.

ANSI, OEM, UTF-8, UTF-16LE, UTF-16BE, UTF-32LE, UTF-32BE... how many code pages?  Ridiculous.

Of course the problem is that (as we all know) if you develop one way of doing things and then make a leaps-and-bounds improvement, you can never change over entirely to the new way of doing it... you end up supporting both methods whether you want to or not. 

I wish they could standardize a universal character set (64 bits if need be) and implement it in UTF-8's extensible character architecture.  Build all future compilers and OSs to be compatible with this one standard... life gets a lot easier! 




Offline Stefan Pendl

  • Global Moderator
  • Member
  • *****
  • Posts: 582
    • Homepage
Re: Problem with C11 unicode support
« Reply #16 on: July 08, 2012, 10:01:29 PM »
I wish they could standardize a universal character set (64 bits if need be) and implement it in UTF-8's extensible character architecture.  Build all future compilers and OSs to be compatible with this one standard... life gets a lot easier!
I second that, since all the hassle around multi-byte character sets is driving me nuts.
---
Stefan

Proud member of the UltraDefrag Development Team

CommonTater

  • Guest
Re: Problem with C11 unicode support
« Reply #17 on: July 08, 2012, 10:13:09 PM »
I second that, since all the hassle around multi-byte character sets is driving me nuts.

These days I just define UNICODE and _UNICODE at the top of every file.  I use WCHAR from windows and wchar_t from Pelles. I don't do anything in ANSI anymore. Things that get written to disk are either written as wide characters or converted to UTF8 when saving. 

Opening an unknown file is a massive pain... here's what I use to open playlists in one of my programs...
Code: [Select]
// Parse a playlist held in a wide-character buffer, one entry per line.
//
// Buf - NUL-terminated, writable wide string; wcstok() splits it in place
//       on CR and LF.
//
// Lines whose first character is '#' and lines accepted by PathIsURL() are
// skipped.  Entries that PathIsRelative() reports as relative get FilePath
// prepended.  An entry that exists on disk is passed to ExpandFolder() when
// it is a directory and to AddLine() otherwise.  Entries that would not fit
// in a MAX_PATH buffer are skipped instead of overflowing it.
// After the last line, ShuffleList() and SavePlayerFile() are called.
void CopyWChar(PWCHAR Buf)
{
  PWCHAR save = NULL;      // wcstok() continuation state
  WCHAR  fp[MAX_PATH];     // full path built for the current line

  // Continuation calls pass NULL as the first argument, as C11 requires.
  for (PWCHAR line = wcstok(Buf, L"\r\n", &save);
       line;
       line = wcstok(NULL, L"\r\n", &save)) {
    // ignore comments and urls
    if ((line[0] == '#') || PathIsURL(line))
      continue;

    size_t len = wcslen(line);

    // test for relative paths
    if (PathIsRelative(line)) {
      // prefix + entry + terminator must fit in fp
      if (wcslen(FilePath) + len >= MAX_PATH)
        continue;
      wcscpy(fp, FilePath);
      wcscat(fp, line);
    } else {
      if (len >= MAX_PATH)
        continue;
      wcscpy(fp, line);
    }

    // test for folders
    if (PathFileExists(fp)) {
      if (PathIsDirectory(fp))
        ExpandFolder(fp);
      else
        AddLine(fp);
    }
  }

  // randomize here
  ShuffleList();
  SavePlayerFile();
}

// Convert an 8-bit buffer to UTF-16 and hand the result to CopyWChar().
//
// Buf   - raw bytes, decoded as UTF-8 (CP_UTF8); need not be NUL-terminated
// Bytes - number of bytes in Buf
//
// Calls Exception(ERROR_NOT_ENOUGH_MEMORY) when the wide buffer cannot be
// allocated and Exception(0xE0640006) when MultiByteToWideChar() converts
// nothing (which includes Bytes == 0).  The wide buffer is freed in the
// finally block.
// NOTE(review): assumes Exception() raises and does not return, as the
// try/finally structure suggests - confirm.
void CopyMByte(PBYTE Buf, DWORD Bytes)
{
  // A UTF-8 sequence never decodes to more UTF-16 units than it has bytes,
  // so Bytes units suffice; the extra zeroed unit from calloc() is the
  // terminator CopyWChar() relies on.
  PWCHAR ut = calloc((size_t)Bytes + 1, sizeof(WCHAR));     // unicode buffer
  if (!ut)
    Exception(ERROR_NOT_ENOUGH_MEMORY);

  try
    {
      // The last argument is a count of wide characters, not bytes.
      if (MultiByteToWideChar(CP_UTF8, 0, (PCHAR)Buf, (int)Bytes, ut, (int)Bytes) < 1)
        Exception(0xE0640006);
      CopyWChar(ut);
    }
  finally
    {
      free(ut);
    }
}
 
// Convert UTF-16 byte order by swapping the two bytes of every 16-bit unit,
// in place.
//
// Buf   - buffer to modify
// Bytes - length of Buf in bytes
//
// Only complete pairs are swapped: when Bytes is odd the final byte is left
// as it is, so nothing beyond Buf[Bytes - 1] is ever read or written.
void FlipEndian(PBYTE Buf, DWORD Bytes)
{
  // Unsigned index matches Bytes; "i + 1 < Bytes" stops before a lone
  // trailing byte and is also false for Bytes == 0.
  for (DWORD i = 0; i + 1 < Bytes; i += 2) {
    BYTE t = Buf[i];       // temp for the swap
    Buf[i] = Buf[i + 1];
    Buf[i + 1] = t;
  }
}
 
// Open a playlist file, work out its text encoding and pass it to the parser.
//
// FileName - path of the playlist to load.
//
// The folder part of FileName plus a trailing backslash is stored in the
// global FilePath.  The whole file is read into a zero-padded buffer and
// dispatched on its byte-order mark; without a BOM the encoding is guessed
// from the presence of NUL bytes and the position of the first LF byte.
// UTF-16 text goes to CopyWChar() (after FlipEndian() when big-endian),
// 8-bit text goes to CopyMByte().
//
// Failures are reported through Exception():
//   Win32 error codes  - path too long, open/size/read/allocation failures
//   0xE0640002         - UTF-32 input
//   0xE0640003         - text with NUL bytes but no LF to probe
//   0xE0640007         - fewer bytes read than the file size
// NOTE(review): assumes Exception() raises and never returns, as the
// original try/finally structure implies - confirm.
// NOTE(review): assumes FilePath holds at least MAX_PATH characters, which
// the original wcsncpy() call also relied on - confirm.
//
// Returns TRUE once the parser has run.
BOOL M3ULaunch(PWCHAR FileName)
{
  enum { PAD = 4 };                   // zero bytes kept after the file data
  PBYTE  rf = NULL;                   // raw file data
  DWORD  br = 0;                      // bytes read
  DWORD  fs;                          // file size
  HANDLE pl;                          // playlist file handle

  // get path to file; both length checks keep FilePath terminated and
  // leave room for the backslash appended below
  if (wcslen(FileName) >= MAX_PATH)
    Exception(ERROR_FILENAME_EXCED_RANGE);
  wcscpy(FilePath, FileName);
  PathRemoveFileSpec(FilePath);
  if (wcslen(FilePath) + 2 > MAX_PATH)
    Exception(ERROR_FILENAME_EXCED_RANGE);
  wcscat(FilePath, L"\\");

  // open the file
  pl = CreateFile(FileName, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
  if (pl == INVALID_HANDLE_VALUE)
    Exception(GetLastError());

  try
    {
      // load the raw file
      fs = GetFileSize(pl, NULL);
      if (fs == INVALID_FILE_SIZE)
        Exception(GetLastError());
      if (fs > 0x7FFFFFFF)            // sizes are later passed on as int
        Exception(ERROR_FILE_TOO_LARGE);

      // PAD zero bytes guarantee a wide terminator even for odd sizes and
      // make the 4-byte BOM read below safe for very short files.
      rf = calloc((size_t)fs + PAD, sizeof(BYTE));
      if (!rf)
        Exception(ERROR_NOT_ENOUGH_MEMORY);
      if (!ReadFile(pl, rf, fs, &br, NULL))
        Exception(GetLastError());
      CloseHandle(pl);                // release the file before parsing
      pl = INVALID_HANDLE_VALUE;
      if (br != fs)
        Exception(0xE0640007);

      // First four bytes as a DWORD; the constants below are the BOM bytes
      // as they appear when loaded little-endian.
      DWORD bom;
      memcpy(&bom, rf, sizeof bom);

      if ((br >= 4) && ((bom == 0x0000FEFF) || (bom == 0xFFFE0000)))
        Exception(0xE0640002);                    // utf32le / utf32be bom
      else if ((bom & 0xFFFF) == 0xFFFE)          // utf16be bom
        {
          FlipEndian(rf, br);
          CopyWChar((PWCHAR)rf + 1);
        }
      else if ((bom & 0xFFFF) == 0xFEFF)          // utf16le bom
        CopyWChar((PWCHAR)rf + 1);
      else if ((bom & 0xFFFFFF) == 0xBFBBEF)      // utf8 bom
        CopyMByte(rf + 3, br - 3);
      else if (!memchr(rf, 0x00, br))             // 8 bit text has no nulls
        CopyMByte(rf, br);                        // ansi / utf8 no bom
      else
        {
          // no known bom and NULs present: probe around the first LF byte
          PBYTE lf = memchr(rf, 0x0A, br);
          if (!lf)
            Exception(0xE0640003);

          DWORD at = (DWORD)(lf - rf);            // offset of that LF

          // Three zero bytes before the LF look like utf32be, three after
          // it like utf32le.  Only bytes inside the file data are examined.
          if (((at >= 3) && !lf[-3] && !lf[-2] && !lf[-1]) ||
              ((br - at >= 4) && !lf[1] && !lf[2] && !lf[3]))
            Exception(0xE0640002);

          if (at & 1)                             // lf at odd offset:
            FlipEndian(rf, br);                   // utf16be no bom
          CopyWChar((PWCHAR)rf);                  // utf16le no bom
        }
    }
  finally
    {
      if (pl != INVALID_HANDLE_VALUE)             // still open: a step failed
        CloseHandle(pl);
      free(rf);
    }
  return TRUE;
}

You either have to have the patience of Job or really like pain to think that's OK.