ソフトウェア開発 Win32プログラミング

UTF-7をUnicodeに変換する

戻る


UTF-7をUnicodeに変換するコードは以下の通り。

#include <windows.h>

/*
 * Inverse Base64 table: maps an ASCII code (0..127) to its 6-bit value.
 *   'A'..'Z' -> 0..25, 'a'..'z' -> 26..51, '0'..'9' -> 52..61,
 *   '+' -> 62, '/' -> 63; every other code (including '=') is -1.
 * The table has exactly 128 entries, so an index must be below 0x80.
 */
static const signed char 
base64inv[] = 
{
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, 
    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1
};

VOID Utf7Base64Decode(BYTE *pbDest, LPCSTR pszSrc, INT cchSrc)
{
    INT i, j, n;
    BYTE b;
    for(i = 0; i < cchSrc / 4 * 4; i += 4)
    {
        for(j = n = 0; j < 4; )
        {
            b = (BYTE) base64inv[(BYTE) *pszSrc++];
            n |= (((INT) b) << ((3 - j) * 6));
            j++;
        }
        for(j = 0; j < 3; j++)
            *pbDest++ = (BYTE) ((n >> (8 * (2 - j))) & 0xFF);
    }
    for(j = n = 0; j < cchSrc % 4; )
    {
        b = (BYTE) base64inv[(BYTE) *pszSrc++];
        n |= (((INT) b) << ((3 - j) * 6));
        j++;
    }
    for(j = 0; j < ((cchSrc % 4) * 6 / 8); j++)
        *pbDest++ = (BYTE) ((n >> (8 * (2 - j))) & 0xFF);
}

/*
 * Swaps the two bytes of every 16-bit word in a buffer, in place.
 *
 * pv  start of the buffer to modify.
 * cw  number of 2-byte words to process; values <= 0 leave the buffer
 *     untouched.
 */
VOID myswab(LPVOID pv, INT cw)
{
    LPBYTE pbWord = (LPBYTE) pv;
    INT iWord;

    for (iWord = 0; iWord < cw; iWord++, pbWord += 2)
    {
        BYTE bFirst = pbWord[0];
        pbWord[0] = pbWord[1];
        pbWord[1] = bFirst;
    }
}

/*
 * Counts how many consecutive Base64 characters start at pch, looking at
 * no more than cchMax characters.  Bytes >= 0x80 end the run before the
 * 128-entry lookup table is consulted.
 */
static INT Utf7Base64RunLength(LPCSTR pch, INT cchMax)
{
    INT cch = 0;
    while (cch < cchMax && (BYTE) pch[cch] < 0x80 &&
           base64inv[(BYTE) pch[cch]] != -1)
    {
        cch++;
    }
    return cch;
}

/*
 * Computes how many wide characters Utf7ToWideChar will produce.
 *
 * pszUtf7  UTF-7 input text.
 * cchUtf7  number of input bytes, or -1 to use lstrlenA(pszUtf7) + 1
 *          (the terminating NUL is then counted and converted too).
 *
 * Returns the number of wide characters required.
 *
 * Rules applied while scanning:
 *   - a byte other than '+' counts as one wide character;
 *   - "+-" counts as one wide character (a literal '+');
 *   - '+' followed by a run of cch Base64 characters counts as
 *     (cch * 3) / 8 wide characters (6 bits per character, 16 bits per
 *     wide character); one '-' directly after the run is skipped.
 */
INT Utf7ToWideCharSize(LPCSTR pszUtf7, INT cchUtf7)
{
    INT c = 0, cch;

    if (cchUtf7 == -1)
        cchUtf7 = lstrlenA(pszUtf7) + 1;

    while (cchUtf7 > 0)
    {
        CHAR ch = *pszUtf7++;
        cchUtf7--;

        if (ch != '+')
        {
            c++;
            continue;
        }

        /* Only look at the next byte if it is still inside the input. */
        if (cchUtf7 > 0 && *pszUtf7 == '-')
        {
            c++;
            pszUtf7++;
            cchUtf7--;
            continue;
        }

        cch = Utf7Base64RunLength(pszUtf7, cchUtf7);
        c += (cch * 3) / 8;
        pszUtf7 += cch;
        cchUtf7 -= cch;

        /* An explicit '-' terminator belongs to the shifted sequence. */
        if (cchUtf7 > 0 && *pszUtf7 == '-')
        {
            pszUtf7++;
            cchUtf7--;
        }
    }

    return c;
}

/*
 * Converts UTF-7 text to wide characters.
 *
 * pszUtf7  UTF-7 input text.
 * cchUtf7  number of input bytes, or -1 to use lstrlenA(pszUtf7) + 1.
 * pszWide  output buffer; not touched when cchWide is 0.
 * cchWide  capacity of pszWide in wide characters, or 0 to query the
 *          required size.
 *
 * Returns the number of wide characters required/written.  When cchWide
 * is non-zero but smaller than that number, nothing is written, the last
 * error is set to ERROR_INSUFFICIENT_BUFFER and 0 is returned.
 *
 * Base64 runs are decoded to bytes and each byte pair is swapped with
 * myswab before being stored (presumably to turn the big-endian pairs of
 * UTF-7 into little-endian WCHARs -- confirm if the target is not
 * little-endian).
 */
INT Utf7ToWideChar(LPCSTR pszUtf7, INT cchUtf7, LPWSTR pszWide, INT cchWide)
{
    INT n, c, cch;
    WORD *pwsz;

    if (cchUtf7 == -1)
        cchUtf7 = lstrlenA(pszUtf7) + 1;

    c = Utf7ToWideCharSize(pszUtf7, cchUtf7);
    if (cchWide == 0)
        return c;
    if (cchWide < c)
    {
        SetLastError(ERROR_INSUFFICIENT_BUFFER);
        return 0;
    }

    while (cchUtf7 > 0)
    {
        CHAR ch = *pszUtf7++;
        cchUtf7--;

        if (ch != '+')
        {
            /* Go through BYTE so bytes >= 0x80 are not sign-extended to 0xFFxx. */
            *pszWide++ = (WCHAR) (BYTE) ch;
            continue;
        }

        /* Only look at the next byte if it is still inside the input. */
        if (cchUtf7 > 0 && *pszUtf7 == '-')
        {
            *pszWide++ = L'+';
            pszUtf7++;
            cchUtf7--;
            continue;
        }

        cch = Utf7Base64RunLength(pszUtf7, cchUtf7);
        n = (cch * 3) / 8;
        if (n > 0)
        {
            /*
             * Decoding yields (cch * 3) / 4 bytes, which can be one byte
             * more than n whole words.  Decode into a scratch buffer with
             * one spare word so the odd byte never lands in pszWide.
             */
            pwsz = new WORD[n + 1];
            ZeroMemory(pwsz, (n + 1) * sizeof(WORD));
            Utf7Base64Decode((BYTE *) pwsz, pszUtf7, cch);
            myswab(pwsz, n);
            CopyMemory(pszWide, pwsz, n * sizeof(WORD));
            delete[] pwsz;
            pszWide += n;
        }
        pszUtf7 += cch;
        cchUtf7 -= cch;

        /* An explicit '-' terminator belongs to the shifted sequence. */
        if (cchUtf7 > 0 && *pszUtf7 == '-')
        {
            pszUtf7++;
            cchUtf7--;
        }
    }

    return c;
}

#ifdef UNITTEST
#include <stdlib.h>
#include <stdio.h>
#include <new>
using namespace std;

#define BUFSIZE 1024

class DataStream
{
    LPSTR m_p;
    INT m_size;
public:
    DataStream()
    {
        m_p = (LPSTR)malloc(1);
        if (m_p == NULL) throw bad_alloc();
        m_size = 0;
        m_p[m_size] = '\0';
    }
    ~DataStream() { if (m_p != NULL) free(m_p); }
    LPSTR Ptr() { return m_p; }
    INT Size() const { return m_size; }
    VOID Append(LPCVOID p, INT size)
    {
        m_p = (LPSTR)realloc(m_p, m_size + size + 1);
        if (m_p == NULL) throw bad_alloc();
        CopyMemory(m_p + m_size, p, size);
        m_size += size;
        m_p[m_size] = '\0';
    }
};

int main(int argc, char **argv)
{
    FILE *fin, *fout;
    CHAR buf[BUFSIZE];
    LPWSTR psz;
    INT c, cchWide;
    DataStream ds;

    if (argc != 3)
    {
        printf("Usage: utf7de utf7_file unicode_file\n");
        return 0;
    }

    fin = fopen(argv[1], "rb");
    if (fin == NULL)
    {
        fprintf(stderr, "utf7de: cannot open file: %s\n", argv[1]);
        return 1;
    }
    while((c = fread(buf, 1, BUFSIZE, fin)) != 0)
    {
        ds.Append(buf, c);
    }
    fclose(fin);

    fout = fopen(argv[2], "wb");
    if (fout == NULL)
    {
        fprintf(stderr, "utf7de: cannot open file: %s\n", argv[2]);
        return 2;
    }

    cchWide = Utf7ToWideCharSize(ds.Ptr(), ds.Size());
    psz = new WCHAR[cchWide + 1];
    Utf7ToWideChar(ds.Ptr(), ds.Size(), psz, cchWide);
    psz[cchWide] = 0;
    fwrite(psz, cchWide * sizeof(WCHAR), 1, fout);
    delete[] psz;

    fclose(fout);
    return 0;
}
#endif  // def UNITTEST

ソース: utf7de.zip


戻る

©片山博文MZ
katayama.hirofumi.mz@gmail.com

inserted by FC2 system