ソフトウェア開発 Win32プログラミング

シフトJISをJISに変換する

戻る


シフトJIS(Shift_JIS)でエンコードされた文字列を、エスケープシーケンスで文字集合を切り替える7ビットのJISコードに変換する方法を以下に示す。

#include <windows.h>
#include <mbstring.h>

/*
 * Three-byte escape sequences written into the JIS output to announce the
 * character set of the bytes that follow:
 *   ESC_JIS   (ESC $ B) - two-byte JIS X 0208 characters
 *   ESC_ASCII (ESC ( B) - ASCII
 *   ESC_8BIT  (ESC ( I) - JIS X 0201 katakana (half-width kana)
 * The short comments after two of the literals contain only a ')';
 * presumably they balance the '(' inside the string for editors that match
 * parentheses.
 */
#define ESC_JIS     "\x1b$B"
#define ESC_ASCII   "\x1b(B" /*)*/
#define ESC_8BIT    "\x1b(I" /*)*/

/*
 * Character classes used by SJIS2JISSize() and SJIS2JIS() while scanning the
 * input.  CHAR_NULL is the pseudo-class assigned once the scan index reaches
 * the end of the input.
 */
#define CHAR_ASCII    0
#define CHAR_8BITCODE 1
#define CHAR_ZENKAKU  2
#define CHAR_NULL     3

/*
 * _IS_SJIS_1(c): non-zero when c is a Shift_JIS lead byte, i.e. in
 * 0x81-0x9F or 0xE0-0xFC.  XOR with 0x20 maps those two ranges onto the
 * single contiguous range 0xA1-0xDC, so one unsigned subtract-and-compare
 * tests both (values below 0xA1 wrap to a huge unsigned number and fail).
 *
 * _IS_SJIS_2(c): non-zero when c is a Shift_JIS trail byte, i.e. in
 * 0x40-0xFC excluding 0x7F.
 *
 * Both macros evaluate their argument more than once or cast it to BYTE, so
 * pass a side-effect-free expression.
 */
#define _IS_SJIS_1(c) ((UINT)((BYTE)(c) ^ 0x20) - 0xa1 < 0x3c)
#define _IS_SJIS_2(c) (0x40 <= (BYTE)(c) && (BYTE)(c) <= 0xfc && (BYTE)(c) != 0x7f)

/*
 * Converts the Shift_JIS double-byte character at pszSrc to its JIS code.
 *
 * Codes whose lead byte is 0xFA or 0xFB, or 0xFC with a trail byte up to
 * 0x4B, are first moved to another Shift_JIS position using the offset
 * table below; the (possibly relocated) code is then handed to
 * _mbcjmstojis().
 *
 * pszSrc  points at the candidate lead byte; pszSrc[1] is read only when
 *         pszSrc[0] is a valid lead byte.
 * Returns the value of _mbcjmstojis() truncated to WORD, or 0 when the two
 *         bytes do not form a lead/trail pair.
 */
static WORD _mbcjmstojisex(LPCSTR pszSrc)
{
    /*
     * Relocation table, scanned in order: the first entry whose wLast is not
     * below the code applies, and wDelta is subtracted from the code.
     * The four single-code entries (0xFA58-0xFA5B) land on 0x878A, 0x8782,
     * 0x8784 and 0x879A respectively.
     */
    static const struct
    {
        WORD wLast;
        WORD wDelta;
    } s_aRemap[] =
    {
        { 0xFA49, 0x0B51 },
        { 0xFA53, 0x72F6 },
        { 0xFA57, 0x0B5B },
        { 0xFA58, 0x72CE },
        { 0xFA59, 0x72D7 },
        { 0xFA5A, 0x72D6 },
        { 0xFA5B, 0x72C1 },
        { 0xFA7E, 0x0D1C },
        { 0xFA9B, 0x0D1D },
        { 0xFAFC, 0x0D1C },
        { 0xFB5B, 0x0D5F },
        { 0xFB7E, 0x0D1C },
        { 0xFB9B, 0x0D1D },
        { 0xFBFC, 0x0D1C },
        { 0xFFFF, 0x0D5F }  /* everything above 0xFBFC */
    };
    BYTE bLead, bTrail;
    WORD wCode;
    UINT k;

    /* Not a double-byte character: report 0 without touching pszSrc[1]
       unless the lead byte is valid. */
    if (!_IS_SJIS_1(pszSrc[0]))
        return 0;
    if (!_IS_SJIS_2(pszSrc[1]))
        return 0;

    bLead  = (BYTE)pszSrc[0];
    bTrail = (BYTE)pszSrc[1];
    wCode  = MAKEWORD(bTrail, bLead);

    if (bLead == 0xFA || bLead == 0xFB || (bLead == 0xFC && bTrail <= 0x4B))
    {
        for (k = 0; k < sizeof(s_aRemap) / sizeof(s_aRemap[0]); ++k)
        {
            if (wCode <= s_aRemap[k].wLast)
            {
                wCode -= s_aRemap[k].wDelta;
                break;
            }
        }
    }

    return (WORD)_mbcjmstojis(wCode);
}

/*
 * Converts a buffer of Shift_JIS double-byte characters to JIS in place.
 *
 * Every lead/trail pair is replaced by the two bytes of its JIS code (high
 * byte first).  A pair that _mbcjmstojisex() cannot convert (it returns 0)
 * is left unchanged as a whole pair, so the scan stays aligned on character
 * boundaries.  A byte that does not start a pair, including a lone final
 * byte, is also left unchanged.
 *
 * Because each input byte yields exactly one output byte, the conversion
 * needs no scratch buffer and the result has the same length as the input.
 *
 * psz     buffer to convert; only psz[0 .. cchSrc-1] is read or written and
 *         no terminator is appended.
 * cchSrc  number of bytes in psz.
 * Returns the number of bytes now valid in psz (0 when cchSrc <= 0).
 */
static INT _SJ2J(LPSTR psz, INT cchSrc)
{
    INT i = 0;
    WORD sCode;

    while (i < cchSrc)
    {
        /* The i + 1 check keeps the trail-byte read inside the buffer. */
        if (i + 1 < cchSrc && _IS_SJIS_1(psz[i]) && _IS_SJIS_2(psz[i + 1]))
        {
            sCode = _mbcjmstojisex(psz + i);
            if (sCode != 0)
            {
                psz[i]     = (CHAR)(sCode >> 8);
                psz[i + 1] = (CHAR)sCode;
            }
            /* else: unconvertible pair, keep both bytes as they are */
            i += 2;
        }
        else
        {
            /* Single byte: nothing to convert. */
            ++i;
        }
    }
    return i;
}

/*
 * Returns a buffer size, in bytes and including the terminating NUL, that is
 * large enough to hold the JIS conversion of a Shift_JIS string.
 *
 * The input is scanned character by character and each character is
 * classified as double-byte (CHAR_ZENKAKU), 8-bit single byte
 * (CHAR_8BITCODE) or ASCII (CHAR_ASCII).  The output starts in ASCII, so:
 *   - every change of class costs one 3-byte escape sequence,
 *   - every input byte costs one output byte (the double-byte conversion
 *     never produces more bytes than the run it converts, so the run length
 *     is a safe bound and no trial conversion is needed),
 *   - input that ends outside ASCII costs one final 3-byte escape back to
 *     ASCII.
 *
 * pszSrc  Shift_JIS text; need not be NUL-terminated.
 * cchSrc  number of bytes in pszSrc; values <= 0 are treated as empty.
 * Returns the required size; 1 for empty input.
 */
INT SJIS2JISSize(LPCSTR pszSrc, INT cchSrc)
{
    INT cchDest = 0;
    INT nCharKindOld = CHAR_ASCII;
    INT i = 0;

    while (i < cchSrc)
    {
        INT nCharKind;
        INT cchChar;

        if (i < cchSrc - 1 && _IS_SJIS_1(pszSrc[i]) && _IS_SJIS_2(pszSrc[i + 1]))
        {
            nCharKind = CHAR_ZENKAKU;
            cchChar = 2;
        }
        else if (pszSrc[i] & 0x80)
        {
            nCharKind = CHAR_8BITCODE;
            cchChar = 1;
        }
        else
        {
            nCharKind = CHAR_ASCII;
            cchChar = 1;
        }

        if (nCharKind != nCharKindOld)
        {
            cchDest += 3;               /* escape sequence for the new class */
            nCharKindOld = nCharKind;
        }

        cchDest += cchChar;
        i += cchChar;
    }

    if (nCharKindOld != CHAR_ASCII)
        cchDest += 3;                   /* closing switch back to ASCII */

    return cchDest + 1;                 /* room for the terminating NUL */
}

/*
 * Classifies the character that starts at pszSrc[i].
 * Returns CHAR_NULL at or past the end of the input, CHAR_ZENKAKU for a
 * Shift_JIS lead/trail pair, CHAR_8BITCODE for any other byte with the high
 * bit set, and CHAR_ASCII otherwise.
 */
static INT SJIS2JIS_CharKind(LPCSTR pszSrc, INT i, INT cchSrc)
{
    if (i >= cchSrc)
        return CHAR_NULL;
    if (i < cchSrc - 1 && _IS_SJIS_1(pszSrc[i]) && _IS_SJIS_2(pszSrc[i + 1]))
        return CHAR_ZENKAKU;
    if (pszSrc[i] & 0x80)
        return CHAR_8BITCODE;
    return CHAR_ASCII;
}

/*
 * Converts a Shift_JIS string to JIS, switching character sets with escape
 * sequences.
 *
 * The input is split into maximal runs of one character class.  The output
 * starts in ASCII; whenever the class changes, the matching escape sequence
 * (ESC_ASCII, ESC_8BIT or ESC_JIS) is written before the run.  If the input
 * ends outside ASCII, ESC_ASCII is appended.  The result is NUL-terminated.
 *
 * Each run is written straight into pszDest, without temporary buffers:
 *   - ASCII bytes are copied unchanged,
 *   - 8-bit bytes are written with the high bit cleared,
 *   - double-byte runs are copied and then converted in place by _SJ2J().
 *
 * pszDest  receives the result.  The function cannot check its capacity;
 *          the caller is expected to size it with
 *          SJIS2JISSize(pszSrc, cchSrc).
 * pszSrc   Shift_JIS text; need not be NUL-terminated and must not overlap
 *          pszDest.
 * cchSrc   number of bytes in pszSrc; values <= 0 are treated as empty.
 * Returns the number of bytes written, not counting the terminating NUL.
 *
 * NOTE(review): every non-pair byte with the high bit set is treated as
 * half-width kana and masked with 0x7F.  For bytes outside 0xA1-0xDF (for
 * example a lone lead byte such as 0x9B, which becomes ESC) this yields
 * control codes in the output - confirm whether such input must be handled.
 */
INT SJIS2JIS(LPSTR pszDest, LPCSTR pszSrc, INT cchSrc)
{
    INT cchDest = 0;
    INT nCharKindOld = CHAR_ASCII;
    INT i = 0;

    while (i < cchSrc)
    {
        const INT nCharKind = SJIS2JIS_CharKind(pszSrc, i, cchSrc);
        const INT cchStep = (nCharKind == CHAR_ZENKAKU) ? 2 : 1;
        const INT iRun = i;
        INT cchRun;
        INT j;

        /* Advance i to the first character of a different class. */
        do
        {
            i += cchStep;
        } while (SJIS2JIS_CharKind(pszSrc, i, cchSrc) == nCharKind);
        cchRun = i - iRun;

        /* Announce the character set unless it is already selected (only a
           leading ASCII run needs no escape). */
        if (nCharKind != nCharKindOld)
        {
            LPCSTR pszEsc = ESC_ASCII;
            if (nCharKind == CHAR_ZENKAKU)
                pszEsc = ESC_JIS;
            else if (nCharKind == CHAR_8BITCODE)
                pszEsc = ESC_8BIT;

            CopyMemory(pszDest + cchDest, pszEsc, 3);
            cchDest += 3;
            nCharKindOld = nCharKind;
        }

        switch (nCharKind)
        {
        case CHAR_ZENKAKU:
            /* _SJ2J works in place and stays within the cchRun bytes it is
               given, so the run can be converted inside the destination. */
            CopyMemory(pszDest + cchDest, pszSrc + iRun, cchRun);
            cchDest += _SJ2J(pszDest + cchDest, cchRun);
            break;

        case CHAR_8BITCODE:
            for (j = 0; j < cchRun; ++j)
                pszDest[cchDest + j] = (CHAR)(pszSrc[iRun + j] & 0x7F);
            cchDest += cchRun;
            break;

        default: /* CHAR_ASCII */
            CopyMemory(pszDest + cchDest, pszSrc + iRun, cchRun);
            cchDest += cchRun;
            break;
        }
    }

    /* Leave the stream in ASCII, as it started. */
    if (nCharKindOld != CHAR_ASCII)
    {
        CopyMemory(pszDest + cchDest, ESC_ASCII, 3);
        cchDest += 3;
    }

    pszDest[cchDest] = 0;
    return cchDest;
}

#ifdef UNITTEST
#include <cstdio>
using namespace std;

int main(void)
{
    LPSTR pszBuf1, pszBuf2, pszBuf3;
    CHAR sz1[] = "Shift_JISをJISにします。";
    CHAR sz2[] = "Convert to JIS";
    CHAR sz3[] = "半角カナのテスト";
    INT cch1, cch2, cch3;
    
    cch1 = SJIS2JISSize(sz1, lstrlen(sz1));
    cch2 = SJIS2JISSize(sz2, lstrlen(sz2));
    cch3 = SJIS2JISSize(sz3, lstrlen(sz3));
    printf("%d\n", cch1);
    printf("%d\n", cch2);
    printf("%d\n", cch3);
    pszBuf1 = new CHAR[cch1];
    pszBuf2 = new CHAR[cch2];
    pszBuf3 = new CHAR[cch3];
    cch1 = SJIS2JIS(pszBuf1, sz1, lstrlen(sz1));
    cch2 = SJIS2JIS(pszBuf2, sz2, lstrlen(sz2));
    cch3 = SJIS2JIS(pszBuf3, sz3, lstrlen(sz3));
    printf("%d: %s\n", cch1, pszBuf1);
    printf("%d: %s\n", cch2, pszBuf2);
    printf("%d: %s\n", cch3, pszBuf3);
    delete[] pszBuf1;
    delete[] pszBuf2;
    delete[] pszBuf3;
    return 0;
}
#endif

ソース: sjis2jis.zip


戻る

©片山博文MZ
katayama.hirofumi.mz@gmail.com

inserted by FC2 system