wconv.hpp
#ifndef WCONV_HPP
#define WCONV_HPP
#include <string>
/// Converts the multibyte string `src`, interpreted in the Windows code page
/// `code_page`, to a wide string. Defined in wconv.cpp.
std::wstring string_to_unicode( const std::string& src, unsigned code_page );
/// Converts the wide string `src` to a multibyte string encoded in the Windows
/// code page `code_page`. Defined in wconv.cpp.
std::string unicode_to_string( const std::wstring& src, unsigned code_page );
template <int CODEPAGE>
std::wstring string_to_unicode( const std::string& src ) {
return string_to_unicode( src, CODEPAGE );
}
template <int CODEPAGE>
std::string unicode_to_string( const std::wstring& src ) {
return unicode_to_string( src, CODEPAGE );
}
/// Re-encodes a multibyte string from one code page to another by converting
/// it to a wide string first and then back to a multibyte string.
/// @tparam DST_CODE_PAGE code page identifier of the returned string.
/// @tparam SRC_CODE_PAGE code page identifier `src` is interpreted in.
/// @param src multibyte string to re-encode.
/// @return `src` re-encoded in DST_CODE_PAGE.
template <int DST_CODE_PAGE, int SRC_CODE_PAGE>
std::string code_to_code( const std::string& src ) {
    // Step 1: source code page -> wide string.
    const std::wstring wide = string_to_unicode<SRC_CODE_PAGE>( src );
    // Step 2: wide string -> destination code page.
    return unicode_to_string<DST_CODE_PAGE>( wide );
}
#endif // WCONV_HPP
codepage.hpp
#ifndef CODE_PAGE_HPP
#define CODE_PAGE_HPP
/// Code page identifiers taken from winnls.h.
/// NOTE(review): if winnls.h is also included these names are defined twice;
/// presumably with the same values -- confirm against the SDK in use.
//@{
#define CP_ACP 0 // Current Code Page
#define CP_UTF7 65000 // UTF7
#define CP_UTF8 65001 // UTF8
//@}
// The remaining macros give symbolic names to numeric Windows code page
// identifiers; the numbers are what gets passed as the `code_page` argument.

// OEM (DOS) code pages.
#define CP_OEM_US 437
// NOTE(review): "ALABIC" looks like a misspelling of "ARABIC"; the name is
// left unchanged because renaming it would break existing users of the macro.
#define CP_OEM_ALABIC 720
#define CP_OEM_GREEK 737
#define CP_OEM_BALTIC 775
#define CP_OEM_MLATIN1 850
#define CP_OEM_LATIN2 852
#define CP_OEM_CYRILLIC 855
#define CP_OEM_TURKISH 857
#define CP_OEM_MLATIN1P 858
#define CP_OEM_HEBREW 862
#define CP_OEM_RUSSIAN 866
// Thai and East Asian code pages.
#define CP_THAI 874
#define CP_SJIS 932
#define CP_GBK 936
#define CP_KOREA 949
#define CP_BIG5 950
// Windows-125x code pages.
#define CP_EUROPE 1250
#define CP_CYRILLIC 1251
#define CP_LATIN1 1252
#define CP_GREEK 1253
#define CP_TURKISH 1254
#define CP_HEBREW 1255
#define CP_ARABIC 1256
#define CP_BALTIC 1257
#define CP_VIETNAM 1258
// ISO 8859 family code pages (identifiers 28591 and up).
#define CP_ISO_LATIN1 28591
#define CP_ISO_LATIN2 28592
#define CP_ISO_LATIN3 28593
#define CP_ISO_BALTIC 28594
#define CP_ISO_CYRILLIC 28595
#define CP_ISO_ARABIC 28596
#define CP_ISO_HEBREW 28598
#define CP_ISO_TURKISH 28599
#define CP_ISO_LATIN9 28605
#endif // CODE_PAGE_HPP
wconv.cpp
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <winnls.h>
#include <string>
#include <boost/scoped_array.hpp>
#include "wconv.hpp"
/// Converts a multibyte string to a wide (UTF-16) string using the Win32
/// MultiByteToWideChar API.
///
/// @param src       multibyte string, interpreted in `code_page`. It is passed
///                  as a null-terminated C string, so conversion stops at the
///                  first embedded '\0'.
/// @param code_page Windows code page identifier (e.g. CP_ACP, CP_UTF8).
/// @return the converted string without a trailing null character, or an empty
///         string if the conversion fails (for example an unsupported code
///         page).
std::wstring string_to_unicode( const std::string& src, unsigned code_page ) {
    // First pass with a zero-sized output buffer: returns the required length
    // in wchar_t units. Because the input length is -1, that length includes
    // the terminating null. A return value of 0 signals failure.
    const int required = MultiByteToWideChar( code_page, 0, src.c_str(), -1, 0, 0 );
    if( required <= 0 ) {
        return std::wstring();
    }

    // Second pass converts straight into the string's own storage.
    std::wstring result( static_cast<std::wstring::size_type>( required ), L'\0' );
    const int written = MultiByteToWideChar( code_page, 0, src.c_str(), -1, &result[0], required );
    if( written <= 0 ) {
        return std::wstring();
    }

    // Drop the terminating null that the API wrote; std::wstring keeps its own.
    result.resize( static_cast<std::wstring::size_type>( written - 1 ) );
    return result;
}
/// Converts a wide (UTF-16) string to a multibyte string using the Win32
/// WideCharToMultiByte API.
///
/// @param src       wide string to convert. It is passed as a null-terminated
///                  C string, so conversion stops at the first embedded L'\0'.
/// @param code_page Windows code page identifier of the result (e.g. CP_ACP,
///                  CP_UTF8).
/// @return the converted string without a trailing null character, or an empty
///         string if the conversion fails (for example an unsupported code
///         page).
std::string unicode_to_string( const std::wstring& src, unsigned code_page ) {
    // First pass with a zero-sized output buffer: returns the required length
    // in bytes. Because the input length is -1, that length includes the
    // terminating null. A return value of 0 signals failure.
    // The default-char arguments stay null, as CP_UTF7/CP_UTF8 require.
    const int required = WideCharToMultiByte( code_page, 0, src.c_str(), -1, 0, 0, 0, 0 );
    if( required <= 0 ) {
        return std::string();
    }

    // Second pass converts straight into the string's own storage.
    std::string result( static_cast<std::string::size_type>( required ), '\0' );
    const int written = WideCharToMultiByte( code_page, 0, src.c_str(), -1, &result[0], required, 0, 0 );
    if( written <= 0 ) {
        return std::string();
    }

    // Drop the terminating null that the API wrote; std::string keeps its own.
    result.resize( static_cast<std::string::size_type>( written - 1 ) );
    return result;
}
//#define WCONV_TEST_APP
#ifdef WCONV_TEST_APP
#include "codepage.hpp"
#include <fstream>
#include <iostream>
//#define YOURPAGE CP_SJIS
#define YOURPAGE CP_ACP
int main(int argc, char* argv[]) {
if( argc < 3 ) return 1;
std::ifstream ifs( argv[1] );
std::ofstream ofs( argv[2] );
//std::wcout.imbue( std::locale("") );
std::string str;
while( std::getline( ifs, str ) ) {
std::string dst = code_to_code<CP_UTF8,YOURPAGE>( str );
ofs << dst << std::endl;
//ofs << code_to_code<YOURPAGE,CP_UTF8>( str ) << std::endl;
}
return 0;
}
#endif
2018/08/29 追記: 以前は std::basic_string のバッファへ直接書き込んでいたが、それだと終端の null 文字まで文字列の内容に含まれてしまうため、修正を行った。
0 件のコメント:
コメントを投稿