wconv.hpp
#ifndef WCONV_HPP
#define WCONV_HPP

#include <string>

/// Converts the multi-byte string @p src, encoded in @p code_page, to a wide string.
std::wstring string_to_unicode( const std::string& src, unsigned code_page );

/// Converts the wide string @p src to a multi-byte string encoded in @p code_page.
std::string unicode_to_string( const std::wstring& src, unsigned code_page );

/// Same as string_to_unicode( src, code_page ) with the code page fixed at
/// compile time by the template argument.
template <int CODEPAGE>
std::wstring string_to_unicode( const std::string& src )
{
    const unsigned page = static_cast<unsigned>( CODEPAGE );
    return string_to_unicode( src, page );
}

/// Same as unicode_to_string( src, code_page ) with the code page fixed at
/// compile time by the template argument.
template <int CODEPAGE>
std::string unicode_to_string( const std::wstring& src )
{
    const unsigned page = static_cast<unsigned>( CODEPAGE );
    return unicode_to_string( src, page );
}

/// Re-encodes @p src from SRC_CODE_PAGE to DST_CODE_PAGE, going through a
/// wide-string intermediate.
template <int DST_CODE_PAGE, int SRC_CODE_PAGE>
std::string code_to_code( const std::string& src )
{
    const std::wstring wide = string_to_unicode<SRC_CODE_PAGE>( src );
    return unicode_to_string<DST_CODE_PAGE>( wide );
}

#endif // WCONV_HPP
codepage.hpp
#ifndef CODE_PAGE_HPP
#define CODE_PAGE_HPP

// Symbolic names for the numeric code page identifiers that are passed as the
// code_page argument of the conversion functions.

/// from winnls.h
//@{
#define CP_ACP 0 // Current Code Page
#define CP_UTF7 65000 // UTF7
#define CP_UTF8 65001 // UTF8
//@}

// OEM code pages
#define CP_OEM_US 437
#define CP_OEM_ARABIC 720
// Misspelled historical name, kept so that existing users keep compiling.
#define CP_OEM_ALABIC CP_OEM_ARABIC
#define CP_OEM_GREEK 737
#define CP_OEM_BALTIC 775
#define CP_OEM_MLATIN1 850
#define CP_OEM_LATIN2 852
#define CP_OEM_CYRILLIC 855
#define CP_OEM_TURKISH 857
#define CP_OEM_MLATIN1P 858
#define CP_OEM_HEBREW 862
#define CP_OEM_RUSSIAN 866

// Thai and East Asian code pages
#define CP_THAI 874
#define CP_SJIS 932
#define CP_GBK 936
#define CP_KOREA 949
#define CP_BIG5 950

// Code pages 1250-1258
#define CP_EUROPE 1250
#define CP_CYRILLIC 1251
#define CP_LATIN1 1252
#define CP_GREEK 1253
#define CP_TURKISH 1254
#define CP_HEBREW 1255
#define CP_ARABIC 1256
#define CP_BALTIC 1257
#define CP_VIETNAM 1258

// ISO code pages
#define CP_ISO_LATIN1 28591
#define CP_ISO_LATIN2 28592
#define CP_ISO_LATIN3 28593
#define CP_ISO_BALTIC 28594
#define CP_ISO_CYRILLIC 28595
#define CP_ISO_ARABIC 28596
#define CP_ISO_HEBREW 28598
#define CP_ISO_TURKISH 28599
#define CP_ISO_LATIN9 28605

#endif // CODE_PAGE_HPP
wconv.cpp
#define WIN32_LEAN_AND_MEAN #include <windows.h> #include <winnls.h> #include <string> #include <boost/scoped_array.hpp> #include "wconv.hpp" std::wstring string_to_unicode( const std::string& src, unsigned code_page ) { int result_length = MultiByteToWideChar( code_page, 0, src.c_str(), -1, 0, 0 ); if( result_length > 255 ) { boost::scoped_array<wchar_t> tbuff( new wchar_t[ result_length + 2 ] ); MultiByteToWideChar( code_page, 0, src.c_str(), -1, tbuff.get(), result_length ); std::wstring result = tbuff.get(); return result; } else { wchar_t tbuff[ 256 ]; MultiByteToWideChar( code_page, 0, src.c_str(), -1, tbuff, result_length ); std::wstring result = tbuff; return result; } } std::string unicode_to_string( const std::wstring& src, unsigned code_page ) { int result_length = WideCharToMultiByte( code_page, 0, src.c_str(), -1, 0, 0, 0, 0 ); if( result_length > 511 ) { boost::scoped_array<char> tbuff( new char[ result_length + 2 ] ); WideCharToMultiByte( code_page, 0, src.c_str(), -1, tbuff.get(), result_length, 0, 0 ); std::string result = tbuff.get(); return result; } else { char tbuff[ 512 ]; WideCharToMultiByte( code_page, 0, src.c_str(), -1, tbuff, result_length, 0, 0 ); std::string result = tbuff; return result; } } //#define WCONV_TEST_APP #ifdef WCONV_TEST_APP #include "codepage.hpp" #include <fstream> #include <iostream> //#define YOURPAGE CP_SJIS #define YOURPAGE CP_ACP int main(int argc, char* argv[]) { if( argc < 3 ) return 1; std::ifstream ifs( argv[1] ); std::ofstream ofs( argv[2] ); //std::wcout.imbue( std::locale("") ); std::string str; while( std::getline( ifs, str ) ) { std::string dst = code_to_code<CP_UTF8,YOURPAGE>( str ); ofs << dst << std::endl; //ofs << code_to_code<YOURPAGE,CP_UTF8>( str ) << std::endl; } return 0; } #endif2018/08/29 追記 std::basic_string のバッファを直接使っていたが、それだと null terminated まで組み込まれてしまうため、修正を行った。
0 件のコメント:
コメントを投稿