OpenMW
components/to_utf8/to_utf8.hpp
Go to the documentation of this file.
00001 #ifndef COMPONENTS_TOUTF8_H
00002 #define COMPONENTS_TOUTF8_H
00003 
00004 #include <string>
00005 #include <cstring>
00006 #include <vector>
00007 
00008 namespace ToUTF8
00009 {
00010     // All of the source code pages that are currently supported.
00011     enum FromType
00012     {
00013         WINDOWS_1250,      // Central and Eastern European languages
00014         WINDOWS_1251,      // Cyrillic languages
00015         WINDOWS_1252,       // Used by the English version of Morrowind (and
00016             // probably others)
00017         CP437           // Used for fonts (*.fnt) if the data files' encoding is 1252. Otherwise, fonts use the same encoding as the data files.
00018     };
00019     // Free functions declared here; their definitions are not part of this header.
00020     FromType calculateEncoding(const std::string& encodingName); // NOTE(review): presumably maps an encoding name to its FromType value -- confirm against the definition
00021     std::string encodingUsingMessage(const std::string& encodingName); // NOTE(review): presumably builds a message naming the encoding in use -- confirm against the definition
00022 
00023     // Encoder tied to a single source code page, which is chosen once at
00024     // construction time through the FromType argument.
00025     class Utf8Encoder
00026     {
00027         public:
00028             Utf8Encoder(FromType sourceEncoding);
00029             // (The constructor above is declared only; its definition lives elsewhere.)
00030             // Produce UTF-8 text from input in the code page given at construction.
00031             std::string getUtf8(const char* input, size_t size);
00032             std::string getUtf8(const std::string& str) // std::string convenience overload
00033             {
00034                 return this->getUtf8(str.c_str(), str.length());
00035             }
00036             // NOTE(review): presumably the reverse direction (UTF-8 to the legacy code page) -- confirm against the definition.
00037             std::string getLegacyEnc(const char* input, size_t size);
00038             std::string getLegacyEnc(const std::string& str) // std::string convenience overload
00039             {
00040                 return this->getLegacyEnc(str.c_str(), str.length());
00041             }
00042             // Everything below is implementation detail, declared here and defined out of line.
00043         private:
00044             void resize(size_t size);
00045             size_t getLength(const char* input, bool& ascii);
00046             void copyFromArray(unsigned char chp, char*& out);
00047             size_t getLength2(const char* input, bool& ascii);
00048             void copyFromArray2(const char*& chp, char*& out);
00049             // Data members.
00050             std::vector<char> mOutput; // NOTE(review): presumably the buffer that resize() manages -- confirm
00051             signed char* translationArray; // NOTE(review): raw pointer; ownership is not visible from this header
00052     };
00053 }
00054 
00055 #endif