EHS Embedded HTTP Server  1.5.0.132
samples/wsutf8.h
00001  /*
00002   * This file has been derived from the WebSockets++ project at
00003   * https://github.com/zaphoyd/websocketpp which is licensed under a BSD-license.
00004   */
00005 
00006 // Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
00007 // See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
00008 
00009 #ifndef UTF8_VALIDATOR_HPP
00010 #define UTF8_VALIDATOR_HPP
00011 
#include <stdint.h>
#include <string>
00013 
namespace utf8_validator {

/// DFA state meaning "all bytes consumed so far form complete, valid sequences".
static const unsigned int UTF8_ACCEPT = 0;
/// DFA state meaning "an invalid byte was seen". Every transition out of this
/// state leads back to it, so a rejected stream stays rejected until reset.
static const unsigned int UTF8_REJECT = 1;

/// Combined lookup table for the decoder.
///
/// Entries [0, 256) map an input byte to its character class (0..11).
/// Entries [256, 400) are the transition table: nine states (s0..s8) with
/// sixteen slots each, indexed as 256 + state * 16 + character_class.
static const uint8_t utf8d[] = {
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
  8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
  0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
  0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
  0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
  1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
  1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
  1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
};

/// Advance the decoder by one byte.
///
/// @param state  In/out DFA state. Must hold a value previously produced by
///               this function (or UTF8_ACCEPT); it is used unchecked as a
///               table index.
/// @param codep  In/out code point accumulator. Holds the decoded code point
///               whenever the returned state is UTF8_ACCEPT.
/// @param byte   The next input byte.
/// @return The new state: UTF8_ACCEPT, UTF8_REJECT, or an intermediate state
///         meaning more continuation bytes are expected.
inline uint32_t
decode(uint32_t* state, uint32_t* codep, uint8_t byte) {
    const uint32_t type = utf8d[byte];

    // Mid-sequence: append the six payload bits of a continuation byte.
    // Start of a sequence: mask off the length-marker bits of the lead byte.
    *codep = (*state != UTF8_ACCEPT) ?
        (byte & 0x3fu) | (*codep << 6) :
        (0xff >> type) & (byte);

    *state = utf8d[256 + *state * 16 + type];
    return *state;
}

/// Incremental UTF-8 validator that can be fed a stream in arbitrary pieces.
class validator {
    public:
        /// Construct a validator in the initial (accepting) state.
        validator() : m_state(UTF8_ACCEPT), m_codepoint(0) {}

        /// Feed a single byte to the validator.
        ///
        /// @param byte The byte to consume; only the low eight bits are used.
        /// @return false if the stream is invalid as of this byte, else true.
        bool consume (uint32_t byte) {
            // Truncation to eight bits is intentional and made explicit here.
            return utf8_validator::decode(&m_state, &m_codepoint,
                                          static_cast<uint8_t>(byte)) != UTF8_REJECT;
        }

        /// Feed a range of bytes to the validator.
        ///
        /// Stops at the first byte that makes the stream invalid.
        ///
        /// @param b Iterator to the first byte.
        /// @param e Iterator one past the last byte.
        /// @return false if an invalid byte was found, else true. A true
        ///         result does not imply the input ended on a code point
        ///         boundary; use complete() for that.
        template <typename iterator_type>
        bool decode (iterator_type b, iterator_type e) {
            for (iterator_type i = b; i != e; ++i) {
                // Elements are commonly (possibly signed) char; reinterpret
                // each one as an unsigned byte explicitly.
                const uint8_t byte = static_cast<uint8_t>(*i);
                if (utf8_validator::decode(&m_state, &m_codepoint, byte) == UTF8_REJECT) {
                    return false;
                }
            }
            return true;
        }

        /// @return true if the bytes consumed so far end on a code point
        ///         boundary of a valid stream (the validator is in the
        ///         accepting state).
        bool complete() const {
            return m_state == UTF8_ACCEPT;
        }

        /// Return the validator to its initial state so it can be reused.
        void reset() {
            m_state = UTF8_ACCEPT;
            m_codepoint = 0;
        }
    private:
        uint32_t    m_state;      ///< Current DFA state.
        uint32_t    m_codepoint;  ///< Code point accumulated so far.
};

/// Convenience function: validate a complete string in one call.
///
/// Defined inline because it lives in a header that may be included from
/// several translation units.
///
/// @param s The bytes to check.
/// @return true if s is valid UTF-8 and does not end in the middle of a
///         multi-byte sequence.
inline bool validate(const std::string& s) {
    validator v;
    return v.decode(s.begin(), s.end()) && v.complete();
}

} // namespace utf8_validator
00114 
00115 #endif // UTF8_VALIDATOR_HPP