/// Converts a wide string produced by the markup scanner into a QString,
/// decoding its contents as Windows-1251 text.
///
/// Assumption (confirm against the input stream in use): every wchar_t in
/// @p str carries exactly one byte of the original Windows-1251 input, possibly
/// sign-extended (e.g. 0xFFAB for byte 0xAB). Only the low 8 bits of each
/// element are therefore used.
///
/// @param str NUL-terminated wide string; may be null.
/// @return The decoded text, or an empty QString when @p str is null.
QString scannerResult(const wchar_t *str)
{
    if (!str)
        return QString();

    // Collect one byte per wide character. Reinterpreting the wchar_t buffer
    // itself as bytes would depend on sizeof(wchar_t) and would also feed the
    // padding / sign-extension bytes to the decoder.
    const size_t length = wcslen(str);
    QByteArray bytes;
    bytes.reserve(static_cast<int>(length));
    for (size_t i = 0; i < length; ++i)
        bytes.append(static_cast<char>(static_cast<unsigned char>(str[i] & 0xFF)));

    QTextCodec *codec = QTextCodec::codecForName("Windows-1251");
    if (!codec) {
        // codecForName() returns null when the codec is unavailable; fall back
        // to a byte-for-byte Latin-1 mapping instead of dereferencing null.
        return QString::fromLatin1(bytes.constData(), bytes.size());
    }
    return codec->toUnicode(bytes);
}
// C++ (Qt)
// ... (earlier part of the excerpt elided)

// Wide character type used by the declarations below.
typedef wchar_t wchar;

// ... (part of the excerpt elided)

// Returns the current value as a wide-character string.
const wchar* get_value();

// get attribute name
const char* get_attr_name();

// get tag name
const char* get_tag_name();
C++ (Qt)struct str_istream: public markup::instream{ const char* p; const char* end; str_istream(const char* src): p(src), end(src + strlen(src)) {} virtual wchar_t get_char() { return p < end? *p++: 0; }}; .... QByteArray htmlData = htmlFile.readAll(); str_istream si(htmlData); markup::scanner sc(si); bool useEOF = false; while(true) { int t = sc.get_token(); switch(t) { case markup::scanner::TT_ERROR: qDebug() << "ERROR"; break; case markup::scanner::TT_EOF: useEOF = true; qDebug() << "EOF"; break; case markup::scanner::TT_TAG_START: qDebug() << "TAG START:" << sc.get_tag_name(); break; case markup::scanner::TT_TAG_END: qDebug() << "TAG END:" << sc.get_tag_name(); break; case markup::scanner::TT_ATTR: qDebug() << QString("\tATTR:%1 = %2").arg(sc.get_attr_name()).arg(QString::fromWCharArray(sc.get_value())); break; case markup::scanner::TT_WORD: case markup::scanner::TT_SPACE: QString str1 = QString::fromWCharArray(sc.get_value()); qDebug() << "value:" << str1; break; } if (useEOF) break; }
value0x003bc060 "ᆱ+ᄏᅠラ" [0]: 65451 L'ᆱ' [1]: 65472 L'' [2]: 43 L'+' [3]: 65467 L'ᄏ' [4]: 65440 L'ᅠ' [5]: 65431 L'ラ' [6]: 0