сенкс, помог
C++ (Qt)
QFile file("test.html");
file.open(QIODevice::ReadOnly);
QByteArray ba = file.readAll();
file.close();
QTextCodec *codec;
codec = QTextCodec::codecForHtml(ba, QTextCodec::codecForName("windows-1251"));
qDebug() << codec->name();
QString struni = codec->toUnicode(ba);
qDebug() << struni;
struni.replace("\\", "\\\\");
struni.replace("\"", "\\\"");
struni.replace("\b", "\\b");
struni.replace("\f", "\\f");
struni.replace("\n", "\\n");
struni.replace("\r", "");
struni.replace("\t", "\\t");
QString result;
for (int i = 0; i < struni.count(); i++) {
if (QString(struni.at(i)).contains(QRegExp("[А-я]"))) // [A-z0-9/<>\"\\s:\\.,]
result.append("\\u0" + QString::number(struni.at(i).unicode(), 16));
else
result.append(struni.at(i));
}
qDebug() << endl << endl << endl << result;