C++ (Qt)QRegExp reg("( |<a [^>]*>[^>]*</a>|\\n)");int index = 0;int oldIndex = 0;QString text = Utf("Однажды в <a href=\"qweqwe\">студеную зимнюю</a> пору\nСижу <a href=\"zc\">за решеткой в</a> темнице сырой"); QList <QStringList> resList;QStringList curList; while ((index = reg.indexIn(text, index)) >= 0){ if (text.at(index) == '<') { oldIndex = index; index += reg.matchedLength() + 1; curList.append(text.mid(oldIndex, index - oldIndex - 1).replace(QRegExp("<[^>]*>"), "")); } else if (text.at(index) == '\n') { curList.append(text.mid(oldIndex, index - oldIndex)); resList.append(curList); curList.clear(); index += reg.matchedLength(); } else { curList.append(text.mid(oldIndex, index - oldIndex)); index += reg.matchedLength(); } oldIndex = index;}curList.append(text.mid(oldIndex));resList.append(curList);
C++ (Qt)void splitHtml(QString text, QVector<QStringList> &result){ QString line; text.remove(QRegExp("<([^>]*)>")); foreach(line, text.split("\n")) { QStringList temp; QRegExp rx("(\\S+)"); int pos=0; while ((pos = rx.indexIn(line, pos)) != -1) { temp<<rx.cap(1); pos += rx.matchedLength(); } result<<temp; }}
C++ (Qt)QRegExp reg("( |<a[^>]*>[^>]*</a>|\\n)");int index = 0;int oldIndex = 0;QString text = Utf("Однажды в студеную зимнюю <a href=\"qweqwe\">пору</a>\nСижу за решеткой <a href=\"zc\">в темнице сырой</a>"); QList <QStringList> resList;QStringList curList; while ((index = reg.indexIn(text, index)) >= 0){ if (text.at(index) == '<') { oldIndex = index; index += reg.matchedLength(); curList.append(text.mid(oldIndex, index - oldIndex).replace(QRegExp("<[^>]*>"), "")); if (text.at(index) == ' ') index ++; } else if (text.at(index) == '\n') { if (index != oldIndex) curList.append(text.mid(oldIndex, index - oldIndex)); resList.append(curList); curList.clear(); index += reg.matchedLength(); } else { curList.append(text.mid(oldIndex, index - oldIndex)); index += reg.matchedLength(); } oldIndex = index;} if (oldIndex != text.length()) curList.append(text.mid(oldIndex));resList.append(curList);
C++ (Qt)void splitHtml(QString text, QList<QStringList> &result){ text = text.replace("<br/>", "\n"); text = text.remove("<nobr>").remove("</nobr>"); QRegExp reg("( |<a[^>]*>[^>]*</a>|\n)"); QRegExp rxHLink("<[^>]*>"); int left = 0, right = 0; QStringList lineList; while ( (right = reg.indexIn(text, left)) != -1 ) { if(text.at(right) == '<') { right += reg.matchedLength(); lineList << text.mid( left, right-left).replace(rxHLink, ""); right--; } else if( text.at(right) == '\n' ) { result << lineList; lineList.clear(); } else { lineList << text.mid(left, right-left); } left = right+1; } result.append(lineList);}