diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/string.cpp | 18 |
1 file changed, 16 insertions, 2 deletions
diff --git a/src/string.cpp b/src/string.cpp index d33356e6e..2a96921e4 100644 --- a/src/string.cpp +++ b/src/string.cpp @@ -752,7 +752,14 @@ public: /* The ICU word iterator considers both the start and the end of a word a valid * break point, but we only want word starts. Move to the next location in * case the new position points to whitespace. */ - while (pos != icu::BreakIterator::DONE && IsWhitespace(Utf16DecodeChar((const uint16 *)&this->utf16_str[pos]))) pos = this->word_itr->next(); + while (pos != icu::BreakIterator::DONE && + IsWhitespace(Utf16DecodeChar((const uint16 *)&this->utf16_str[pos]))) { + int32_t new_pos = this->word_itr->next(); + /* Don't set it to DONE if it was valid before. Otherwise we'll return END + * even though the iterator wasn't at the end of the string before. */ + if (new_pos == icu::BreakIterator::DONE) break; + pos = new_pos; + } this->char_itr->isBoundary(pos); break; @@ -777,7 +784,14 @@ public: /* The ICU word iterator considers both the start and the end of a word a valid * break point, but we only want word starts. Move to the previous location in * case the new position points to whitespace. */ - while (pos != icu::BreakIterator::DONE && IsWhitespace(Utf16DecodeChar((const uint16 *)&this->utf16_str[pos]))) pos = this->word_itr->previous(); + while (pos != icu::BreakIterator::DONE && + IsWhitespace(Utf16DecodeChar((const uint16 *)&this->utf16_str[pos]))) { + int32_t new_pos = this->word_itr->previous(); + /* Don't set it to DONE if it was valid before. Otherwise we'll return END + * even though the iterator wasn't at the start of the string before. */ + if (new_pos == icu::BreakIterator::DONE) break; + pos = new_pos; + } this->char_itr->isBoundary(pos); break; |