您当前的位置:首页 > IT编程 > C++
| C语言 | Java | VB | VC | python | Android | TensorFlow | C++ | oracle | 学术与代码 | cnn卷积神经网络 | gnn | 图像修复 | Keras | 数据集 | Neo4j | 自然语言处理 | 深度学习 | 医学CAD | 医学影像 | 超参数 | pointnet | pytorch | 异常检测 | Transformers | 情感分类 | 知识图谱 |

自学教程:C++ ubrk_next函数代码示例

51自学网 2021-06-03 09:05:01
  C++
这篇教程C++ ubrk_next函数代码示例写得很实用,希望能帮到您。

本文整理汇总了C++中ubrk_next函数的典型用法代码示例。如果您正苦于以下问题:C++ ubrk_next函数的具体用法?C++ ubrk_next怎么用?C++ ubrk_next使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了ubrk_next函数的30个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: printEachForward

/*
 * Print each element in order.
 *
 * Walks the break iterator forward from its first boundary and passes every
 * consecutive (start, end) boundary pair, together with str, to
 * printTextRange().
 *
 * boundary: break iterator to walk.
 * str:      text forwarded unchanged to printTextRange().
 */
void printEachForward( UBreakIterator* boundary, UChar* str) {
  int32_t segStart = ubrk_first(boundary);
  int32_t segEnd = ubrk_next(boundary);

  while (segEnd != UBRK_DONE) {
    printTextRange(str, segStart, segEnd);
    /* The end of this element is the start of the next one. */
    segStart = segEnd;
    segEnd = ubrk_next(boundary);
  }
}
开发者ID:winlibs,项目名称:icu4c,代码行数:9,


示例2: TestBug11665

static void TestBug11665(void) {    // The problem was with the incorrect breaking of Japanese text beginning    // with Katakana characters when no prior Japanese or Chinese text had been    // encountered.    //    // Tested here in cintltst, rather than in intltest, because only cintltst    // tests have the ability to reset ICU, which is needed to get the bug    // to manifest itself.    static UChar japaneseText[] = {0x30A2, 0x30EC, 0x30EB, 0x30AE, 0x30FC, 0x6027, 0x7D50, 0x819C, 0x708E};    int32_t boundaries[10] = {0};    UBreakIterator *bi = NULL;    int32_t brk;    int32_t brkIdx = 0;    int32_t totalBreaks = 0;    UErrorCode status = U_ZERO_ERROR;    ctest_resetICU();    bi = ubrk_open(UBRK_WORD, "en_US", japaneseText, UPRV_LENGTHOF(japaneseText), &status);    TEST_ASSERT_SUCCESS(status);    if (!bi) {        return;    }    for (brk=ubrk_first(bi); brk != UBRK_DONE; brk=ubrk_next(bi)) {        boundaries[brkIdx] = brk;        if (++brkIdx >= UPRV_LENGTHOF(boundaries) - 1) {            break;        }    }    if (brkIdx <= 2 || brkIdx >= UPRV_LENGTHOF(boundaries)) {        log_err("%s:%d too few or many breaks found./n", __FILE__, __LINE__);    } else {        totalBreaks = brkIdx;        brkIdx = 0;        for (brk=ubrk_first(bi); brk != UBRK_DONE; brk=ubrk_next(bi)) {            if (brk != boundaries[brkIdx]) {                log_err("%s:%d Break #%d differs between first and second iteration./n", __FILE__, __LINE__, brkIdx);                break;            }            if (++brkIdx >= UPRV_LENGTHOF(boundaries) - 1) {                log_err("%s:%d Too many breaks./n", __FILE__, __LINE__);                break;            }        }        if (totalBreaks != brkIdx) {            log_err("%s:%d Number of breaks differ between first and second iteration./n", __FILE__, __LINE__);        }    }    ubrk_close(bi);}
开发者ID:Cyril2004,项目名称:proto-quic,代码行数:50,


示例3: grapheme_extract_bytecount_iter

/* {{{ grapheme_extract_bytecount_iter - grapheme iterator for grapheme_extract MAXBYTES */
/*
 * Steps through the boundaries reported by ubrk_next(bi) and, for each one,
 * advances an offset into the UTF-8 buffer pstr with U8_FWD_N by the distance
 * between the current and previous boundary.
 *
 * bi:      break iterator to advance.
 * bsize:   largest offset into pstr that may be returned.
 * pstr:    UTF-8 buffer.
 * str_len: length limit handed to U8_FWD_N.
 *
 * Returns the last offset into pstr that did not exceed bsize.
 */
static inline int32_t
grapheme_extract_bytecount_iter(UBreakIterator *bi, int32_t bsize, unsigned char *pstr, int32_t str_len)
{
	int pos = 0, prev_pos = 0;
	int ret_pos = 0, prev_ret_pos = 0;

	while ( 1 ) {
		pos = ubrk_next(bi);

		if ( UBRK_DONE == pos ) {
			break;
		}

		prev_ret_pos = ret_pos;
		U8_FWD_N(pstr, ret_pos, str_len, pos - prev_pos);

		if ( ret_pos > bsize ) {
			/* this step went past the limit: roll back to the previous offset */
			ret_pos = prev_ret_pos;
			break;
		}

		if ( prev_ret_pos == ret_pos ) {
			/* something wrong - malformed utf8? */
			break;
		}

		prev_pos = pos;
	}

	return ret_pos;
}
开发者ID:Apfelfrisch,项目名称:php-src,代码行数:32,


示例4: numGraphemeClusters

// Counts the grapheme clusters in |string|.
//
// Returns 0 for an empty string. 8-bit strings take a fast path that only
// has to merge CR LF pairs; other strings are counted by stepping a
// character break iterator until it reports UBRK_DONE. If the iterator cannot
// be created the string length is returned.
unsigned numGraphemeClusters(StringView string)
{
    unsigned stringLength = string.length();

    if (!stringLength)
        return 0;

    // The only Latin-1 Extended Grapheme Cluster is CRLF.
    if (string.is8Bit()) {
        auto* characters = string.characters8();
        unsigned numCRLF = 0;
        for (unsigned i = 1; i < stringLength; ++i)
            numCRLF += characters[i - 1] == '\r' && characters[i] == '\n';
        return stringLength - numCRLF;
    }

    NonSharedCharacterBreakIterator iterator { string };
    if (!iterator) {
        ASSERT_NOT_REACHED();
        return stringLength;
    }

    unsigned numGraphemeClusters = 0;
    while (ubrk_next(iterator) != UBRK_DONE)
        ++numGraphemeClusters;
    return numGraphemeClusters;
}
开发者ID:wolfviking0,项目名称:webcl-webkit,代码行数:27,


示例5: grapheme_split_string

/* {{{ grapheme_split_string: find and optionally return grapheme boundaries */
/*
 * text, text_length:  UTF-16 text handed to the break iterator.
 * boundary_array:     optional output array (may be NULL); receives each
 *                     boundary position while there is room.
 * boundary_array_len: capacity of boundary_array.
 *
 * Returns the number of boundaries found (which may exceed
 * boundary_array_len), or -1 if the break iterator could not be obtained or
 * the text could not be set on it.
 */
int grapheme_split_string(const UChar *text, int32_t text_length, int boundary_array[], int boundary_array_len TSRMLS_DC )
{
	unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
	UErrorCode		status = U_ZERO_ERROR;
	int ret_len, pos;
	UBreakIterator* bi;

	bi = grapheme_get_break_iterator((void*)u_break_iterator_buffer, &status TSRMLS_CC );

	if( U_FAILURE(status) ) {
		return -1;
	}

	ubrk_setText(bi, text, text_length,	&status);

	/* Do not iterate over an iterator whose text could not be set. */
	if( U_FAILURE(status) ) {
		ubrk_close(bi);
		return -1;
	}

	ret_len = 0;

	while ( (pos = ubrk_next(bi)) != UBRK_DONE ) {
		/* Keep counting even when the output array is absent or full. */
		if ( NULL != boundary_array && ret_len < boundary_array_len ) {
			boundary_array[ret_len] = pos;
		}

		ret_len++;
	}

	ubrk_close(bi);

	return ret_len;
}
开发者ID:Hasib786,项目名称:php7,代码行数:36,


示例6: grapheme_extract_charcount_iter

/* {{{ grapheme_extract_charcount_iter - grapheme iterator for grapheme_extract MAXCHARS */
/*
 * Steps through the boundaries reported by ubrk_next(bi) while the boundary
 * position stays within csize, advancing an offset into the UTF-8 buffer
 * pstr in step with the iterator.
 *
 * bi:      break iterator to advance.
 * csize:   largest boundary position to accept.
 * pstr:    UTF-8 buffer.
 * str_len: length limit handed to U8_FWD_N.
 *
 * Returns the offset reached in pstr.
 */
static inline int32_t
grapheme_extract_charcount_iter(UBreakIterator *bi, int32_t csize, unsigned char *pstr, int32_t str_len)
{
	int pos = 0, prev_pos = 0;
	int ret_pos = 0, prev_ret_pos = 0;

	while ( 1 ) {
		pos = ubrk_next(bi);

		if ( UBRK_DONE == pos ) {
			break;
		}

		/* if we are beyond our limit, then the loop is done */
		if ( pos > csize ) {
			break;
		}

		/* update our pointer in the original UTF-8 buffer by as many characters
		   as ubrk_next iterated over */

		prev_ret_pos = ret_pos;
		U8_FWD_N(pstr, ret_pos, str_len, pos - prev_pos);

		if ( prev_ret_pos == ret_pos ) {
			/* something wrong - malformed utf8? */
			break;
		}

		prev_pos = pos;
	}

	return ret_pos;
}
开发者ID:Apfelfrisch,项目名称:php-src,代码行数:35,


示例7: icu_BreakIterator_split

// BreakIterator.split {{{static PyObject *icu_BreakIterator_split(icu_BreakIterator *self, PyObject *args, PyObject *kwargs) {    int32_t prev = 0, p = 0, sz = 0;    PyObject *ans = NULL, *token = NULL;      ans = PyList_New(0);    if (ans == NULL) return PyErr_NoMemory();    p = ubrk_first(self->break_iterator);    while (p != UBRK_DONE) {        prev = p; p = ubrk_next(self->break_iterator);        if (self->type == UBRK_WORD && ubrk_getRuleStatus(self->break_iterator) == UBRK_WORD_NONE)             continue;  // We are not at the start of a word        sz = (p == UBRK_DONE) ? self->text_len - prev : p - prev;        if (sz > 0) {            token = icu_to_python(self->text + prev, sz);            if (token == NULL) {                Py_DECREF(ans); ans = NULL; break;             }            if (PyList_Append(ans, token) != 0) {                Py_DECREF(token); Py_DECREF(ans); ans = NULL; break;             }            Py_DECREF(token);        }    }    return ans;} // }}}
开发者ID:IvoNet,项目名称:calibre,代码行数:30,


示例8: convert_split_words

	std::vector<lstring> convert_split_words(const lstring &lt) {		std::vector<lstring> ret;		UBreakIterator* bi;		int prev = -1, pos;		UErrorCode err = U_ZERO_ERROR;		bi = ubrk_open(UBRK_WORD, get_locale(), (UChar *)lt.data(), lt.size(), &err);		if (U_FAILURE(err))			return ret;		pos = ubrk_first(bi);		while (pos != UBRK_DONE) {			int rules = ubrk_getRuleStatus(bi);			if ((rules == UBRK_WORD_NONE) || (prev == -1)) {				prev = pos;			} else {				ret.emplace_back(lt.substr(prev, pos - prev));				prev = -1;			}			pos = ubrk_next(bi);		}		ubrk_close(bi);		return ret;	}
开发者ID:reverbrain,项目名称:ribosome,代码行数:29,


示例9: numCodeUnitsInGraphemeClusters

// Returns the number of code units spanned by the first
// |numGraphemeClusters| grapheme clusters of |string|.
//
// If the string has no more code units than the requested cluster count, the
// whole length is returned. 8-bit strings take a fast path in which only a
// CR LF pair can span more than one code unit; other strings advance a
// character break iterator the requested number of times.
unsigned numCodeUnitsInGraphemeClusters(StringView string, unsigned numGraphemeClusters)
{
    unsigned stringLength = string.length();

    if (stringLength <= numGraphemeClusters)
        return stringLength;

    // The only Latin-1 Extended Grapheme Cluster is CRLF.
    if (string.is8Bit()) {
        auto* characters = string.characters8();
        unsigned i, j;
        for (i = 0, j = 0; i < numGraphemeClusters && j + 1 < stringLength; ++i, ++j)
            j += characters[j] == '\r' && characters[j + 1] == '\n';
        // If the loop stopped on the last code unit with clusters still owed,
        // that final unit is one more cluster.
        return j + (i < numGraphemeClusters);
    }

    NonSharedCharacterBreakIterator iterator { string };
    if (!iterator) {
        ASSERT_NOT_REACHED();
        return stringLength;
    }

    for (unsigned i = 0; i < numGraphemeClusters; ++i) {
        if (ubrk_next(iterator) == UBRK_DONE)
            return stringLength;
    }
    return ubrk_current(iterator);
}
开发者ID:wolfviking0,项目名称:webcl-webkit,代码行数:28,


示例10: nextImpl

// Moves the break iterator identified by |address| by |n| boundaries.
//
// n < 0 steps backwards -n times with ubrk_previous(), n > 0 steps forwards
// n times with ubrk_next(), and n == 0 leaves the iterator where it is.
// Returns the value of the last ICU call made (for n == 0, ubrk_current()).
static jint nextImpl(JNIEnv* env, jclass, jint address, jint n) {
    UBreakIterator* bi = breakIterator(address);

    if (n < 0) {
        // Take all but the last step here so the final call supplies the result.
        while (n++ < -1) {
            ubrk_previous(bi);
        }
        return ubrk_previous(bi);
    }

    if (n == 0) {
        return ubrk_current(bi);
    }

    while (n-- > 1) {
        ubrk_next(bi);
    }
    return ubrk_next(bi);
}
开发者ID:Ar3kkusu,项目名称:android_libcore,代码行数:17,


示例11: icu_BreakIterator_index

// BreakIterator.index {{{static PyObject *icu_BreakIterator_index(icu_BreakIterator *self, PyObject *token) {#if PY_VERSION_HEX >= 0x03030000#error Not implemented for python >= 3.3#endif    UChar *buf = NULL, *needle = NULL;    int32_t word_start = 0, p = 0, sz = 0, ans = -1, leading_hyphen = 0, trailing_hyphen = 0;    buf = python_to_icu(token, &sz);    if (buf == NULL) return NULL;    if (sz < 1) goto end;    needle = buf;    if (sz > 1 && IS_HYPHEN_CHAR(buf[0])) { needle = buf + 1; leading_hyphen = 1; sz -= 1; }    if (sz > 1 && IS_HYPHEN_CHAR(buf[sz-1])) trailing_hyphen = 1;    Py_BEGIN_ALLOW_THREADS;    p = ubrk_first(self->break_iterator);    while (p != UBRK_DONE) {        word_start = p; p = ubrk_next(self->break_iterator);        if (self->type == UBRK_WORD && ubrk_getRuleStatus(self->break_iterator) == UBRK_WORD_NONE)            continue;  // We are not at the start of a word        if (self->text_len >= word_start + sz && memcmp(self->text + word_start, needle, sz * sizeof(UChar)) == 0) {            if (word_start > 0 && (                    (leading_hyphen && !IS_HYPHEN_CHAR(self->text[word_start-1])) ||                    (!leading_hyphen && IS_HYPHEN_CHAR(self->text[word_start-1]))            )) continue;            if (!trailing_hyphen && IS_HYPHEN_CHAR(self->text[word_start + sz])) continue;            if (p == UBRK_DONE || self->text_len <= word_start + sz) { ans = word_start; break; }            if (                    // Check that the found word is followed by a word boundary                    ubrk_isBoundary(self->break_iterator, word_start + sz) &&                    // If there is a leading hyphen check  that the leading                    // hyphen is preceded by a word boundary                    (!leading_hyphen || (word_start > 1 && ubrk_isBoundary(self->break_iterator, word_start - 2))) &&                    // Check that there is a word boundary *after* the trailing                    // hyphen. 
We cannot rely on ubrk_isBoundary() as that                    // always returns true because of the trailing hyphen.                    (!trailing_hyphen || ubrk_following(self->break_iterator, word_start + sz) == UBRK_DONE || ubrk_getRuleStatus(self->break_iterator) == UBRK_WORD_NONE)            ) { ans = word_start; break; }            if (p != UBRK_DONE) ubrk_isBoundary(self->break_iterator, p); // Reset the iterator to its position before the call to ubrk_isBoundary()        }    }    if (leading_hyphen && ans > -1) ans -= 1;#ifdef Py_UNICODE_WIDE    if (ans > 0) ans = u_countChar32(self->text, ans);#endif    Py_END_ALLOW_THREADS;end:    free(buf);    return Py_BuildValue("l", (long)ans);} // }}}
开发者ID:artbycrunk,项目名称:calibre,代码行数:59,


示例12: TestBreakIteratorUText

/* *  static void TestBreakIteratorUText(void); * *         Test that ubrk_setUText() is present and works for a simple case. */static void TestBreakIteratorUText(void) {    const char *UTF8Str = "/x41/xc3/x85/x5A/x20/x41/x52/x69/x6E/x67";  /* c3 85 is utf-8 for A with a ring on top */                      /*   0  1   2 34567890  */    UErrorCode      status = U_ZERO_ERROR;    UBreakIterator *bi     = NULL;    int32_t         pos    = 0;    UText *ut = utext_openUTF8(NULL, UTF8Str, -1, &status);    TEST_ASSERT_SUCCESS(status);    bi = ubrk_open(UBRK_WORD, "en_US", NULL, 0, &status);    if (U_FAILURE(status)) {        log_err_status(status, "Failure at file %s, line %d, error = %s/n", __FILE__, __LINE__, u_errorName(status));        return;    }    ubrk_setUText(bi, ut, &status);    if (U_FAILURE(status)) {        log_err("Failure at file %s, line %d, error = %s/n", __FILE__, __LINE__, u_errorName(status));        return;    }    pos = ubrk_first(bi);    TEST_ASSERT(pos == 0);    pos = ubrk_next(bi);    TEST_ASSERT(pos == 4);    pos = ubrk_next(bi);    TEST_ASSERT(pos == 5);    pos = ubrk_next(bi);    TEST_ASSERT(pos == 10);    pos = ubrk_next(bi);    TEST_ASSERT(pos == UBRK_DONE);    ubrk_close(bi);    utext_close(ut);}
开发者ID:Epictetus,项目名称:build-couchdb,代码行数:46,


示例13: TestBreakIteratorTailoring

static void TestBreakIteratorTailoring(void) {    const RBBITailoringTest * testPtr;    for (testPtr = tailoringTests; testPtr->locale != NULL; ++testPtr) {        UErrorCode status = U_ZERO_ERROR;        UBreakIterator* ubrkiter = ubrk_open(testPtr->type, testPtr->locale, testPtr->test, -1, &status);        if ( U_SUCCESS(status) ) {            int32_t offset, offsindx;            UBool foundError;            foundError = FALSE;            for (offsindx = 0; (offset = ubrk_next(ubrkiter)) != UBRK_DONE; ++offsindx) {                if (!foundError && offsindx >= testPtr->numOffsets) {                    log_err("FAIL: locale %s, break type %d, ubrk_next expected UBRK_DONE, got %d/n",                            testPtr->locale, testPtr->type, offset);                    foundError = TRUE;                } else if (!foundError && offset != testPtr->offsFwd[offsindx]) {                    log_err("FAIL: locale %s, break type %d, ubrk_next expected %d, got %d/n",                            testPtr->locale, testPtr->type, testPtr->offsFwd[offsindx], offset);                    foundError = TRUE;                }            }            if (!foundError && offsindx < testPtr->numOffsets) {                log_err("FAIL: locale %s, break type %d, ubrk_next expected %d, got UBRK_DONE/n",                    	testPtr->locale, testPtr->type, testPtr->offsFwd[offsindx]);            }            foundError = FALSE;            for (offsindx = 0; (offset = ubrk_previous(ubrkiter)) != UBRK_DONE; ++offsindx) {                if (!foundError && offsindx >= testPtr->numOffsets) {                    log_err("FAIL: locale %s, break type %d, ubrk_previous expected UBRK_DONE, got %d/n",                            testPtr->locale, testPtr->type, offset);                    foundError = TRUE;                } else if (!foundError && offset != testPtr->offsRev[offsindx]) {                    log_err("FAIL: locale %s, break type %d, ubrk_previous expected %d, got %d/n",                         
   testPtr->locale, testPtr->type, testPtr->offsRev[offsindx], offset);                    foundError = TRUE;                }            }            if (!foundError && offsindx < testPtr->numOffsets) {                log_err("FAIL: locale %s, break type %d, ubrk_previous expected %d, got UBRK_DONE/n",                    	testPtr->locale, testPtr->type, testPtr->offsRev[offsindx]);            }            ubrk_close(ubrkiter);        } else {            log_err_status(status, "FAIL: locale %s, break type %d, ubrk_open status: %s/n", testPtr->locale, testPtr->type, u_errorName(status));        }    }}
开发者ID:Acorld,项目名称:WinObjC-Heading,代码行数:49,


示例14: TestBreakIteratorRules

/* *  TestBreakIteratorRules - Verify that a break iterator can be created from *                           a set of source rules. */static void TestBreakIteratorRules() {    /*  Rules will keep together any run of letters not including 'a', OR     *             keep together 'abc', but only when followed by 'def', OTHERWISE     *             just return one char at a time.     */    char         rules[]  = "abc{666}/def;/n   [//p{L} - [a]]* {2};  . {1};";    /*                        0123456789012345678 */    char         data[]   =  "abcdex abcdefgh-def";     /* the test data string                     */    char         breaks[] =  "**    **  *    **  *";    /*  * the expected break positions          */    char         tags[]   =  "01    21  6    21  2";    /*  expected tag values at break positions  */    int32_t      tagMap[] = {0, 1, 2, 3, 4, 5, 666};    UChar       *uData;    void        *freeHook = NULL;    UErrorCode   status   = U_ZERO_ERROR;    int32_t      pos;    int          i;    UBreakIterator *bi = testOpenRules(rules);    if (bi == NULL) {return;}    uData = toUChar(data, &freeHook);    ubrk_setText(bi,  uData, -1, &status);    pos = ubrk_first(bi);    for (i=0; i<sizeof(breaks); i++) {        if (pos == i && breaks[i] != '*') {            log_err("FAIL: unexpected break at position %d found/n", pos);            break;        }        if (pos != i && breaks[i] == '*') {            log_err("FAIL: expected break at position %d not found./n", i);            break;        }        if (pos == i) {            int32_t tag, expectedTag;            tag = ubrk_getRuleStatus(bi);            expectedTag = tagMap[tags[i]&0xf];            if (tag != expectedTag) {                log_err("FAIL: incorrect tag value.  Position = %d;  expected tag %d, got %d",                    pos, expectedTag, tag);                break;            }            pos = ubrk_next(bi);        }    }    freeToUCharStrings(&freeHook);    ubrk_close(bi);}
开发者ID:Epictetus,项目名称:build-couchdb,代码行数:53,


示例15: MojDbTextTokenizer::tokenize

// Splits |text| at the boundaries of a clone of m_ubrk and stores one key per
// segment whose end boundary has a rule status other than UBRK_WORD_NONE.
// With a collator the key is the collator's sort key for the segment;
// otherwise it is the segment converted back to a MojString.
// Returns MojErrNone on success; failures propagate through the
// MojErrCheck / MojUnicodeErrCheck macros.
MojErr MojDbTextTokenizer::tokenize(const MojString& text, MojDbTextCollator* collator, KeySet& keysOut) const
{
    LOG_TRACE("Entering function %s", __FUNCTION__);
    MojAssert(m_ubrk.get());

    // convert to UChar from str
    MojDbTextUtils::UnicodeVec unicodeStr;
    MojErr err = MojDbTextUtils::strToUnicode(text, unicodeStr);
    MojErrCheck(err);

    // clone break iterator and set text
    MojByte cloneBuf[U_BRK_SAFECLONE_BUFFERSIZE];
    UErrorCode status = U_ZERO_ERROR;
    MojInt32 cloneBufSize = sizeof(cloneBuf);
    IterPtr iter(ubrk_safeClone(m_ubrk.get(), cloneBuf, &cloneBufSize, &status));
    MojUnicodeErrCheck(status);
    MojAssert(iter.get());
    ubrk_setText(iter.get(), unicodeStr.begin(), (MojInt32) unicodeStr.size(), &status);
    MojUnicodeErrCheck(status);

    // tokenStart trails boundary by one step; -1 until the first boundary is passed
    MojInt32 tokenStart = -1;
    for (MojInt32 boundary = ubrk_first(iter.get());
         boundary != UBRK_DONE;
         tokenStart = boundary, boundary = ubrk_next(iter.get())) {
        const UWordBreak wordStatus = (UWordBreak) ubrk_getRuleStatus(iter.get());
        if (wordStatus == UBRK_WORD_NONE)
            continue;

        MojAssert(tokenStart != -1);
        MojDbKey key;
        const UChar* tokChars = unicodeStr.begin() + tokenStart;
        MojSize tokSize = (MojSize) (boundary - tokenStart);

        if (collator) {
            err = collator->sortKey(tokChars, tokSize, key);
            MojErrCheck(err);
        } else {
            MojString tok;
            err = MojDbTextUtils::unicodeToStr(tokChars, tokSize, tok);
            MojErrCheck(err);
            err = key.assign(tok);
            MojErrCheck(err);
        }
        err = keysOut.put(key);
        MojErrCheck(err);
    }
    return MojErrNone;
}
开发者ID:webOS-ports,项目名称:db8,代码行数:47,


示例16: tokenizer_next

/*
 * Copy the next token of t->str into word, skipping single-character
 * punctuation segments and any segment that contains whitespace.
 *
 * t:    tokenizer state; t->start and t->end are advanced, and t->str is
 *       temporarily NUL-terminated at t->end while a segment is examined.
 * word: output buffer for the converted token.
 * size: capacity of word in bytes; the result is always NUL-terminated.
 *
 * Returns 0 when a token was copied, -1 when the iterator is exhausted.
 */
int tokenizer_next( tokenizer_t *t, char *word, size_t size ) {
  UChar   savedEndChar;
  int k;

  // start iterator
  if( t->end == 0 ) {
    t->start = ubrk_first(t->boundary);
  }

  // Find next word
again:
  t->end = ubrk_next(t->boundary);
  if( t->end == UBRK_DONE ) {
    return -1;
  }

  // Null terminate
  savedEndChar = t->str[t->end];
  t->str[t->end] = 0;

  // Skip punctuation
  if( t->end - t->start == 1 && u_ispunct( t->str[t->start] ) ) {
    t->str[t->end] = savedEndChar;
    t->start = t->end;
    goto again;
  }

  // Skip whitespace
  for( k=t->start; k<t->end; k++ ) {
    if( u_isspace( t->str[k] ) == 1 ) {
      t->str[t->end] = savedEndChar;
      t->start = t->end;
      goto again;
    }
  }

  // Copy to C buffer
  u_austrncpy(word, t->str+t->start, size-1);
  word[size-1] = 0;
  printf("string[%2d..%2d] \"%s\" %d\n", t->start, t->end-1, word, u_isspace( t->str[t->start]));

  // Undo the temporary terminator before moving on
  t->str[t->end] = savedEndChar;
  t->start = t->end;

  return 0;
}
开发者ID:adamsch1,项目名称:csearch,代码行数:45,


示例17: TestBreakIteratorStatusVec

/**   TestsBreakIteratorStatusVals()   Test the ubrk_getRuleStatusVec() funciton*/static void TestBreakIteratorStatusVec() {    #define RULE_STRING_LENGTH 200    UChar          rules[RULE_STRING_LENGTH];    #define TEST_STRING_LENGTH 25    UChar           testString[TEST_STRING_LENGTH];    UBreakIterator *bi        = NULL;    int32_t         pos       = 0;    int32_t         vals[10];    int32_t         numVals;    UErrorCode      status    = U_ZERO_ERROR;    u_uastrncpy(rules,  "[A-N]{100}; /n"                             "[a-w]{200}; /n"                             "[//p{L}]{300}; /n"                             "[//p{N}]{400}; /n"                             "[0-5]{500}; /n"                              "!.*;/n", RULE_STRING_LENGTH);    u_uastrncpy(testString, "ABC", TEST_STRING_LENGTH);    bi = ubrk_openRules(rules, -1, testString, -1, NULL, &status);    TEST_ASSERT_SUCCESS(status);    TEST_ASSERT(bi != NULL);    /* The TEST_ASSERT above should change too... */    if (bi != NULL) {        pos = ubrk_next(bi);        TEST_ASSERT(pos == 1);        memset(vals, -1, sizeof(vals));        numVals = ubrk_getRuleStatusVec(bi, vals, 10, &status);        TEST_ASSERT_SUCCESS(status);        TEST_ASSERT(numVals == 2);        TEST_ASSERT(vals[0] == 100);        TEST_ASSERT(vals[1] == 300);        TEST_ASSERT(vals[2] == -1);        numVals = ubrk_getRuleStatusVec(bi, vals, 0, &status);        TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);        TEST_ASSERT(numVals == 2);    }    ubrk_close(bi);}
开发者ID:Epictetus,项目名称:build-couchdb,代码行数:48,


示例18: TestBreakIteratorRefresh

static void TestBreakIteratorRefresh(void) {    /*     *  RefreshInput changes out the input of a Break Iterator without     *    changing anything else in the iterator's state.  Used with Java JNI,     *    when Java moves the underlying string storage.   This test     *    runs a ubrk_next() repeatedly, moving the text in the middle of the sequence.     *    The right set of boundaries should still be found.     */    UChar testStr[]  = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0};  /* = " A B C D"  */    UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,  0};    UErrorCode status = U_ZERO_ERROR;    UBreakIterator *bi;    UText ut1 = UTEXT_INITIALIZER;    UText ut2 = UTEXT_INITIALIZER;        bi = ubrk_open(UBRK_LINE, "en_US", NULL, 0, &status);    TEST_ASSERT_SUCCESS(status);    if (U_FAILURE(status)) {        return;    }    utext_openUChars(&ut1, testStr, -1, &status);    TEST_ASSERT_SUCCESS(status);    ubrk_setUText(bi, &ut1, &status);    TEST_ASSERT_SUCCESS(status);    if (U_SUCCESS(status)) {        /* Line boundaries will occur before each letter in the original string */        TEST_ASSERT(1 == ubrk_next(bi));        TEST_ASSERT(3 == ubrk_next(bi));        /* Move the string, kill the original string.  */        u_strcpy(movedStr, testStr);        u_memset(testStr, 0x20, u_strlen(testStr));        utext_openUChars(&ut2, movedStr, -1, &status);        TEST_ASSERT_SUCCESS(status);        ubrk_refreshUText(bi, &ut2, &status);        TEST_ASSERT_SUCCESS(status);            /* Find the following matches, now working in the moved string. */        TEST_ASSERT(5 == ubrk_next(bi));        TEST_ASSERT(7 == ubrk_next(bi));        TEST_ASSERT(8 == ubrk_next(bi));        TEST_ASSERT(UBRK_DONE == ubrk_next(bi));        TEST_ASSERT_SUCCESS(status);        utext_close(&ut1);        utext_close(&ut2);    }    ubrk_close(bi);}
开发者ID:Cyril2004,项目名称:proto-quic,代码行数:51,


示例19: len

/*
 * NIF: count the elements of a binary that the iterator resource considers
 * valid.
 *
 * argv[0]: iterator resource (iterator_type).
 * argv[1]: binary whose bytes are handed to ubrk_setText() as UChar data.
 *
 * Returns the count as an Erlang integer, or badarg when the arguments are
 * of the wrong number or kind or no iterator can be obtained. CHECK_RES and
 * CHECK may return early on their own conditions.
 */
ERL_NIF_TERM len(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    ErlNifBinary in;
    cloner* ptr;
    UBreakIterator* iter;
    int count = 0, pos;
    UErrorCode status = U_ZERO_ERROR;

    if (argc != 2)
        return enif_make_badarg(env);

    /* Last argument must be a binary */
    if (!enif_inspect_binary(env, argv[1], &in)
     || !enif_get_resource(env, argv[0], iterator_type, (void**) &ptr)) {
        return enif_make_badarg(env);
    }

    iter = (UBreakIterator*) cloner_get(ptr);
    CHECK_RES(env, iter);
    if (iter == NULL) {
        return enif_make_badarg(env);
    }

    /* Do count */
    ubrk_setText(iter,
        (UChar *) in.data,
        TO_ULEN(in.size),
        &status);
    CHECK(env, status);

    /* Each boundary after the first closes one element. */
    pos = ubrk_first(iter);
    while (pos != UBRK_DONE) {
        pos = ubrk_next(iter);
        if (pos != UBRK_DONE && is_valid_elem(ptr, iter))
            count++;
    }

    return enif_make_int(env, count);
}
开发者ID:alepharchives,项目名称:i18n,代码行数:43,


示例20: grapheme_count_graphemes

/* {{{ grapheme_count_graphemes */
/*
 * Set string (string_len UChars) on bi and return how many boundaries
 * ubrk_next() reports before UBRK_DONE. The status returned by
 * ubrk_setText() is not inspected.
 */
int32_t grapheme_count_graphemes(UBreakIterator *bi, UChar *string, int32_t string_len)
{
	UErrorCode	status = U_ZERO_ERROR;
	int count = 0;

	ubrk_setText(bi, string, string_len, &status);

	while ( ubrk_next(bi) != UBRK_DONE ) {
		count++;
	}

	return count;
}
开发者ID:Tyrael,项目名称:php-src,代码行数:21,


示例21: grapheme_count

/*
 * Count the boundaries that follow the first one in ustr.
 *
 * ubrk: break iterator; the text is set on it for the duration of the call
 *       and released again with ubrk_unbindText().
 * ustr: string providing ptr and len.
 *
 * Returns the count, or -1 if the text could not be set.
 */
int32_t grapheme_count(UBreakIterator *ubrk, const UString *ustr)
{
    UErrorCode status = U_ZERO_ERROR;
    int32_t total = 0;

    ubrk_setText(ubrk, ustr->ptr, ustr->len, &status);
    if (U_FAILURE(status)) {
        return -1;
    }

    if (ubrk_first(ubrk) != UBRK_DONE) {
        while (ubrk_next(ubrk) != UBRK_DONE) {
            total++;
        }
    }
    ubrk_unbindText(ubrk);

    return total;
}
开发者ID:julp,项目名称:ugrep,代码行数:20,


示例22: icu_BreakIterator_index

// BreakIterator.index {{{static PyObject *icu_BreakIterator_index(icu_BreakIterator *self, PyObject *args, PyObject *kwargs) {#if PY_VERSION_HEX >= 0x03030000 #error Not implemented for python >= 3.3#endif    UChar *buf = NULL;    int32_t prev = 0, p = 0, sz = 0, tsz = 0, ans = -1;    PyObject *token = NULL;      if (!PyArg_ParseTuple(args, "O", &token)) return NULL;    buf = python_to_icu(token, &sz, 1);    if (buf == NULL) return NULL;    if (sz < 1) goto end;    Py_BEGIN_ALLOW_THREADS;    p = ubrk_first(self->break_iterator);    while (p != UBRK_DONE) {        prev = p; p = ubrk_next(self->break_iterator);        if (self->type == UBRK_WORD && ubrk_getRuleStatus(self->break_iterator) == UBRK_WORD_NONE)             continue;  // We are not at the start of a word        tsz = (p == UBRK_DONE) ? self->text_len - prev : p - prev;        if (sz == tsz && memcmp(self->text + prev, buf, sz * sizeof(UChar)) == 0) { #ifdef PY_UNICODE_WIDE            ans = u_countChar32(self->text, prev);#else            ans = prev; #endif            break;        }    }    Py_END_ALLOW_THREADS;end:    free(buf);    return Py_BuildValue("i", ans);} // }}}
开发者ID:IvoNet,项目名称:calibre,代码行数:39,


示例23: icu_BreakIterator_split2

// BreakIterator.split2 {{{static PyObject *icu_BreakIterator_split2(icu_BreakIterator *self, PyObject *args, PyObject *kwargs) {#if PY_VERSION_HEX >= 0x03030000 #error Not implemented for python >= 3.3#endif    int32_t prev = 0, p = 0, sz = 0;    PyObject *ans = NULL, *temp = NULL;      ans = PyList_New(0);    if (ans == NULL) return PyErr_NoMemory();    p = ubrk_first(self->break_iterator);    while (p != UBRK_DONE) {        prev = p; p = ubrk_next(self->break_iterator);        if (self->type == UBRK_WORD && ubrk_getRuleStatus(self->break_iterator) == UBRK_WORD_NONE)             continue;  // We are not at the start of a word        sz = (p == UBRK_DONE) ? self->text_len - prev : p - prev;        if (sz > 0) {#ifdef Py_UNICODE_WIDE            sz = u_countChar32(self->text + prev, sz);            prev = u_countChar32(self->text, prev);#endif            temp = Py_BuildValue("II", prev, sz);             if (temp == NULL) {                Py_DECREF(ans); ans = NULL; break;             }             if (PyList_Append(ans, temp) != 0) {                Py_DECREF(temp); Py_DECREF(ans); ans = NULL; break;             }            Py_DECREF(temp);        }    }    return ans;} // }}}
开发者ID:IvoNet,项目名称:calibre,代码行数:38,


示例24: grapheme_extract_count_iter

/* {{{ grapheme_extract_count_iter - grapheme iterator for grapheme_extract COUNT */
/*
 * Advances bi by up to size boundaries, then moves an offset into the UTF-8
 * buffer pstr forward with U8_FWD_N by the position of the last boundary
 * reached.
 *
 * bi:      break iterator to advance.
 * size:    maximum number of boundaries to step over.
 * pstr:    UTF-8 buffer.
 * str_len: length limit handed to U8_FWD_N.
 *
 * Returns the resulting offset into pstr.
 */
static inline int32_t
grapheme_extract_count_iter(UBreakIterator *bi, int32_t size, unsigned char *pstr, int32_t str_len)
{
	int pos = 0, next_pos = 0;
	int ret_pos = 0;

	while ( size ) {
		next_pos = ubrk_next(bi);

		if ( UBRK_DONE == next_pos ) {
			break;
		}
		pos = next_pos;
		size--;
	}

	/* pos is one past the last UChar - and represent the number of code units to
		advance in the utf-8 buffer
	*/

	U8_FWD_N(pstr, ret_pos, str_len, pos);

	return ret_pos;
}
开发者ID:Apfelfrisch,项目名称:php-src,代码行数:25,


示例25: icuNext

/*
** Extract the next token from a tokenization cursor.
**
** Segments are taken between successive break-iterator boundaries. Leading
** whitespace is trimmed from each segment and segments that become empty
** are skipped. The token is converted to UTF-8 into the cursor's buffer,
** which is grown and the conversion retried whenever it is too small.
**
** Returns SQLITE_OK with the OUT parameters filled in, SQLITE_DONE when the
** iterator is exhausted, or SQLITE_NOMEM if the buffer cannot be grown.
*/
static int icuNext(
  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by simpleOpen */
  const char **ppToken,               /* OUT: *ppToken is the token text */
  int *pnBytes,                       /* OUT: Number of bytes in token */
  int *piStartOffset,                 /* OUT: Starting offset of token */
  int *piEndOffset,                   /* OUT: Ending offset of token */
  int *piPosition                     /* OUT: Position integer of token */
){
  IcuCursor *pCsr = (IcuCursor *)pCursor;

  int iStart = 0;
  int iEnd = 0;
  int nByte = 0;

  while( iStart==iEnd ){
    UChar32 c;

    iStart = ubrk_current(pCsr->pIter);
    iEnd = ubrk_next(pCsr->pIter);
    if( iEnd==UBRK_DONE ){
      return SQLITE_DONE;
    }

    while( iStart<iEnd ){
      int iWhite = iStart;
      /* aChar is the UTF-16 source passed to u_strToUTF8() below, so it must
      ** be decoded with the UTF-16 macro rather than the UTF-8 one. */
      U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
      if( u_isspace(c) ){
        iStart = iWhite;
      }else{
        break;
      }
    }
    assert(iStart<=iEnd);
  }

  do {
    UErrorCode status = U_ZERO_ERROR;
    if( nByte ){
      /* A previous pass reported the size needed: grow the buffer to fit. */
      char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
      if( !zNew ){
        return SQLITE_NOMEM;
      }
      pCsr->zBuffer = zNew;
      pCsr->nBuffer = nByte;
    }

    u_strToUTF8(
        pCsr->zBuffer, pCsr->nBuffer, &nByte,    /* Output vars */
        &pCsr->aChar[iStart], iEnd-iStart,       /* Input vars */
        &status                                  /* Output success/failure */
    );
  } while( nByte>pCsr->nBuffer );

  *ppToken = pCsr->zBuffer;
  *pnBytes = nByte;
  *piStartOffset = pCsr->aOffset[iStart];
  *piEndOffset = pCsr->aOffset[iEnd];
  *piPosition = pCsr->iToken++;

  return SQLITE_OK;
}
开发者ID:Av3ng3,项目名称:Lamobo-D1s,代码行数:64,


示例26: _toTitle

/* * Internal titlecasing function. * * Must get titleIter!=NULL. */static int32_t_toTitle(const UCaseProps *csp,         UChar *dest, int32_t destCapacity,         const UChar *src, UCaseContext *csc,         int32_t srcLength,         UBreakIterator *titleIter,         const char *locale, int32_t *locCache,         UErrorCode *pErrorCode) {    const UChar *s;    UChar32 c;    int32_t prev, titleStart, titleLimit, index, destIndex, length;    UBool isFirstIndex;    /* set up local variables */    destIndex=0;    prev=0;    isFirstIndex=TRUE;    /* titlecasing loop */    while(prev<srcLength) {        /* find next index where to titlecase */        if(isFirstIndex) {            isFirstIndex=FALSE;            index=ubrk_first(titleIter);        } else {            index=ubrk_next(titleIter);        }        if(index==UBRK_DONE || index>srcLength) {            index=srcLength;        }        /*         * Unicode 4 & 5 section 3.13 Default Case Operations:         *         * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex         * #29, "Text Boundaries." Between each pair of word boundaries, find the first         * cased character F. If F exists, map F to default_title(F); then map each         * subsequent character C to default_lower(C).         
*         * In this implementation, segment [prev..index[ into 3 parts:         * a) uncased characters (copy as-is) [prev..titleStart[         * b) first case letter (titlecase)         [titleStart..titleLimit[         * c) subsequent characters (lowercase)                 [titleLimit..index[         */        if(prev<index) {            /* find and copy uncased characters [prev..titleStart[ */            titleStart=titleLimit=prev;            for(;;) {                U16_NEXT(src, titleLimit, srcLength, c);                if(UCASE_NONE!=ucase_getType(csp, c)) {                    break; /* cased letter at [titleStart..titleLimit[ */                }                titleStart=titleLimit;                if(titleLimit==index) {                    /*                     * only uncased characters in [prev..index[                     * stop with titleStart==titleLimit==index                     */                    break;                }            }            length=titleStart-prev;            if(length>0) {                if((destIndex+length)<=destCapacity) {                    uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR);                }                destIndex+=length;            }            if(titleStart<titleLimit) {                /* titlecase c which is from [titleStart..titleLimit[ */                csc->cpStart=titleStart;                csc->cpLimit=titleLimit;                c=ucase_toFullTitle(csp, c, utf16_caseContextIterator, csc, &s, locale, locCache);                destIndex=appendResult(dest, destIndex, destCapacity, c, s);                /* lowercase [titleLimit..index[ */                if(titleLimit<index) {                    destIndex+=                        _caseMap(                            csp, ucase_toFullLower,                            dest+destIndex, destCapacity-destIndex,                            src, csc,                            titleLimit, index,                            locale, locCache,              
              pErrorCode);                }            }        }        prev=index;    }    if(destIndex>destCapacity) {        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;//.........这里部分代码省略.........
开发者ID:mason105,项目名称:red5cpp,代码行数:101,


示例27: _toTitle

/* * Internal titlecasing function. */static int32_t_toTitle(UCaseMap *csm,         UChar *dest, int32_t destCapacity,         const UChar *src, UCaseContext *csc,         int32_t srcLength,         UErrorCode *pErrorCode) {    const UChar *s;    UChar32 c;    int32_t prev, titleStart, titleLimit, idx, destIndex, length;    UBool isFirstIndex;    if(csm->iter!=NULL) {        ubrk_setText(csm->iter, src, srcLength, pErrorCode);    } else {        csm->iter=ubrk_open(UBRK_WORD, csm->locale,                            src, srcLength,                            pErrorCode);    }    if(U_FAILURE(*pErrorCode)) {        return 0;    }    /* set up local variables */    destIndex=0;    prev=0;    isFirstIndex=TRUE;    /* titlecasing loop */    while(prev<srcLength) {        /* find next index where to titlecase */        if(isFirstIndex) {            isFirstIndex=FALSE;            idx=ubrk_first(csm->iter);        } else {            idx=ubrk_next(csm->iter);        }        if(idx==UBRK_DONE || idx>srcLength) {            idx=srcLength;        }        /*         * Unicode 4 & 5 section 3.13 Default Case Operations:         *         * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex         * #29, "Text Boundaries." Between each pair of word boundaries, find the first         * cased character F. If F exists, map F to default_title(F); then map each         * subsequent character C to default_lower(C).         
*         * In this implementation, segment [prev..index[ into 3 parts:         * a) uncased characters (copy as-is) [prev..titleStart[         * b) first case letter (titlecase)         [titleStart..titleLimit[         * c) subsequent characters (lowercase)                 [titleLimit..index[         */        if(prev<idx) {            /* find and copy uncased characters [prev..titleStart[ */            titleStart=titleLimit=prev;            U16_NEXT(src, titleLimit, idx, c);            if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) {                /* Adjust the titlecasing index (titleStart) to the next cased character. */                for(;;) {                    titleStart=titleLimit;                    if(titleLimit==idx) {                        /*                         * only uncased characters in [prev..index[                         * stop with titleStart==titleLimit==index                         */                        break;                    }                    U16_NEXT(src, titleLimit, idx, c);                    if(UCASE_NONE!=ucase_getType(csm->csp, c)) {                        break; /* cased letter at [titleStart..titleLimit[ */                    }                }                length=titleStart-prev;                if(length>0) {                    if((destIndex+length)<=destCapacity) {                        uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR);                    }                    destIndex+=length;                }            }            if(titleStart<titleLimit) {                /* titlecase c which is from [titleStart..titleLimit[ */                csc->cpStart=titleStart;                csc->cpLimit=titleLimit;                c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &csm->locCache);                destIndex=appendResult(dest, destIndex, destCapacity, c, s);                 /* Special case Dutch IJ titlecasing */  
              if ( titleStart+1 < idx &&                      ucase_getCaseLocale(csm->locale,&csm->locCache) == UCASE_LOC_DUTCH &&                     ( src[titleStart] == (UChar32) 0x0049 || src[titleStart] == (UChar32) 0x0069 ) &&                     ( src[titleStart+1] == (UChar32) 0x004A || src[titleStart+1] == (UChar32) 0x006A )) {                             c=(UChar32) 0x004A;                            destIndex=appendResult(dest, destIndex, destCapacity, c, s);                            titleLimit++;//.........这里部分代码省略.........
开发者ID:venkatarajasekhar,项目名称:Qt,代码行数:101,


示例28: icu_BreakIterator_split2

// BreakIterator.split2 {{{static PyObject *icu_BreakIterator_split2(icu_BreakIterator *self, PyObject *args) {#if PY_VERSION_HEX >= 0x03030000#error Not implemented for python >= 3.3#endif    int32_t word_start = 0, p = 0, sz = 0, last_pos = 0, last_sz = 0;    int is_hyphen_sep = 0, leading_hyphen = 0, trailing_hyphen = 0;    UChar sep = 0;    PyObject *ans = NULL, *temp = NULL, *t = NULL;    ans = PyList_New(0);    if (ans == NULL) return PyErr_NoMemory();    p = ubrk_first(self->break_iterator);    while (p != UBRK_DONE) {        word_start = p; p = ubrk_next(self->break_iterator);        if (self->type == UBRK_WORD && ubrk_getRuleStatus(self->break_iterator) == UBRK_WORD_NONE)            continue;  // We are not at the start of a word        sz = (p == UBRK_DONE) ? self->text_len - word_start : p - word_start;        if (sz > 0) {            // ICU breaks on words containing hyphens, we do not want that, so we recombine manually            is_hyphen_sep = 0; leading_hyphen = 0; trailing_hyphen = 0;            if (word_start > 0) { // Look for a leading hyphen                sep = *(self->text + word_start - 1);                if (IS_HYPHEN_CHAR(sep)) {                    leading_hyphen = 1;                    if (last_pos > 0 && word_start - last_pos == 1) is_hyphen_sep = 1;                }            }            if (word_start + sz < self->text_len) { // Look for a trailing hyphen                sep = *(self->text + word_start + sz);                if (IS_HYPHEN_CHAR(sep)) trailing_hyphen = 1;            }            last_pos = p;#ifdef Py_UNICODE_WIDE            sz = u_countChar32(self->text + word_start, sz);            word_start = u_countChar32(self->text, word_start);#endif            if (is_hyphen_sep && PyList_GET_SIZE(ans) > 0) {                sz = last_sz + sz + trailing_hyphen;                last_sz = sz;                t = PyInt_FromLong((long)sz);                if (t == NULL) { Py_DECREF(ans); ans = NULL; break; }                temp = 
PyList_GET_ITEM(ans, PyList_GET_SIZE(ans) - 1);                Py_DECREF(PyTuple_GET_ITEM(temp, 1));                PyTuple_SET_ITEM(temp, 1, t);            } else {                sz += leading_hyphen + trailing_hyphen;                last_sz = sz;                temp = Py_BuildValue("ll", (long)(word_start - leading_hyphen), (long)sz);                if (temp == NULL) {                    Py_DECREF(ans); ans = NULL; break;                }                if (PyList_Append(ans, temp) != 0) {                    Py_DECREF(temp); Py_DECREF(ans); ans = NULL; break;                }                Py_DECREF(temp);            }        }    }    return ans;} // }}}
开发者ID:artbycrunk,项目名称:calibre,代码行数:66,


示例29: textBreakNext

// Advances the given iterator by one boundary and returns whatever
// ubrk_next() reports for the underlying ICU break iterator.
int textBreakNext(TextBreakIterator* iterator)
{
    UBreakIterator* icuIterator = reinterpret_cast<UBreakIterator*>(iterator);
    return ubrk_next(icuIterator);
}
开发者ID:CannedFish,项目名称:deepin-webkit,代码行数:4,


示例30: printFirst

/*
 * Print the first element: the text between the first boundary and the
 * boundary that follows it.
 *
 * boundary - break iterator to query; it is repositioned by this call
 * str      - text buffer handed to printTextRange()
 *
 * Nothing is printed when the iterator has no boundary after the first one.
 */
void printFirst(UBreakIterator* boundary, UChar* str) {
  int32_t start = ubrk_first(boundary);
  int32_t end = ubrk_next(boundary);

  /* ubrk_next() yields UBRK_DONE once the boundaries are exhausted (for
     example on empty text); that sentinel is not a valid text offset, so
     it must not be passed on as the end of a range. */
  if (end == UBRK_DONE) {
    return;
  }
  printTextRange(str, start, end);
}
开发者ID:winlibs,项目名称:icu4c,代码行数:7,



注:本文中的ubrk_next函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


C++ uc函数代码示例
C++ ubrk_close函数代码示例
万事OK自学网:51自学网_软件自学网_CAD自学网自学excel、自学PS、自学CAD、自学C语言、自学css3实例,是一个通过网络自主学习工作技能的自学平台,网友喜欢的软件自学网站。