您当前的位置：首页 > IT编程 > C++
\| C语言 \| Java \| VB \| VC \| python \| Android \| TensorFlow \| C++ \| oracle \| 学术与代码 \| cnn卷积神经网络 \| gnn \| 图像修复 \| Keras \| 数据集 \| Neo4j \| 自然语言处理 \| 深度学习 \| 医学CAD \| 医学影像 \| 超参数 \| pointnet \| pytorch \| 异常检测 \| Transformers \| 情感分类 \| 知识图谱 \|

自学教程：C++ ASSERT_HOST函数代码示例

51自学网 2021-06-01 19:37:36

C++

这篇教程《C++ ASSERT_HOST函数代码示例》写得很实用，希望能帮到您。

本文整理汇总了C++中ASSERT_HOST函数的典型用法代码示例。如果您正苦于以下问题：C++ ASSERT_HOST函数的具体用法？C++ ASSERT_HOST怎么用？C++ ASSERT_HOST使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了ASSERT_HOST函数的30个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: ASSERT_HOST

// Returns a random number in [-range, range].double Network::Random(double range) {  ASSERT_HOST(randomizer_ != NULL);  return randomizer_->SignedRand(range);}

开发者ID:hoiqs，项目名称:tesseract-ocr，代码行数:5，

示例2: ASSERT_HOST

void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size,                        UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice,                        MATRIX *ratings) {  int num_blobs_to_replace = 0;  int begin_blob_index = 0;  int i;  // Rating and certainty for the new BLOB_CHOICE are derived from the  // replaced choices.  float new_rating = 0.0f;  float new_certainty = 0.0f;  BLOB_CHOICE* old_choice = nullptr;  for (i = 0; i < wrong_ngram_begin_index + wrong_ngram_size; ++i) {    if (i >= wrong_ngram_begin_index) {      int num_blobs = werd_choice->state(i);      int col = begin_blob_index + num_blobs_to_replace;      int row = col + num_blobs - 1;      BLOB_CHOICE_LIST* choices = ratings->get(col, row);      ASSERT_HOST(choices != nullptr);      old_choice = FindMatchingChoice(werd_choice->unichar_id(i), choices);      ASSERT_HOST(old_choice != nullptr);      new_rating += old_choice->rating();      new_certainty += old_choice->certainty();      num_blobs_to_replace += num_blobs;    } else {      begin_blob_index += werd_choice->state(i);    }  }  new_certainty /= wrong_ngram_size;  // If there is no entry in the ratings matrix, add it.  MATRIX_COORD coord(begin_blob_index,                     begin_blob_index + num_blobs_to_replace - 1);  if (!coord.Valid(*ratings)) {    ratings->IncreaseBandSize(coord.row - coord.col + 1);  }  if (ratings->get(coord.col, coord.row) == nullptr)    ratings->put(coord.col, coord.row, new BLOB_CHOICE_LIST);  BLOB_CHOICE_LIST* new_choices = ratings->get(coord.col, coord.row);  BLOB_CHOICE* choice = FindMatchingChoice(correct_ngram_id, new_choices);  if (choice != nullptr) {    // Already there. Upgrade if new rating better.    if (new_rating < choice->rating())      choice->set_rating(new_rating);    if (new_certainty < choice->certainty())      choice->set_certainty(new_certainty);    // DO NOT SORT!! It will mess up the iterator in LanguageModel::UpdateState.  
} else {    // Need a new choice with the correct_ngram_id.    choice = new BLOB_CHOICE(*old_choice);    choice->set_unichar_id(correct_ngram_id);    choice->set_rating(new_rating);    choice->set_certainty(new_certainty);    choice->set_classifier(BCC_AMBIG);    choice->set_matrix_cell(coord.col, coord.row);    BLOB_CHOICE_IT it (new_choices);    it.add_to_end(choice);  }  // Remove current unichar from werd_choice. On the last iteration  // set the correct replacement unichar instead of removing a unichar.  for (int replaced_count = 0; replaced_count < wrong_ngram_size;       ++replaced_count) {    if (replaced_count + 1 == wrong_ngram_size) {      werd_choice->set_blob_choice(wrong_ngram_begin_index,                                   num_blobs_to_replace, choice);    } else {      werd_choice->remove_unichar_id(wrong_ngram_begin_index + 1);    }  }  if (stopper_debug_level >= 1) {      werd_choice->print("ReplaceAmbig() ");      tprintf("Modified blob_choices: ");      print_ratings_list("/n", new_choices, getUnicharset());  }}

开发者ID:ming-hai，项目名称:tesseract，代码行数:73，

示例3: worst_noise_blob

// Finds the noisiest blob in the interior of a word.
//
// Each blob of word_res->outword gets a noise score: accepted blobs (per
// word_res->reject_map) get non_noise_limit, the others get
// blob_noise_score(). The search range is then narrowed from both ends until
// fixsp_non_noise_limit blobs scoring at least non_noise_limit have been
// passed on each side, and the blob with the lowest score below small_limit
// inside that range is selected.
//
// Returns the index of the selected blob, or -1 if the word has fewer than 5
// blobs, if either end has too few non-noise blobs, if the narrowed range is
// empty, or if no blob scores below small_limit. *worst_noise_score is only
// written once the narrowed range is known to be non-empty; it then holds the
// selected blob's score (or small_limit if none was selected).
inT16 worst_noise_blob(WERD_RES *word_res, float *worst_noise_score) {
  PBLOB_IT blob_it;
  inT16 blob_count;
  float noise_score[512];
  int i;
  int min_noise_blob;            // 1st contender
  int max_noise_blob;            // last contender
  int non_noise_count;
  int worst_index;               // Worst blob (renamed: no longer shadows
                                 // the function name)
  float small_limit = bln_x_height * fixsp_small_outlines_size;
  float non_noise_limit = bln_x_height * 0.8;

  blob_it.set_to_list (word_res->outword->blob_list ());
  // normalised
  blob_count = blob_it.length ();
  // noise_score is a fixed 512-entry buffer, so the blob count must fit.
  ASSERT_HOST (blob_count <= 512);
  if (blob_count < 5)
    return -1;                   // too short to split

  /* Get the noise scores for all blobs */

  #ifndef SECURE_NAMES
  if (debug_fix_space_level > 5)
    // The quotes around %s must be escaped with backslashes; the scraped
    // text had /" which terminated the string literal early.
    tprintf ("FP fixspace Noise metrics for \"%s\": ",
      word_res->best_choice->string ().string ());
  #endif

  for (i = 0; i < blob_count; i++, blob_it.forward ()) {
    if (word_res->reject_map[i].accepted ())
      noise_score[i] = non_noise_limit;
    else
      noise_score[i] = blob_noise_score (blob_it.data ());

    if (debug_fix_space_level > 5)
      tprintf ("%1.1f ", noise_score[i]);
  }
  if (debug_fix_space_level > 5)
    tprintf ("\n");

  /* Now find the worst one which is far enough away from the end of the word */

  // Skip forward past fixsp_non_noise_limit non-noise blobs from the start.
  non_noise_count = 0;
  for (i = 0;
       (i < blob_count) && (non_noise_count < fixsp_non_noise_limit); i++) {
    if (noise_score[i] >= non_noise_limit)
      non_noise_count++;
  }
  if (non_noise_count < fixsp_non_noise_limit)
    return -1;

  min_noise_blob = i;

  // Skip backward past fixsp_non_noise_limit non-noise blobs from the end.
  non_noise_count = 0;
  for (i = blob_count - 1;
       (i >= 0) && (non_noise_count < fixsp_non_noise_limit); i--) {
    if (noise_score[i] >= non_noise_limit)
      non_noise_count++;
  }
  if (non_noise_count < fixsp_non_noise_limit)
    return -1;

  max_noise_blob = i;

  if (min_noise_blob > max_noise_blob)
    return -1;

  // Pick the lowest score strictly below small_limit within the range.
  *worst_noise_score = small_limit;
  worst_index = -1;
  for (i = min_noise_blob; i <= max_noise_blob; i++) {
    if (noise_score[i] < *worst_noise_score) {
      worst_index = i;
      *worst_noise_score = noise_score[i];
    }
  }
  return worst_index;
}

开发者ID:90b56587，项目名称:Tesseract-iPhone-Demo，代码行数:73，

示例4: ASSERT_HOST

// Top-level method to perform splitting based on current settings.// Returns true if a split was actually performed.// split_for_pageseg should be true if the splitting is being done prior to// page segmentation. This mode uses the flag// pageseg_devanagari_split_strategy to determine the splitting strategy.bool ShiroRekhaSplitter::Split(bool split_for_pageseg) {  SplitStrategy split_strategy = split_for_pageseg ? pageseg_split_strategy_ :      ocr_split_strategy_;  if (split_strategy == NO_SPLIT) {    return false;  // Nothing to do.  }  ASSERT_HOST(split_strategy == MINIMAL_SPLIT ||              split_strategy == MAXIMAL_SPLIT);  ASSERT_HOST(orig_pix_);  if (devanagari_split_debuglevel > 0) {    tprintf("Splitting shiro-rekha .../n");    tprintf("Split strategy = %s/n",            split_strategy == MINIMAL_SPLIT ? "Minimal" : "Maximal");    tprintf("Initial pageseg available = %s/n",            segmentation_block_list_ ? "yes" : "no");  }  // Create a copy of original image to store the splitting output.  pixDestroy(&splitted_image_);  splitted_image_ = pixCopy(NULL, orig_pix_);  // Initialize debug image if required.  if (devanagari_split_debugimage) {    pixDestroy(&debug_image_);    debug_image_ = pixConvertTo32(orig_pix_);  }  // Determine all connected components in the input image. A close operation  // may be required prior to this, depending on the current settings.  Pix* pix_for_ccs = pixClone(orig_pix_);  if (perform_close_ && global_xheight_ != kUnspecifiedXheight &&      !segmentation_block_list_) {    if (devanagari_split_debuglevel > 0) {      tprintf("Performing a global close operation../n");    }    // A global measure is available for xheight, but no local information    // exists.    
pixDestroy(&pix_for_ccs);    pix_for_ccs = pixCopy(NULL, orig_pix_);    PerformClose(pix_for_ccs, global_xheight_);  }  Pixa* ccs;  Boxa* tmp_boxa = pixConnComp(pix_for_ccs, &ccs, 8);  boxaDestroy(&tmp_boxa);  pixDestroy(&pix_for_ccs);  // Iterate over all connected components. Get their bounding boxes and clip  // out the image regions corresponding to these boxes from the original image.  // Conditionally run splitting on each of them.  Boxa* regions_to_clear = boxaCreate(0);  for (int i = 0; i < pixaGetCount(ccs); ++i) {    Box* box = ccs->boxa->box[i];    Pix* word_pix = pixClipRectangle(orig_pix_, box, NULL);    ASSERT_HOST(word_pix);    int xheight = GetXheightForCC(box);    if (xheight == kUnspecifiedXheight && segmentation_block_list_ &&        devanagari_split_debugimage) {      pixRenderBoxArb(debug_image_, box, 1, 255, 0, 0);    }    // If some xheight measure is available, attempt to pre-eliminate small    // blobs from the shiro-rekha process. This is primarily to save the CCs    // corresponding to punctuation marks/small dots etc which are part of    // larger graphemes.    if (xheight == kUnspecifiedXheight ||        (box->w > xheight / 3 && box->h > xheight / 2)) {      SplitWordShiroRekha(split_strategy, word_pix, xheight,                          box->x, box->y, regions_to_clear);    } else if (devanagari_split_debuglevel > 0) {      tprintf("CC dropped from splitting: %d,%d (%d, %d)/n",              box->x, box->y, box->w, box->h);    }    pixDestroy(&word_pix);  }  // Actually clear the boxes now.  for (int i = 0; i < boxaGetCount(regions_to_clear); ++i) {    Box* box = boxaGetBox(regions_to_clear, i, L_CLONE);    pixClearInRect(splitted_image_, box);    boxDestroy(&box);  }  boxaDestroy(&regions_to_clear);  pixaDestroy(&ccs);  if (devanagari_split_debugimage) {    DumpDebugImage(split_for_pageseg ? "pageseg_split_debug.png" :                   "ocr_split_debug.png");  }  return true;}

开发者ID:ManishKSharma，项目名称:tess-two，代码行数:91，

示例5: part_it

// Attempt to improve this by adding partitions or expanding partitions.void ColPartitionSet::ImproveColumnCandidate(WidthCallback* cb,                                             PartSetVector* src_sets) {  int set_size = src_sets->size();  // Iterate over the provided column sets, as each one may have something  // to improve this.  for (int i = 0; i < set_size; ++i) {    ColPartitionSet* column_set = src_sets->get(i);    if (column_set == NULL)      continue;    // Iterate over the parts in this and column_set, adding bigger or    // new parts in column_set to this.    ColPartition_IT part_it(&parts_);    ASSERT_HOST(!part_it.empty());    int prev_right = MIN_INT32;    part_it.mark_cycle_pt();    ColPartition_IT col_it(&column_set->parts_);    for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {      ColPartition* col_part = col_it.data();      if (col_part->blob_type() < BRT_UNKNOWN)        continue;  // Ignore image partitions.      int col_left = col_part->left_key();      int col_right = col_part->right_key();      // Sync-up part_it (in this) so it matches the col_part in column_set.      ColPartition* part = part_it.data();      while (!part_it.at_last() && part->right_key() < col_left) {        prev_right = part->right_key();        part_it.forward();        part = part_it.data();      }      int part_left = part->left_key();      int part_right = part->right_key();      if (part_right < col_left || col_right < part_left) {        // There is no overlap so this is a new partition.        AddPartition(col_part->ShallowCopy(), &part_it);        continue;      }      // Check the edges of col_part to see if they can improve part.      bool part_width_ok = cb->Run(part->KeyWidth(part_left, part_right));      if (col_left < part_left && col_left > prev_right) {        // The left edge of the column is better and it doesn't overlap,        // so we can potentially expand it.        
int col_box_left = col_part->BoxLeftKey();        bool tab_width_ok = cb->Run(part->KeyWidth(col_left, part_right));        bool box_width_ok = cb->Run(part->KeyWidth(col_box_left, part_right));        if (tab_width_ok || (!part_width_ok )) {          // The tab is leaving the good column metric at least as good as          // it was before, so use the tab.          part->CopyLeftTab(*col_part, false);          part->SetColumnGoodness(cb);        } else if (col_box_left < part_left &&                   (box_width_ok || !part_width_ok)) {          // The box is leaving the good column metric at least as good as          // it was before, so use the box.          part->CopyLeftTab(*col_part, true);          part->SetColumnGoodness(cb);        }        part_left = part->left_key();      }      if (col_right > part_right &&          (part_it.at_last() ||           part_it.data_relative(1)->left_key() > col_right)) {        // The right edge is better, so we can possibly expand it.        int col_box_right = col_part->BoxRightKey();        bool tab_width_ok = cb->Run(part->KeyWidth(part_left, col_right));        bool box_width_ok = cb->Run(part->KeyWidth(part_left, col_box_right));        if (tab_width_ok || (!part_width_ok )) {          // The tab is leaving the good column metric at least as good as          // it was before, so use the tab.          part->CopyRightTab(*col_part, false);          part->SetColumnGoodness(cb);        } else if (col_box_right > part_right &&                   (box_width_ok || !part_width_ok)) {          // The box is leaving the good column metric at least as good as          // it was before, so use the box.          part->CopyRightTab(*col_part, true);          part->SetColumnGoodness(cb);        }      }    }  }  ComputeCoverage();}

开发者ID:ErfanHasmin，项目名称:scope-ocr，代码行数:83，

示例6: bleft

/** * Sets up auto page segmentation, determines the orientation, and corrects it. * Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to * facilitate testing. * photo_mask_pix is a pointer to a NULL pointer that will be filled on return * with the leptonica photo mask, which must be pixDestroyed by the caller. * to_blocks is an empty list that will be filled with (usually a single) * block that is used during layout analysis. This ugly API is required * because of the possibility of a unlv zone file. * TODO(rays) clean this up. * See AutoPageSeg for other arguments. * The returned ColumnFinder must be deleted after use. */ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(    PageSegMode pageseg_mode, BLOCK_LIST* blocks, Tesseract* osd_tess,    OSResults* osr, TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix,    Pix** music_mask_pix) {  int vertical_x = 0;  int vertical_y = 1;  TabVector_LIST v_lines;  TabVector_LIST h_lines;  ICOORD bleft(0, 0);  ASSERT_HOST(pix_binary_ != NULL);  if (tessedit_dump_pageseg_images) {    pixa_debug_.AddPix(pix_binary_, "PageSegInput");  }  // Leptonica is used to find the rule/separator lines in the input.  LineFinder::FindAndRemoveLines(source_resolution_,                                 textord_tabfind_show_vlines, pix_binary_,                                 &vertical_x, &vertical_y, music_mask_pix,                                 &v_lines, &h_lines);  if (tessedit_dump_pageseg_images) {    pixa_debug_.AddPix(pix_binary_, "NoLines");  }  // Leptonica is used to find a mask of the photo regions in the input.  *photo_mask_pix = ImageFind::FindImages(pix_binary_, &pixa_debug_);  if (tessedit_dump_pageseg_images) {    pixa_debug_.AddPix(pix_binary_, "NoImages");  }  if (!PSM_COL_FIND_ENABLED(pageseg_mode)) v_lines.clear();  // The rest of the algorithm uses the usual connected components.  
textord_.find_components(pix_binary_, blocks, to_blocks);  TO_BLOCK_IT to_block_it(to_blocks);  // There must be exactly one input block.  // TODO(rays) handle new textline finding with a UNLV zone file.  ASSERT_HOST(to_blocks->singleton());  TO_BLOCK* to_block = to_block_it.data();  TBOX blkbox = to_block->block->bounding_box();  ColumnFinder* finder = NULL;  int estimated_resolution = source_resolution_;  if (source_resolution_ == kMinCredibleResolution) {    // Try to estimate resolution from typical body text size.    int res = IntCastRounded(to_block->line_size * kResolutionEstimationFactor);    if (res > estimated_resolution && res < kMaxCredibleResolution) {      estimated_resolution = res;      tprintf("Estimating resolution as %d/n", estimated_resolution);    }  }  if (to_block->line_size >= 2) {    finder = new ColumnFinder(static_cast<int>(to_block->line_size),                              blkbox.botleft(), blkbox.topright(),                              estimated_resolution, textord_use_cjk_fp_model,                              textord_tabfind_aligned_gap_fraction, &v_lines,                              &h_lines, vertical_x, vertical_y);    finder->SetupAndFilterNoise(pageseg_mode, *photo_mask_pix, to_block);    if (equ_detect_) {      equ_detect_->LabelSpecialText(to_block);    }    BLOBNBOX_CLIST osd_blobs;    // osd_orientation is the number of 90 degree rotations to make the    // characters upright. (See osdetect.h for precise definition.)    // We want the text lines horizontal, (vertical text indicates vertical    // textlines) which may conflict (eg vertically written CJK).    
int osd_orientation = 0;    bool vertical_text = textord_tabfind_force_vertical_text ||                         pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;    if (!vertical_text && textord_tabfind_vertical_text &&        PSM_ORIENTATION_ENABLED(pageseg_mode)) {      vertical_text =          finder->IsVerticallyAlignedText(textord_tabfind_vertical_text_ratio,                                          to_block, &osd_blobs);    }    if (PSM_OSD_ENABLED(pageseg_mode) && osd_tess != NULL && osr != NULL) {      GenericVector<int> osd_scripts;      if (osd_tess != this) {        // We are running osd as part of layout analysis, so constrain the        // scripts to those allowed by *this.        AddAllScriptsConverted(unicharset, osd_tess->unicharset, &osd_scripts);        for (int s = 0; s < sub_langs_.size(); ++s) {          AddAllScriptsConverted(sub_langs_[s]->unicharset,                                 osd_tess->unicharset, &osd_scripts);        }      }//.........这里部分代码省略.........

开发者ID:Kailigithub，项目名称:tesseract，代码行数:101，

示例7: Emalloc

//.........这里部分代码省略.........      DoError (ILLEGALSAMPLECOUNT, "Illegal sample count");    Proto->NumSamples = SampleCount;    Proto->Mean = ReadNFloats (File, N, NULL);    if (Proto->Mean == NULL)      DoError (ILLEGALMEANSPEC, "Illegal prototype mean");    switch (Proto->Style) {      case spherical:        if (ReadNFloats (File, 1, &(Proto->Variance.Spherical)) == NULL)          DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");        Proto->Magnitude.Spherical =          1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Spherical));        Proto->TotalMagnitude =          pow (Proto->Magnitude.Spherical, (float) N);        Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);        Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;        Proto->Distrib = NULL;        break;      case elliptical:        Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);        if (Proto->Variance.Elliptical == NULL)          DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");        Proto->Magnitude.Elliptical =          (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));        Proto->Weight.Elliptical =          (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));        Proto->TotalMagnitude = 1.0;        for (i = 0; i < N; i++) {          Proto->Magnitude.Elliptical[i] =            1.0 /            sqrt ((double) (2.0 * PI * Proto->Variance.Elliptical[i]));          Proto->Weight.Elliptical[i] =            1.0 / Proto->Variance.Elliptical[i];          Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];        }        Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);        Proto->Distrib = NULL;        break;      case mixed:        Proto->Distrib =          (DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION));        for (i = 0; i < N; i++) {          if (tfscanf(File, "%s", Token) != 1)            DoError (ILLEGALDISTRIBUTION,              "Illegal prototype distribution");          switch (Token[0]) {            case 'n':           
   Proto->Distrib[i] = normal;              break;            case 'u':              Proto->Distrib[i] = uniform;              break;            case 'r':              Proto->Distrib[i] = D_random;              break;            default:              DoError (ILLEGALDISTRIBUTION,                "Illegal prototype distribution");          }        }        Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);        if (Proto->Variance.Elliptical == NULL)          DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");        Proto->Magnitude.Elliptical =          (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));        Proto->Weight.Elliptical =          (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));        Proto->TotalMagnitude = 1.0;        for (i = 0; i < N; i++) {          switch (Proto->Distrib[i]) {            case normal:              Proto->Magnitude.Elliptical[i] = 1.0 /                sqrt ((double)                (2.0 * PI * Proto->Variance.Elliptical[i]));              Proto->Weight.Elliptical[i] =                1.0 / Proto->Variance.Elliptical[i];              break;            case uniform:            case D_random:              Proto->Magnitude.Elliptical[i] = 1.0 /                (2.0 * Proto->Variance.Elliptical[i]);              break;            case DISTRIBUTION_COUNT:              ASSERT_HOST(!"Distribution count not allowed!");          }          Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];        }        Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);        break;    }    return (Proto);  }  else if (Status == EOF)    return (NULL);  else {    DoError (ILLEGALSIGNIFICANCESPEC, "Illegal significance specification");    return (NULL);  }}

开发者ID:0xkasun，项目名称:tesseract，代码行数:101，

示例8: bleft

/** * Sets up auto page segmentation, determines the orientation, and corrects it. * Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to * facilitate testing. * photo_mask_pix is a pointer to a NULL pointer that will be filled on return * with the leptonica photo mask, which must be pixDestroyed by the caller. * to_blocks is an empty list that will be filled with (usually a single) * block that is used during layout analysis. This ugly API is required * because of the possibility of a unlv zone file. * TODO(rays) clean this up. * See AutoPageSeg for other arguments. * The returned ColumnFinder must be deleted after use. */ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(    bool single_column, bool osd, bool only_osd,    BLOCK_LIST* blocks, Tesseract* osd_tess, OSResults* osr,    TO_BLOCK_LIST* to_blocks, Pix** photo_mask_pix, Pix** music_mask_pix) {  int vertical_x = 0;  int vertical_y = 1;  TabVector_LIST v_lines;  TabVector_LIST h_lines;  ICOORD bleft(0, 0);  ASSERT_HOST(pix_binary_ != NULL);  if (tessedit_dump_pageseg_images) {    pixWrite("tessinput.png", pix_binary_, IFF_PNG);  }  // Leptonica is used to find the rule/separator lines in the input.  LineFinder::FindAndRemoveLines(source_resolution_,                                 textord_tabfind_show_vlines, pix_binary_,                                 &vertical_x, &vertical_y, music_mask_pix,                                 &v_lines, &h_lines);  if (tessedit_dump_pageseg_images)    pixWrite("tessnolines.png", pix_binary_, IFF_PNG);  // Leptonica is used to find a mask of the photo regions in the input.  *photo_mask_pix = ImageFind::FindImages(pix_binary_);  if (tessedit_dump_pageseg_images)    pixWrite("tessnoimages.png", pix_binary_, IFF_PNG);  if (single_column)    v_lines.clear();  // The rest of the algorithm uses the usual connected components.  
textord_.find_components(pix_binary_, blocks, to_blocks);  TO_BLOCK_IT to_block_it(to_blocks);  // There must be exactly one input block.  // TODO(rays) handle new textline finding with a UNLV zone file.  ASSERT_HOST(to_blocks->singleton());  TO_BLOCK* to_block = to_block_it.data();  TBOX blkbox = to_block->block->bounding_box();  ColumnFinder* finder = NULL;  if (to_block->line_size >= 2) {    finder = new ColumnFinder(static_cast<int>(to_block->line_size),                              blkbox.botleft(), blkbox.topright(),                              source_resolution_,                              &v_lines, &h_lines, vertical_x, vertical_y);    finder->SetupAndFilterNoise(*photo_mask_pix, to_block);    if (equ_detect_) {      equ_detect_->LabelSpecialText(to_block);    }    BLOBNBOX_CLIST osd_blobs;    // osd_orientation is the number of 90 degree rotations to make the    // characters upright. (See osdetect.h for precise definition.)    // We want the text lines horizontal, (vertical text indicates vertical    // textlines) which may conflict (eg vertically written CJK).    int osd_orientation = 0;    bool vertical_text = finder->IsVerticallyAlignedText(to_block, &osd_blobs);    if (osd && osd_tess != NULL && osr != NULL) {      os_detect_blobs(&osd_blobs, osr, osd_tess);      if (only_osd) {        delete finder;        return NULL;      }      osd_orientation = osr->best_result.orientation_id;      double osd_score = osr->orientations[osd_orientation];      double osd_margin = min_orientation_margin * 2;      for (int i = 0; i < 4; ++i) {        if (i != osd_orientation &&            osd_score - osr->orientations[i] < osd_margin) {          osd_margin = osd_score - osr->orientations[i];        }      }      if (osd_margin < min_orientation_margin) {        // The margin is weak.        
int best_script_id = osr->best_result.script_id;        bool cjk = (best_script_id == osd_tess->unicharset.han_sid()) ||            (best_script_id == osd_tess->unicharset.hiragana_sid()) ||            (best_script_id == osd_tess->unicharset.katakana_sid());        if (!cjk && !vertical_text && osd_orientation == 2) {          // upside down latin text is improbable with such a weak margin.          tprintf("OSD: Weak margin (%.2f), horiz textlines, not CJK: "                  "Don't rotate./n", osd_margin);          osd_orientation = 0;        } else {          tprintf("OSD: Weak margin (%.2f) for %d blob text block, "//.........这里部分代码省略.........

开发者ID:9999bao，项目名称:tess-two，代码行数:101，

示例9: res_it

// Returns the mean confidence of the current object at the given level.// The number should be interpreted as a percent probability. (0.0f-100.0f)float LTRResultIterator::Confidence(PageIteratorLevel level) const {  if (it_->word() == NULL) return 0.0f;  // Already at the end!  float mean_certainty = 0.0f;  int certainty_count = 0;  PAGE_RES_IT res_it(*it_);  WERD_CHOICE* best_choice = res_it.word()->best_choice;  ASSERT_HOST(best_choice != NULL);  switch (level) {    case RIL_BLOCK:      do {        best_choice = res_it.word()->best_choice;        ASSERT_HOST(best_choice != NULL);        mean_certainty += best_choice->certainty();        ++certainty_count;        res_it.forward();      } while (res_it.block() == res_it.prev_block());      break;    case RIL_PARA:      do {        best_choice = res_it.word()->best_choice;        ASSERT_HOST(best_choice != NULL);        mean_certainty += best_choice->certainty();        ++certainty_count;        res_it.forward();      } while (res_it.block() == res_it.prev_block() &&               res_it.row()->row->para() == res_it.prev_row()->row->para());      break;    case RIL_TEXTLINE:      do {        best_choice = res_it.word()->best_choice;        ASSERT_HOST(best_choice != NULL);        mean_certainty += best_choice->certainty();        ++certainty_count;        res_it.forward();      } while (res_it.row() == res_it.prev_row());      break;    case RIL_WORD:      mean_certainty += best_choice->certainty();     ++certainty_count;      break;    case RIL_SYMBOL:      BLOB_CHOICE_LIST_CLIST* choices = best_choice->blob_choices();      if (choices != NULL) {        BLOB_CHOICE_LIST_C_IT blob_choices_it(choices);        for (int blob = 0; blob < blob_index_; ++blob)          blob_choices_it.forward();        BLOB_CHOICE_IT choice_it(blob_choices_it.data());        for (choice_it.mark_cycle_pt();             !choice_it.cycled_list();             choice_it.forward()) {          if (choice_it.data()->unichar_id() ==              
best_choice->unichar_id(blob_index_))            break;        }        mean_certainty += choice_it.data()->certainty();      } else {        mean_certainty += best_choice->certainty();      }      ++certainty_count;  }  if (certainty_count > 0) {    mean_certainty /= certainty_count;    float confidence = 100 + 5 * mean_certainty;    if (confidence < 0.0f) confidence = 0.0f;    if (confidence > 100.0f) confidence = 100.0f;    return confidence;  }  return 0.0f;}

开发者ID:Strongc，项目名称:Tesseract_Ocr，代码行数:71，

示例10: while

/** * Split input into space-separated tokens, strip trailing punctuation * from each, determine case properties, call UTF-8 flavor of cost * function on each word, and aggregate all into single mean word * cost. */int WordUnigrams::Cost(const char_32 *key_str32,                       LangModel *lang_mod,                       CharSet *char_set) const {  if (!key_str32)    return 0;  // convert string to UTF8 to split into space-separated words  string key_str;  CubeUtils::UTF32ToUTF8(key_str32, &key_str);  vector<string> words;  CubeUtils::SplitStringUsing(key_str, " /t", &words);  // no words => no cost  if (words.size() <= 0) {    return 0;  }  // aggregate the costs of all the words  int cost = 0;  for (int word_idx = 0; word_idx < words.size(); word_idx++) {    // convert each word back to UTF32 for analyzing case and punctuation    string_32 str32;    CubeUtils::UTF8ToUTF32(words[word_idx].c_str(), &str32);    int len = CubeUtils::StrLen(str32.c_str());    // strip all trailing punctuation    string clean_str;    int clean_len = len;    bool trunc = false;    while (clean_len > 0 &&           lang_mod->IsTrailingPunc(str32.c_str()[clean_len - 1])) {      --clean_len;      trunc = true;    }    // If either the original string was not truncated (no trailing    // punctuation) or the entire string was removed (all characters    // are trailing punctuation), evaluate original word as is;    // otherwise, copy all but the trailing punctuation characters    char_32 *clean_str32 = NULL;    if (clean_len == 0 || !trunc) {      clean_str32 = CubeUtils::StrDup(str32.c_str());    } else {      clean_str32 = new char_32[clean_len + 1];      for (int i = 0; i < clean_len; ++i) {        clean_str32[i] = str32[i];      }      clean_str32[clean_len] = '/0';    }    ASSERT_HOST(clean_str32 != NULL);    string str8;    CubeUtils::UTF32ToUTF8(clean_str32, &str8);    int word_cost = CostInternal(str8.c_str());    // if case invariant, get costs of all-upper-case and 
all-lower-case    // versions and return the min cost    if (clean_len >= kMinLengthNumOrCaseInvariant &&        CubeUtils::IsCaseInvariant(clean_str32, char_set)) {      char_32 *lower_32 = CubeUtils::ToLower(clean_str32, char_set);      if (lower_32) {        string lower_8;        CubeUtils::UTF32ToUTF8(lower_32, &lower_8);        word_cost = MIN(word_cost, CostInternal(lower_8.c_str()));        delete [] lower_32;      }      char_32 *upper_32 = CubeUtils::ToUpper(clean_str32, char_set);      if (upper_32) {        string upper_8;        CubeUtils::UTF32ToUTF8(upper_32, &upper_8);        word_cost = MIN(word_cost, CostInternal(upper_8.c_str()));        delete [] upper_32;      }    }    if (clean_len >= kMinLengthNumOrCaseInvariant) {      // if characters are all numeric, incur 0 word cost      bool is_numeric = true;      for (int i = 0; i < clean_len; ++i) {        if (!lang_mod->IsDigit(clean_str32[i]))          is_numeric = false;      }      if (is_numeric)        word_cost = 0;    }    delete [] clean_str32;    cost += word_cost;  }  // word_idx  // return the mean cost  return static_cast<int>(cost / static_cast<double>(words.size()));}

开发者ID:0xkasun，项目名称:tesseract，代码行数:97，

示例11: ASSERT_HOST

/** * Segment the page according to the current value of tessedit_pageseg_mode. * pix_binary_ is used as the source image and should not be NULL. * On return the blocks list owns all the constructed page layout. */int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,                           Tesseract* osd_tess, OSResults* osr) {  ASSERT_HOST(pix_binary_ != NULL);  int width = pixGetWidth(pix_binary_);  int height = pixGetHeight(pix_binary_);  // Get page segmentation mode.  PageSegMode pageseg_mode = static_cast<PageSegMode>(      static_cast<int>(tessedit_pageseg_mode));  // If a UNLV zone file can be found, use that instead of segmentation.  if (!PSM_COL_FIND_ENABLED(pageseg_mode) &&      input_file != NULL && input_file->length() > 0) {    STRING name = *input_file;    const char* lastdot = strrchr(name.string(), '.');    if (lastdot != NULL)      name[lastdot - name.string()] = '/0';    read_unlv_file(name, width, height, blocks);  }  if (blocks->empty()) {    // No UNLV file present. Work according to the PageSegMode.    // First make a single block covering the whole image.    BLOCK_IT block_it(blocks);    BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);    block->set_right_to_left(right_to_left());    block_it.add_to_end(block);  } else {    // UNLV file present. Use PSM_SINGLE_BLOCK.    pageseg_mode = PSM_SINGLE_BLOCK;  }  int auto_page_seg_ret_val = 0;  TO_BLOCK_LIST to_blocks;  if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) ||      PSM_SPARSE(pageseg_mode)) {    auto_page_seg_ret_val =        AutoPageSeg(pageseg_mode, blocks, &to_blocks, osd_tess, osr);    if (pageseg_mode == PSM_OSD_ONLY)      return auto_page_seg_ret_val;    // To create blobs from the image region bounds uncomment this line:    //  to_blocks.clear();  // Uncomment to go back to the old mode.  
} else {    deskew_ = FCOORD(1.0f, 0.0f);    reskew_ = FCOORD(1.0f, 0.0f);    if (pageseg_mode == PSM_CIRCLE_WORD) {      Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_);      if (pixcleaned != NULL) {        pixDestroy(&pix_binary_);        pix_binary_ = pixcleaned;      }    }  }  if (auto_page_seg_ret_val < 0) {    return -1;  }  if (blocks->empty()) {    if (textord_debug_tabfind)      tprintf("Empty page/n");    return 0;  // AutoPageSeg found an empty page.  }  textord_.TextordPage(pageseg_mode, width, height, pix_binary_,                       blocks, &to_blocks);  return auto_page_seg_ret_val;}

开发者ID:9999bao，项目名称:tess-two，代码行数:69，

示例12: STRING

//.........这里部分代码省略.........      }      stats_.tilde_crunch_written = true;      stats_.last_char_was_newline = false;      stats_.write_results_empty_block = false;    }    if ((word->word->flag (W_EOL) && !stats_.last_char_was_newline) || force_eol) {      /* Add a new line output */      txt_chs[txt_index] = '/n';      map_chs[txt_index++] = '/n';                                 //end line      ep_chars[ep_chars_index++] = newline_type;                                 //Cos of the real newline      stats_.tilde_crunch_written = false;      stats_.last_char_was_newline = true;      stats_.last_char_was_tilde = false;    }    txt_chs[txt_index] = '/0';    map_chs[txt_index] = '/0';    ep_chars[ep_chars_index] = '/0';  // terminate string    word->ep_choice = new WERD_CHOICE(ep_chars, uchset);    if (force_eol)      stats_.write_results_empty_block = true;    return;  }  /* NORMAL PROCESSING of non tilde crunched words */  stats_.tilde_crunch_written = false;  if (newline_type)    stats_.last_char_was_newline = true;  else    stats_.last_char_was_newline = false;  stats_.write_results_empty_block = force_eol;  // about to write a real word  if (unlv_tilde_crunching &&      stats_.last_char_was_tilde &&      (word->word->space() == 0) &&      !(word->word->flag(W_REP_CHAR) && tessedit_write_rep_codes) &&      (word->best_choice->unichar_id(0) == space)) {    /* Prevent adjacent tilde across words - we know that adjacent tildes within       words have been removed */    word->best_choice->remove_unichar_id(0);    if (word->best_choice->blob_choices() != NULL) {      BLOB_CHOICE_LIST_C_IT blob_choices_it(word->best_choice->blob_choices());      if (!blob_choices_it.empty()) delete blob_choices_it.extract();    }    word->reject_map.remove_pos (0);    word->box_word->DeleteBox(0);  }  if (newline_type ||    (word->word->flag (W_REP_CHAR) && tessedit_write_rep_codes))    stats_.last_char_was_tilde = false;  else {    if (word->reject_map.length () > 0) {      if 
(word->best_choice->unichar_id(word->reject_map.length() - 1) == space)        stats_.last_char_was_tilde = true;      else        stats_.last_char_was_tilde = false;    }    else if (word->word->space () > 0)      stats_.last_char_was_tilde = false;    /* else it is unchanged as there are no output chars */  }  ASSERT_HOST (word->best_choice->length() == word->reject_map.length());  set_unlv_suspects(word);  check_debug_pt (word, 120);  if (tessedit_rejection_debug) {    tprintf ("Dict word: /"%s/": %d/n",             word->best_choice->debug_string().string(),             dict_word(*(word->best_choice)));  }  if (word->word->flag (W_REP_CHAR) && tessedit_write_rep_codes) {    repetition_code = "|^~R";    wordstr_lengths = "/001/001/001/001";    repetition_code += uchset.id_to_unichar(get_rep_char(word));    wordstr_lengths += strlen(uchset.id_to_unichar(get_rep_char(word)));    wordstr = &repetition_code;  } else {    if (tessedit_zero_rejection) {      /* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */      for (i = 0; i < word->best_choice->length(); ++i) {        if (word->reject_map[i].rejected())          word->reject_map[i].setrej_minimal_rej_accept();      }    }    if (tessedit_minimal_rejection) {      /* OVERRIDE ALL REJECTION MECHANISMS - ONLY REJECT TESS FAILURES */      for (i = 0; i < word->best_choice->length(); ++i) {        if ((word->best_choice->unichar_id(i) != space) &&            word->reject_map[i].rejected())          word->reject_map[i].setrej_minimal_rej_accept();      }    }  }}

开发者ID:0xkasun，项目名称:Dummy_Tes，代码行数:101，

示例13: pixGetWidth

// Segment the page according to the current value of tessedit_pageseg_mode.// If the pix_binary_ member is not NULL, it is used as the source image,// and copied to image, otherwise it just uses image as the input.// On return the blocks list owns all the constructed page layout.int Tesseract::SegmentPage(const STRING* input_file,                           IMAGE* image, BLOCK_LIST* blocks) {  int width = image->get_xsize();  int height = image->get_ysize();  int resolution = image->get_res();#ifdef HAVE_LIBLEPT  if (pix_binary_ != NULL) {    width = pixGetWidth(pix_binary_);    height = pixGetHeight(pix_binary_);    resolution = pixGetXRes(pix_binary_);  }#endif  // Zero resolution messes up the algorithms, so make sure it is credible.  if (resolution < kMinCredibleResolution)    resolution = kDefaultResolution;  // Get page segmentation mode.  PageSegMode pageseg_mode = static_cast<PageSegMode>(      static_cast<int>(tessedit_pageseg_mode));  // If a UNLV zone file can be found, use that instead of segmentation.  if (pageseg_mode != tesseract::PSM_AUTO &&      input_file != NULL && input_file->length() > 0) {    STRING name = *input_file;    const char* lastdot = strrchr(name.string(), '.');    if (lastdot != NULL)      name[lastdot - name.string()] = '/0';    read_unlv_file(name, width, height, blocks);  }  bool single_column = pageseg_mode > PSM_AUTO;  if (blocks->empty()) {    // No UNLV file present. Work according to the PageSegMode.    // First make a single block covering the whole image.    BLOCK_IT block_it(blocks);    BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);    block_it.add_to_end(block);  } else {    // UNLV file present. Use PSM_SINGLE_COLUMN.    
pageseg_mode = PSM_SINGLE_COLUMN;  }  TO_BLOCK_LIST land_blocks, port_blocks;  TBOX page_box;  if (pageseg_mode <= PSM_SINGLE_COLUMN) {    if (AutoPageSeg(width, height, resolution, single_column,                    image, blocks, &port_blocks) < 0) {      return -1;    }    // To create blobs from the image region bounds uncomment this line:    //  port_blocks.clear();  // Uncomment to go back to the old mode.  } else {#if HAVE_LIBLEPT    image->FromPix(pix_binary_);#endif    deskew_ = FCOORD(1.0f, 0.0f);    reskew_ = FCOORD(1.0f, 0.0f);  }  if (blocks->empty()) {    tprintf("Empty page/n");    return 0;  // AutoPageSeg found an empty page.  }  if (port_blocks.empty()) {    // AutoPageSeg was not used, so we need to find_components first.    find_components(blocks, &land_blocks, &port_blocks, &page_box);  } else {    // AutoPageSeg does not need to find_components as it did that already.    page_box.set_left(0);    page_box.set_bottom(0);    page_box.set_right(width);    page_box.set_top(height);    // Filter_blobs sets up the TO_BLOCKs the same as find_components does.    filter_blobs(page_box.topright(), &port_blocks, true);  }  TO_BLOCK_IT to_block_it(&port_blocks);  ASSERT_HOST(!port_blocks.empty());  TO_BLOCK* to_block = to_block_it.data();  if (pageseg_mode <= PSM_SINGLE_BLOCK ||      to_block->line_size < 2) {    // For now, AUTO, SINGLE_COLUMN and SINGLE_BLOCK all map to the old    // textord. The difference is the number of blocks and how the are made.    textord_page(page_box.topright(), blocks, &land_blocks, &port_blocks,                 this);  } else {    // SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.    float gradient = make_single_row(page_box.topright(),                                     to_block, &port_blocks, this);    if (pageseg_mode == PSM_SINGLE_LINE) {      // SINGLE_LINE uses the old word maker on the single line.      
make_words(page_box.topright(), gradient, blocks,                 &land_blocks, &port_blocks, this);    } else {      // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a      // single word, and in SINGLE_CHAR mode, all the outlines      // go in a single blob.      make_single_word(pageseg_mode == PSM_SINGLE_CHAR,                       to_block->get_rows(), to_block->block->row_list());//.........这里部分代码省略.........

开发者ID:mk219533，项目名称:tesseract-ocr，代码行数:101，

示例14: bleft

// Auto page segmentation. Divide the page image into blocks of uniform// text linespacing and images.// Width, height and resolution are derived from the input image.// If the pix is non-NULL, then it is assumed to be the input, and it is// copied to the image, otherwise the image is used directly.// The output goes in the blocks list with corresponding TO_BLOCKs in the// to_blocks list.// If single_column is true, then no attempt is made to divide the image// into columns, but multiple blocks are still made if the text is of// non-uniform linespacing.int Tesseract::AutoPageSeg(int width, int height, int resolution,                           bool single_column, IMAGE* image,                           BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) {  int vertical_x = 0;  int vertical_y = 1;  TabVector_LIST v_lines;  TabVector_LIST h_lines;  ICOORD bleft(0, 0);  Boxa* boxa = NULL;  Pixa* pixa = NULL;  // The blocks made by the ColumnFinder. Moved to blocks before return.  BLOCK_LIST found_blocks;#ifdef HAVE_LIBLEPT  if (pix_binary_ != NULL) {    if (textord_debug_images) {      Pix* grey_pix = pixCreate(width, height, 8);      // Printable images are light grey on white, but for screen display      // they are black on dark grey so the other colors show up well.      if (textord_debug_printable) {        pixSetAll(grey_pix);        pixSetMasked(grey_pix, pix_binary_, 192);      } else {        pixSetAllArbitrary(grey_pix, 64);        pixSetMasked(grey_pix, pix_binary_, 0);      }      AlignedBlob::IncrementDebugPix();      pixWrite(AlignedBlob::textord_debug_pix().string(), grey_pix, IFF_PNG);      pixDestroy(&grey_pix);    }    if (tessedit_dump_pageseg_images)      pixWrite("tessinput.png", pix_binary_, IFF_PNG);    // Leptonica is used to find the lines and image regions in the input.    
LineFinder::FindVerticalLines(resolution, pix_binary_,                                  &vertical_x, &vertical_y, &v_lines);    LineFinder::FindHorizontalLines(resolution, pix_binary_, &h_lines);    if (tessedit_dump_pageseg_images)      pixWrite("tessnolines.png", pix_binary_, IFF_PNG);    ImageFinder::FindImages(pix_binary_, &boxa, &pixa);    if (tessedit_dump_pageseg_images)      pixWrite("tessnoimages.png", pix_binary_, IFF_PNG);    // Copy the Pix to the IMAGE.    image->FromPix(pix_binary_);    if (single_column)      v_lines.clear();  }#endif  TO_BLOCK_LIST land_blocks, port_blocks;  TBOX page_box;  // The rest of the algorithm uses the usual connected components.  find_components(blocks, &land_blocks, &port_blocks, &page_box);  TO_BLOCK_IT to_block_it(&port_blocks);  ASSERT_HOST(!to_block_it.empty());  for (to_block_it.mark_cycle_pt(); !to_block_it.cycled_list();       to_block_it.forward()) {    TO_BLOCK* to_block = to_block_it.data();    TBOX blkbox = to_block->block->bounding_box();    if (to_block->line_size >= 2) {      // Note: if there are multiple blocks, then v_lines, boxa, and pixa      // are empty on the next iteration, but in this case, we assume      // that there aren't any interesting line separators or images, since      // it means that we have a pre-defined unlv zone file.      ColumnFinder finder(static_cast<int>(to_block->line_size),                          blkbox.botleft(), blkbox.topright(),                          &v_lines, &h_lines, vertical_x, vertical_y);      if (finder.FindBlocks(height, resolution, single_column,                            to_block, boxa, pixa, &found_blocks, to_blocks) < 0)        return -1;      finder.ComputeDeskewVectors(&deskew_, &reskew_);      boxa = NULL;      pixa = NULL;    }  }#ifdef HAVE_LIBLEPT  boxaDestroy(&boxa);  pixaDestroy(&pixa);#endif  blocks->clear();  BLOCK_IT block_it(blocks);  // Move the found blocks to the input/output blocks.  
block_it.add_list_after(&found_blocks);  if (textord_debug_images) {    // The debug image is no longer needed so delete it.    unlink(AlignedBlob::textord_debug_pix().string());  }  return 0;}

开发者ID:mk219533，项目名称:tesseract-ocr，代码行数:99，

示例15: find_components

// Make the textlines and words inside each block.void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew,                          int width, int height, Pix *binary_pix,                          Pix *thresholds_pix, Pix *grey_pix,                          bool use_box_bottoms,                          BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) {  page_tr_.set_x(width);  page_tr_.set_y(height);  if (to_blocks->empty()) {    // AutoPageSeg was not used, so we need to find_components first.    find_components(binary_pix, blocks, to_blocks);    TO_BLOCK_IT it(to_blocks);    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {      TO_BLOCK *to_block = it.data();      // Compute the edge offsets whether or not there is a grey_pix.      // We have by-passed auto page seg, so we have to run it here.      // By page segmentation mode there is no non-text to avoid running on.      to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);    }  } else if (!PSM_SPARSE(pageseg_mode)) {    // AutoPageSeg does not need to find_components as it did that already.    // Filter_blobs sets up the TO_BLOCKs the same as find_components does.    filter_blobs(page_tr_, to_blocks, true);  }  ASSERT_HOST(!to_blocks->empty());  if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {    const FCOORD anticlockwise90(0.0f, 1.0f);    const FCOORD clockwise90(0.0f, -1.0f);    TO_BLOCK_IT it(to_blocks);    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {      TO_BLOCK *to_block = it.data();      BLOCK *block = to_block->block;      // Create a fake poly_block in block from its bounding box.      block->set_poly_block(new POLY_BLOCK(block->bounding_box(),                                           PT_VERTICAL_TEXT));      // Rotate the to_block along with its contained block and blobnbox lists.      to_block->rotate(anticlockwise90);      // Set the block's rotation values to obey the convention followed in      // layout analysis for vertical text.      
block->set_re_rotation(clockwise90);      block->set_classify_rotation(clockwise90);    }  }  TO_BLOCK_IT to_block_it(to_blocks);  TO_BLOCK *to_block = to_block_it.data();  // Make the rows in the block.  float gradient = 0;  // Do it the old fashioned way.  if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {    gradient = make_rows(page_tr_, to_blocks);  } else if (!PSM_SPARSE(pageseg_mode)) {    // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.    gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE,                               to_block, to_blocks);  }  BaselineDetect baseline_detector(textord_baseline_debug,                                   reskew, to_blocks);  baseline_detector.ComputeStraightBaselines(use_box_bottoms);  baseline_detector.ComputeBaselineSplinesAndXheights(page_tr_, true,      textord_heavy_nr,      textord_show_final_rows,      this);  // Now make the words in the lines.  if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {    // SINGLE_LINE uses the old word maker on the single line.    make_words(this, page_tr_, gradient, blocks, to_blocks);  } else {    // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a    // single word, and in SINGLE_CHAR mode, all the outlines    // go in a single blob.    TO_BLOCK *to_block = to_block_it.data();    make_single_word(pageseg_mode == PSM_SINGLE_CHAR,                     to_block->get_rows(), to_block->block->row_list());  }  cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks);  // Remove empties.  // Compute the margins for each row in the block, to be used later for  // paragraph detection.  BLOCK_IT b_it(blocks);  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {    b_it.data()->compute_row_margins();  }#ifndef GRAPHICS_DISABLED  close_to_win();#endif}

开发者ID:koth，项目名称:yocr，代码行数:89，

示例16: alloc_mem

// Constructs this outline as a copy of srcline with every position rotated by
// the given rotation. The step buffer is allocated for twice the source step
// count and zeroed. The outline is rebuilt by walking the source steps,
// rotating each position and emitting steps until the rotated position is
// reached. Up to two passes are made, swapping which of the two emitted steps
// receives the rounding offset of 32; the second pass is skipped when the
// first one yields at least 4 steps. Adjacent steps whose directions differ
// by +/-64 are treated as u-turns and removed.
C_OUTLINE::C_OUTLINE(                     // constructor
                     C_OUTLINE *srcline,  // outline to copy
                     FCOORD rotation      // rotation to apply
                    ) {
  TBOX new_box;                   // single-point box merged into box
  inT16 stepindex;               // index into source steps
  inT16 dirdiff;                 // direction change between two steps
  ICOORD pos;                    // current (unrotated) position
  ICOORD prevpos;                // position reached by emitted steps
  ICOORD destpos;                // rotated destination point
  inT16 destindex;               // index of next step to write
  DIR128 dir;                    // coded direction
  uinT8 new_step;

  stepcount = srcline->stepcount * 2;
                                 // get memory for the steps and zero it
  steps = (uinT8 *) alloc_mem (step_mem());
  memset(steps, 0, step_mem());

  for (int iteration = 0; iteration < 2; ++iteration) {
    // First pass offsets the first emitted step by 32, second pass the other.
    DIR128 round1 = iteration == 0 ? 32 : 0;
    DIR128 round2 = iteration != 0 ? 32 : 0;
    pos = srcline->start;
    prevpos = pos;
    prevpos.rotate (rotation);
    start = prevpos;
    box = TBOX (start, start);
    destindex = 0;
    for (stepindex = 0; stepindex < srcline->stepcount; stepindex++) {
      pos += srcline->step (stepindex);
      destpos = pos;
      destpos.rotate (rotation);
      //  printf("%i %i %i %i ", destpos.x(), destpos.y(), pos.x(), pos.y());
      // Emit steps until the emitted path reaches the rotated position.
      while (destpos.x () != prevpos.x () || destpos.y () != prevpos.y ()) {
        dir = DIR128 (FCOORD (destpos - prevpos));
        dir += 64;                 // turn to step style
        new_step = dir.get_dir ();
        //  printf(" %i\n", new_step);
        if (new_step & 31) {
          // Low 5 bits set: emit two steps, offset by round1 and round2.
          set_step(destindex++, dir + round1);
          prevpos += step(destindex - 1);
          if (destindex < 2
            || ((dirdiff =
            step_dir (destindex - 1) - step_dir (destindex - 2)) !=
            -64 && dirdiff != 64)) {
            set_step(destindex++, dir + round2);
            prevpos += step(destindex - 1);
          } else {
            // The step just written reverses the previous one: undo both and
            // overwrite the previous slot with the round2 step.
            prevpos -= step(destindex - 1);
            destindex--;
            prevpos -= step(destindex - 1);
            set_step(destindex - 1, dir + round2);
            prevpos += step(destindex - 1);
          }
        }
        else {
          set_step(destindex++, dir);
          prevpos += step(destindex - 1);
        }
        while (destindex >= 2 &&
               ((dirdiff =
                 step_dir (destindex - 1) - step_dir (destindex - 2)) == -64 ||
                dirdiff == 64)) {
          prevpos -= step(destindex - 1);
          prevpos -= step(destindex - 2);
          destindex -= 2;        // Forget u turn
        }
        //ASSERT_HOST(prevpos.x() == destpos.x() && prevpos.y() == destpos.y());
        new_box = TBOX (destpos, destpos);
        box += new_box;
      }
    }
    ASSERT_HOST (destpos.x () == start.x () && destpos.y () == start.y ());
    // Remove u-turns that span the join between the last and first steps,
    // moving the start point forward by the first step each time.
    dirdiff = step_dir (destindex - 1) - step_dir (0);
    while ((dirdiff == 64 || dirdiff == -64) && destindex > 1) {
      start += step (0);
      destindex -= 2;
      for (int i = 0; i < destindex; ++i)
        set_step(i, step_dir(i + 1));
      dirdiff = step_dir (destindex - 1) - step_dir (0);
    }
    // Keep this pass's result if it has at least 4 steps.
    if (destindex >= 4)
      break;
  }
  stepcount = destindex;
  // Walk the final steps to check that the outline closes on its start.
  destpos = start;
  for (stepindex = 0; stepindex < stepcount; stepindex++) {
    destpos += step (stepindex);
  }
  ASSERT_HOST (destpos.x () == start.x () && destpos.y () == start.y ());
}

开发者ID:90b56587，项目名称:Tesseract-iPhone-Demo，代码行数:92，

示例17: ASSERT_HOST

/** * Segment the page according to the current value of tessedit_pageseg_mode. * pix_binary_ is used as the source image and should not be NULL. * On return the blocks list owns all the constructed page layout. */int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,                           Tesseract* osd_tess, OSResults* osr) {  ASSERT_HOST(pix_binary_ != NULL);  int width = pixGetWidth(pix_binary_);  int height = pixGetHeight(pix_binary_);  // Get page segmentation mode.  PageSegMode pageseg_mode = static_cast<PageSegMode>(      static_cast<int>(tessedit_pageseg_mode));  // If a UNLV zone file can be found, use that instead of segmentation.  if (!PSM_COL_FIND_ENABLED(pageseg_mode) &&      input_file != NULL && input_file->length() > 0) {    STRING name = *input_file;    const char* lastdot = strrchr(name.string(), '.');    if (lastdot != NULL)      name[lastdot - name.string()] = '/0';    read_unlv_file(name, width, height, blocks);  }  if (blocks->empty()) {    // No UNLV file present. Work according to the PageSegMode.    // First make a single block covering the whole image.    BLOCK_IT block_it(blocks);    BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, width, height);    block->set_right_to_left(right_to_left());    block_it.add_to_end(block);  } else {    // UNLV file present. Use PSM_SINGLE_BLOCK.    pageseg_mode = PSM_SINGLE_BLOCK;  }  // The diacritic_blobs holds noise blobs that may be diacritics. They  // are separated out on areas of the image that seem noisy and short-circuit  // the layout process, going straight from the initial partition creation  // right through to after word segmentation, where they are added to the  // rej_cblobs list of the most appropriate word. From there classification  // will determine whether they are used.  
BLOBNBOX_LIST diacritic_blobs;  int auto_page_seg_ret_val = 0;  TO_BLOCK_LIST to_blocks;  if (PSM_OSD_ENABLED(pageseg_mode) || PSM_BLOCK_FIND_ENABLED(pageseg_mode) ||      PSM_SPARSE(pageseg_mode)) {    auto_page_seg_ret_val = AutoPageSeg(        pageseg_mode, blocks, &to_blocks,        enable_noise_removal ? &diacritic_blobs : NULL, osd_tess, osr);    if (pageseg_mode == PSM_OSD_ONLY)      return auto_page_seg_ret_val;    // To create blobs from the image region bounds uncomment this line:    //  to_blocks.clear();  // Uncomment to go back to the old mode.  } else {    deskew_ = FCOORD(1.0f, 0.0f);    reskew_ = FCOORD(1.0f, 0.0f);    if (pageseg_mode == PSM_CIRCLE_WORD) {      Pix* pixcleaned = RemoveEnclosingCircle(pix_binary_);      if (pixcleaned != NULL) {        pixDestroy(&pix_binary_);        pix_binary_ = pixcleaned;      }    }  }  if (auto_page_seg_ret_val < 0) {    return -1;  }  if (blocks->empty()) {    if (textord_debug_tabfind)      tprintf("Empty page/n");    return 0;  // AutoPageSeg found an empty page.  }  bool splitting =      pageseg_devanagari_split_strategy != ShiroRekhaSplitter::NO_SPLIT;  bool cjk_mode = textord_use_cjk_fp_model;  textord_.TextordPage(pageseg_mode, reskew_, width, height, pix_binary_,                       pix_thresholds_, pix_grey_, splitting || cjk_mode,                       &diacritic_blobs, blocks, &to_blocks);  return auto_page_seg_ret_val;}

开发者ID:Kailigithub，项目名称:tesseract，代码行数:81，

示例18: switch

//.........这里部分代码省略.........      }      image_win->Pen(color);      TBOX box = box_word->BlobBox(i);      image_win->Rectangle(box.left(), box.bottom(), box.right(), box.top());    }    return true;  }  /*    Note the double coercions of(COLOUR)((inT32)editor_image_word_bb_color)    etc. are to keep the compiler happy.  */                                 // display bounding box  if (word->display_flag(DF_BOX)) {    word->bounding_box().plot(image_win,     (ScrollView::Color)((inT32)      editor_image_word_bb_color),     (ScrollView::Color)((inT32)      editor_image_word_bb_color));    ScrollView::Color c = (ScrollView::Color)       ((inT32) editor_image_blob_bb_color);    image_win->Pen(c);    c_it.set_to_list(word->cblob_list());    for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward())      c_it.data()->bounding_box().plot(image_win);    displayed_something = TRUE;  }                                 // display edge steps  if (word->display_flag(DF_EDGE_STEP)) {     // edgesteps available    word->plot(image_win);      // rainbow colors    displayed_something = TRUE;  }                                 // display poly approx  if (word->display_flag(DF_POLYGONAL)) {                                 // need to convert    TWERD* tword = TWERD::PolygonalCopy(poly_allow_detailed_fx, word);    tword->plot(image_win);    delete tword;    displayed_something = TRUE;  }  // Display correct text and blamer information.  
STRING text;  STRING blame;  if (word->display_flag(DF_TEXT) && word->text() != NULL) {    text = word->text();  }  if (word->display_flag(DF_BLAMER) &&      !(word_res->blamer_bundle != NULL &&        word_res->blamer_bundle->incorrect_result_reason() == IRR_CORRECT)) {    text = "";    const BlamerBundle *blamer_bundle = word_res->blamer_bundle;    if (blamer_bundle == NULL) {      text += "NULL";    } else {      text = blamer_bundle->TruthString();    }    text += " -> ";    STRING best_choice_str;    if (word_res->best_choice == NULL) {      best_choice_str = "NULL";    } else {      word_res->best_choice->string_and_lengths(&best_choice_str, NULL);    }    text += best_choice_str;    IncorrectResultReason reason = (blamer_bundle == NULL) ?        IRR_PAGE_LAYOUT : blamer_bundle->incorrect_result_reason();    ASSERT_HOST(reason < IRR_NUM_REASONS)    blame += " [";    blame += BlamerBundle::IncorrectReasonName(reason);    blame += "]";  }  if (text.length() > 0) {    word_bb = word->bounding_box();    image_win->Pen(ScrollView::RED);    word_height = word_bb.height();    int text_height = 0.50 * word_height;    if (text_height > 20) text_height = 20;    image_win->TextAttributes("Arial", text_height, false, false, false);    shift = (word_height < word_bb.width()) ? 0.25 * word_height : 0.0f;    image_win->Text(word_bb.left() + shift,                    word_bb.bottom() + 0.25 * word_height, text.string());    if (blame.length() > 0) {      image_win->Text(word_bb.left() + shift,                      word_bb.bottom() + 0.25 * word_height - text_height,                      blame.string());    }    displayed_something = TRUE;  }  if (!displayed_something)      // display BBox anyway    word->bounding_box().plot(image_win,     (ScrollView::Color)((inT32) editor_image_word_bb_color),     (ScrollView::Color)((inT32)      editor_image_word_bb_color));  return TRUE;}

开发者ID:xmarston，项目名称:BillRecognizer，代码行数:101，

示例19: array_count

//.........这里部分代码省略.........  //    NOTE: a choice composed form original fragment choices will be always  //    added to the new choices list for each character composed from  //    fragments (even if the choice for the corresponding character appears  //    in the re-classified choices list of for the newly merged blob).  BLOB_CHOICE_IT temp_it;  int char_choices_index = char_choices->length() - 1;  for (i = search_state[0]; i >= 0; i--) {    BLOB_CHOICE_LIST *current_choices = join_blobs_and_classify(        blobs, seam_list, x, y, fx, ratings, old_choices);    // Combine character fragments.    if (expanded_fragment_lengths[i] > 1) {      // Start merging character fragments.      if (!merging_fragment) {        merging_fragment = true;        true_y = y;        fragment_pieces = expanded_fragment_lengths[i];        rating = 0.0;        certainty = -MAX_FLOAT32;        strncpy(unichar, word_ptr, *word_lengths_ptr);        unichar[*word_lengths_ptr] = '/0';      }      // Take into account the fact that we could have joined pieces      // since we first recorded the ending point of a fragment (true_y).      true_y -= y - x;      // Populate fragment with updated values and look for the      // fragment with the same values in current_choices.      // Update rating and certainty of the character being composed.      
fragment_pieces--;      CHAR_FRAGMENT fragment;      fragment.set_all(unichar, fragment_pieces,                       expanded_fragment_lengths[i]);      temp_it.set_to_list(current_choices);      for (temp_it.mark_cycle_pt(); !temp_it.cycled_list();           temp_it.forward()) {        const CHAR_FRAGMENT *current_fragment =          getDict().getUnicharset().get_fragment(temp_it.data()->unichar_id());        if (current_fragment && fragment.equals(current_fragment)) {          rating += temp_it.data()->rating();          if (temp_it.data()->certainty() > certainty) {            certainty = temp_it.data()->certainty();          }          break;        }      }      assert(!temp_it.cycled_list());  // make sure we found the fragment      // Free current_choices for the fragmented character.      delete current_choices;      // Finish composing character from fragments.      if (fragment_pieces == 0) {        // Populate current_choices with the classification of        // the blob merged from blobs of each character fragment.        current_choices = join_blobs_and_classify(blobs, seam_list, x,                                                  true_y, fx, ratings, NULL);        BLOB_CHOICE *merged_choice =          new BLOB_CHOICE(getDict().getUnicharset().unichar_to_id(unichar),                          rating, certainty, 0, NO_PERM);        // Insert merged_blob into current_choices, such that current_choices        // are still sorted in non-descending order by rating.        ASSERT_HOST(!current_choices->empty());        temp_it.set_to_list(current_choices);        for (temp_it.mark_cycle_pt();             !temp_it.cycled_list() &&             merged_choice->rating() > temp_it.data()->rating();             temp_it.forward());        temp_it.add_before_stay_put(merged_choice);        // Done merging this fragmented character.        merging_fragment = false;      }    }    if (!merging_fragment) {      // Get rid of fragments in current_choices.      
temp_it.set_to_list(current_choices);      for (temp_it.mark_cycle_pt(); !temp_it.cycled_list();           temp_it.forward()) {        if (getDict().getUnicharset().get_fragment(            temp_it.data()->unichar_id())) {          delete temp_it.extract();        }      }      char_choices->set(current_choices, char_choices_index);      char_choices_index--;      // Update word_ptr and word_lengths_ptr.      if (word_lengths_ptr != NULL && word_ptr != NULL) {        word_lengths_ptr--;        word_ptr -= (*word_lengths_ptr);      }    }    y = x - 1;    x = y - search_state[i];  }  old_choices->delete_data_pointers();  delete old_choices;  memfree(search_state);  return (char_choices);}

开发者ID:AngusHardie，项目名称:TesseractOCR-For-Mac，代码行数:101，

示例20: SetupBasicProperties

// Helper sets the character attribute properties and sets up the script table.// Does not set tops and bottoms.void SetupBasicProperties(bool report_errors, bool decompose,                          UNICHARSET* unicharset) {  for (int unichar_id = 0; unichar_id < unicharset->size(); ++unichar_id) {    // Convert any custom ligatures.    const char* unichar_str = unicharset->id_to_unichar(unichar_id);    for (int i = 0; UNICHARSET::kCustomLigatures[i][0] != nullptr; ++i) {      if (!strcmp(UNICHARSET::kCustomLigatures[i][1], unichar_str)) {        unichar_str = UNICHARSET::kCustomLigatures[i][0];        break;      }    }    // Convert the unichar to UTF32 representation    std::vector<char32> uni_vector = UNICHAR::UTF8ToUTF32(unichar_str);    // Assume that if the property is true for any character in the string,    // then it holds for the whole "character".    bool unichar_isalpha = false;    bool unichar_islower = false;    bool unichar_isupper = false;    bool unichar_isdigit = false;    bool unichar_ispunct = false;    for (char32 u_ch : uni_vector) {      if (u_isalpha(u_ch)) unichar_isalpha = true;      if (u_islower(u_ch)) unichar_islower = true;      if (u_isupper(u_ch)) unichar_isupper = true;      if (u_isdigit(u_ch)) unichar_isdigit = true;      if (u_ispunct(u_ch)) unichar_ispunct = true;    }    unicharset->set_isalpha(unichar_id, unichar_isalpha);    unicharset->set_islower(unichar_id, unichar_islower);    unicharset->set_isupper(unichar_id, unichar_isupper);    unicharset->set_isdigit(unichar_id, unichar_isdigit);    unicharset->set_ispunctuation(unichar_id, unichar_ispunct);    tesseract::IcuErrorCode err;    unicharset->set_script(unichar_id, uscript_getName(        uscript_getScript(uni_vector[0], err)));    const int num_code_points = uni_vector.size();    // Obtain the lower/upper case if needed and record it in the properties.    
unicharset->set_other_case(unichar_id, unichar_id);    if (unichar_islower || unichar_isupper) {      std::vector<char32> other_case(num_code_points, 0);      for (int i = 0; i < num_code_points; ++i) {        // TODO(daria): Ideally u_strToLower()/ustrToUpper() should be used.        // However since they deal with UChars (so need a conversion function        // from char32 or UTF8string) and require a meaningful locale string,        // for now u_tolower()/u_toupper() are used.        other_case[i] = unichar_islower ? u_toupper(uni_vector[i]) :          u_tolower(uni_vector[i]);      }      std::string other_case_uch = UNICHAR::UTF32ToUTF8(other_case);      UNICHAR_ID other_case_id =          unicharset->unichar_to_id(other_case_uch.c_str());      if (other_case_id != INVALID_UNICHAR_ID) {        unicharset->set_other_case(unichar_id, other_case_id);      } else if (unichar_id >= SPECIAL_UNICHAR_CODES_COUNT && report_errors) {        tprintf("Other case %s of %s is not in unicharset/n",                other_case_uch.c_str(), unichar_str);      }    }    // Set RTL property and obtain mirror unichar ID from ICU.    std::vector<char32> mirrors(num_code_points, 0);    for (int i = 0; i < num_code_points; ++i) {      mirrors[i] = u_charMirror(uni_vector[i]);      if (i == 0) {  // set directionality to that of the 1st code point        unicharset->set_direction(unichar_id,                                  static_cast<UNICHARSET::Direction>(                                      u_charDirection(uni_vector[i])));      }    }    std::string mirror_uch = UNICHAR::UTF32ToUTF8(mirrors);    UNICHAR_ID mirror_uch_id = unicharset->unichar_to_id(mirror_uch.c_str());    if (mirror_uch_id != INVALID_UNICHAR_ID) {      unicharset->set_mirror(unichar_id, mirror_uch_id);    } else if (report_errors) {      tprintf("Mirror %s of %s is not in unicharset/n",              mirror_uch.c_str(), unichar_str);    }    // Record normalized version of this unichar.    
std::string normed_str;    if (unichar_id != 0 &&        tesseract::NormalizeUTF8String(            decompose ? tesseract::UnicodeNormMode::kNFKD                      : tesseract::UnicodeNormMode::kNFKC,            tesseract::OCRNorm::kNormalize, tesseract::GraphemeNorm::kNone,            unichar_str, &normed_str) &&        !normed_str.empty()) {      unicharset->set_normed(unichar_id, normed_str.c_str());    } else {      unicharset->set_normed(unichar_id, unichar_str);    }    ASSERT_HOST(unicharset->get_other_case(unichar_id) < unicharset->size());//.........这里部分代码省略.........

开发者ID:jan-ruzicka，项目名称:tesseract，代码行数:101，

示例21: ASSERT_HOST

// Returns true if the given string is equivalent to the truth string for// the current word.bool LTRResultIterator::EquivalentToTruth(const char *str) const {  if (!HasTruthString()) return false;  ASSERT_HOST(it_->word()->uch_set != NULL);  WERD_CHOICE str_wd(str, *(it_->word()->uch_set));  return it_->word()->blamer_bundle->ChoiceIsCorrect(&str_wd);}

开发者ID:0xkasun，项目名称:tesseract，代码行数:8，

示例22: while

// Splits a word in two at its widest inter-blob gap, recognizes each half
// with recog_word_recursive() and merges the two results.
//
//   word          word to recognize
//   denorm        de-normaliser; supplies the row x-height for the poly copy
//   matcher       matcher function
//   tester        tester function
//   trainer       trainer function
//   testing       true if answer driven
//   raw_choice    raw result; the raw choice of the second half is added to it
//   blob_choices  list of blob lists, shared by both recognition calls
//   outword       bln word output; the second half's output is joined onto it
//
// Returns the choice for the first half with the second half's choice added.
WERD_CHOICE *split_and_recog_word(WERD *word,
                                  DENORM *denorm,
                                  POLY_MATCHER matcher,
                                  POLY_TESTER tester,
                                  POLY_TESTER trainer,
                                  BOOL8 testing,
                                  WERD_CHOICE *&raw_choice,
                                  BLOB_CHOICE_LIST_CLIST *blob_choices,
                                  WERD *&outword) {
  PBLOB_LIST tail_blobs;              // blobs gathered for the second half
  PBLOB_IT scan_it;                   // walks the blobs of the poly copy
  PBLOB_IT cut_it = &tail_blobs;      // remembers where the split goes

  // Work on a polygonal copy of the word.
  WERD *head_word = word->poly_copy (denorm->row ()->x_height ());
  scan_it.set_to_list (head_word->blob_list ());

  // Find the position that follows the widest gap between adjacent blobs.
  float widest_gap = -MAX_INT32;
  while (!scan_it.at_last ()) {
    PBLOB *current = scan_it.data ();
    float gap_to_next = scan_it.data_relative (1)->bounding_box ().left () -
                        current->bounding_box ().right ();
    scan_it.forward ();
    if (gap_to_next > widest_gap) {
      widest_gap = gap_to_next;
      cut_it = scan_it;               // save position
    }
  }

  // Everything from the cut onwards becomes the second word.
  tail_blobs.assign_to_sublist (&cut_it, &scan_it);
  WERD *tail_word = new WERD (&tail_blobs, 1, NULL);
  ASSERT_HOST (word->blob_list ()->length () ==
               head_word->blob_list ()->length () +
               tail_word->blob_list ()->length ());

  // Recognize the first half; its outputs land in the caller's arguments.
  WERD_CHOICE *result = recog_word_recursive (head_word, denorm, matcher,
                                              tester, trainer, testing,
                                              raw_choice, blob_choices,
                                              outword);
  delete head_word;

  // Recognize the second half into temporaries.
  WERD_CHOICE *tail_raw_choice;
  WERD *tail_outword;
  WERD_CHOICE *tail_result = recog_word_recursive (tail_word, denorm, matcher,
                                                   tester, trainer, testing,
                                                   tail_raw_choice,
                                                   blob_choices, tail_outword);
  delete tail_word;

  // Fold the second half into the first and release the temporaries.
  *result += *tail_result;
  delete tail_result;
  *raw_choice += *tail_raw_choice;
  delete tail_raw_choice;
  outword->join_on (tail_outword);
  delete tail_outword;

  return result;
}

开发者ID:chanchai，项目名称:botker，代码行数:70，

示例23: it

// Return the ColumnSpanningType that best explains the columns overlapped// by the given coords(left,right,y), with the given margins.// Also return the first and last column index touched by the coords and// the leftmost spanned column.// Column indices are 2n + 1 for real columns (0 based) and even values// represent the gaps in between columns, with 0 being left of the leftmost.// resolution refers to the ppi resolution of the image.ColumnSpanningType ColPartitionSet::SpanningType(int resolution,                                                 int left, int right, int y,                                                 int left_margin,                                                 int right_margin,                                                 int* first_col,                                                 int* last_col,                                                 int* first_spanned_col) {  *first_col = -1;  *last_col = -1;  *first_spanned_col = -1;  int margin_columns = 0;  ColPartition_IT it(&parts_);  int col_index = 1;  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), col_index += 2) {    ColPartition* part = it.data();    if (part->ColumnContains(left, y)) {      // In the default case, first_col is set, but columns_spanned remains      // zero, so first_col will get reset in the first column genuinely      // spanned, but we can tell the difference from a noise partition      // that touches no column.      *first_col = col_index;      if (part->ColumnContains(right, y)) {        // Both within a single column.        *last_col = col_index;        return CST_FLOWING;      }      if (left_margin <= part->LeftAtY(y)) {        // It completely spans this column.        *first_spanned_col = col_index;        margin_columns = 1;      }    } else if (part->ColumnContains(right, y)) {      if (*first_col < 0) {        // It started in-between.        
*first_col = col_index - 1;      }      if (right_margin >= part->RightAtY(y)) {        // It completely spans this column.        if (margin_columns == 0)          *first_spanned_col = col_index;        ++margin_columns;      }      *last_col = col_index;      break;    } else if (left < part->LeftAtY(y) && right > part->RightAtY(y)) {      // Neither left nor right are contained within, so it spans this      // column.      if (*first_col < 0) {        // It started in between the previous column and the current column.        *first_col = col_index - 1;      }      if (margin_columns == 0)        *first_spanned_col = col_index;      *last_col = col_index;    } else if (right < part->LeftAtY(y)) {      // We have gone past the end.      *last_col = col_index - 1;      if (*first_col < 0) {        // It must lie completely between columns =>noise.        *first_col = col_index - 1;      }      break;    }  }  if (*first_col < 0)    *first_col = col_index - 1;  // The last in-between.  if (*last_col < 0)    *last_col = col_index - 1;  // The last in-between.  ASSERT_HOST(*first_col >= 0 && *last_col >= 0);  ASSERT_HOST(*first_col <= *last_col);  if (*first_col == *last_col && right - left < kMinColumnWidth * resolution) {    // Neither end was in a column, and it didn't span any, so it lies    // entirely between columns, therefore noise.    return CST_NOISE;  } else if (margin_columns <= 1) {    // An exception for headings that stick outside of single-column text.    if (margin_columns == 1 && parts_.singleton()) {      return CST_HEADING;    }    // It is a pullout, as left and right were not in the same column, but    // it doesn't go to the edge of its start and end.    return CST_PULLOUT;  }  // Its margins went to the edges of first and last columns => heading.  return CST_HEADING;}

开发者ID:ErfanHasmin，项目名称:scope-ocr，代码行数:93，

示例24: WERD_CHOICE

/**********************************************************************
 * recog_word
 *
 * Convert the word to tess form and pass it to the tess segmenter.
 * Convert the output back to editor form.
 *
 * An empty word (no blobs) is not passed to the recognizer: empty
 * choices are created for the result and raw_choice, and outword is a
 * poly copy of the input word.
 *
 * After recognition the number of entries in the choice's lengths must
 * equal both the number of blobs in outword and the number of entries
 * in blob_choices; a diagnostic is printed before the assertions fire.
 *
 * If tessedit_override_permuter is set and the permuter is not already
 * one of the dawg types, a direct dictionary check (dict_word) may
 * replace the permuter type, provided the string contains alphas.
 *
 * Returns the word choice.
 **********************************************************************/
WERD_CHOICE *recog_word(                           //recog one word
                        WERD *word,                //word to do
                        DENORM *denorm,            //de-normaliser
                        POLY_MATCHER matcher,      //matcher function
                        POLY_TESTER tester,        //tester function
                        POLY_TESTER trainer,       //trainer function
                        BOOL8 testing,             //true if answer driven
                        WERD_CHOICE *&raw_choice,  //raw result
                                                   //list of blob lists
                        BLOB_CHOICE_LIST_CLIST *blob_choices,
                        WERD *&outword             //bln word output
                       ) {
  WERD_CHOICE *word_choice;
  uinT8 perm_type;
  uinT8 real_dict_perm_type;

  if (word->blob_list ()->empty ()) {
    // Nothing to recognize: fabricate empty results.
    char empty_lengths[] = {0};
    word_choice = new WERD_CHOICE ("", empty_lengths,
                                   10.0f, -1.0f, TOP_CHOICE_PERM);
    raw_choice = new WERD_CHOICE ("", empty_lengths,
                                  10.0f, -1.0f, TOP_CHOICE_PERM);
    outword = word->poly_copy (denorm->row ()->x_height ());
  }
  else
    word_choice = recog_word_recursive (word, denorm, matcher, tester,
      trainer, testing, raw_choice,
      blob_choices, outword);

  // Report the mismatch in detail before the assertions below abort.
  if ((word_choice->lengths ().length () !=
    outword->blob_list ()->length ()) ||
  (word_choice->lengths ().length () != blob_choices->length ())) {
    tprintf
      ("recog_word ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n",
      word_choice->string ().string (), word_choice->lengths ().length (),
      outword->blob_list ()->length (), blob_choices->length ());
  }
  ASSERT_HOST (word_choice->lengths ().length () ==
    outword->blob_list ()->length ());
  ASSERT_HOST (word_choice->lengths ().length () == blob_choices->length ());

  /* Copy any reject blobs into the outword */
  outword->rej_blob_list ()->deep_copy (word->rej_blob_list ());

  if (tessedit_override_permuter) {
    /* Override the permuter type if a straight dictionary check disagrees. */
    perm_type = word_choice->permuter ();
    if ((perm_type != SYSTEM_DAWG_PERM) &&
    (perm_type != FREQ_DAWG_PERM) && (perm_type != USER_DAWG_PERM)) {
      real_dict_perm_type = dict_word (word_choice->string ().string ());
      if (((real_dict_perm_type == SYSTEM_DAWG_PERM) ||
        (real_dict_perm_type == FREQ_DAWG_PERM) ||
        (real_dict_perm_type == USER_DAWG_PERM)) &&
        (alpha_count (word_choice->string ().string (),
                      word_choice->lengths ().string ()) > 0))
        word_choice->set_permuter (real_dict_perm_type);
      //Use dict perm
    }
    if (tessedit_rejection_debug && perm_type != word_choice->permuter ()) {
      tprintf ("Permuter Type Flipped from %d to %d\n",
        perm_type, word_choice->permuter ());
    }
  }
  // Either both choices exist or neither does.
  assert ((word_choice == NULL) == (raw_choice == NULL));
  return word_choice;
}

开发者ID:chanchai，项目名称:botker，代码行数:70，

示例25: ASSERT_HOST

void AssociateUtils::ComputeStats(int col, int row,                                  const AssociateStats *parent_stats,                                  int parent_path_length,                                  bool fixed_pitch,                                  float max_char_wh_ratio,                                  WERD_RES *word_res,                                  bool debug,                                  AssociateStats *stats) {  stats->Clear();  ASSERT_HOST(word_res != NULL);  if (word_res->blob_widths.empty()) {    return;  }  if (debug) {    tprintf("AssociateUtils::ComputeStats() for col=%d, row=%d%s/n",            col, row, fixed_pitch ? " (fixed pitch)" : "");  }  float normalizing_height = kBlnXHeight;  ROW* blob_row = word_res->blob_row;  // TODO(rays/daria) Can unicharset.script_has_xheight be useful here?  if (fixed_pitch && blob_row != NULL) {    // For fixed pitch language like CJK, we use the full text height    // as the normalizing factor so we are not dependent on xheight    // calculation.    if (blob_row->body_size() > 0.0f) {      normalizing_height = word_res->denorm.y_scale() * blob_row->body_size();    } else {      normalizing_height = word_res->denorm.y_scale() *          (blob_row->x_height() + blob_row->ascenders());    }    if (debug) {      tprintf("normalizing height = %g (scale %g xheight %g ascenders %g)/n",              normalizing_height, word_res->denorm.y_scale(),              blob_row->x_height(), blob_row->ascenders());    }  }  float wh_ratio = word_res->GetBlobsWidth(col, row) / normalizing_height;  if (wh_ratio > max_char_wh_ratio) stats->bad_shape = true;  // Compute the gap sum for this shape. If there are only negative or only  // positive gaps, record their sum in stats->gap_sum. However, if there is  // a mixture, record only the sum of the positive gaps.  // TODO(antonova): explain fragment.  int negative_gap_sum = 0;  for (int c = col; c < row; ++c) {    int gap = word_res->GetBlobsGap(c);    (gap > 0) ? 
stats->gap_sum += gap : negative_gap_sum += gap;  }  if (stats->gap_sum == 0) stats->gap_sum = negative_gap_sum;  if (debug) {    tprintf("wh_ratio=%g (max_char_wh_ratio=%g) gap_sum=%d %s/n",            wh_ratio, max_char_wh_ratio, stats->gap_sum,            stats->bad_shape ? "bad_shape" : "");  }  // Compute shape_cost (for fixed pitch mode).  if (fixed_pitch) {    bool end_row = (row == (word_res->ratings->dimension() - 1));    // Ensure that the blob has gaps on the left and the right sides    // (except for beginning and ending punctuation) and that there is    // no cutting through ink at the blob boundaries.    if (col > 0) {      float left_gap = word_res->GetBlobsGap(col - 1) / normalizing_height;      SEAM *left_seam = word_res->seam_array[col - 1];      if ((!end_row && left_gap < kMinGap) || left_seam->priority > 0.0f) {        stats->bad_shape = true;      }      if (debug) {        tprintf("left_gap %g, left_seam %g %s/n", left_gap, left_seam->priority,                stats->bad_shape ? "bad_shape" : "");      }    }    float right_gap = 0.0f;    if (!end_row) {      right_gap = word_res->GetBlobsGap(row) / normalizing_height;      SEAM *right_seam = word_res->seam_array[row];      if (right_gap < kMinGap || right_seam->priority > 0.0f) {        stats->bad_shape = true;        if (right_gap < kMinGap) stats->bad_fixed_pitch_right_gap = true;      }      if (debug) {        tprintf("right_gap %g right_seam %g %s/n",                right_gap, right_seam->priority,                stats->bad_shape ? "bad_shape" : "");      }    }    // Impose additional segmentation penalties if blob widths or gaps    // distribution don't fit a fixed-pitch model.    // Since we only know the widths and gaps of the path explored so far,    // the means and variances are computed for the path so far (not    // considering characters to the right of the last character on the path).    
stats->full_wh_ratio = wh_ratio + right_gap;    if (parent_stats != NULL) {      stats->full_wh_ratio_total =        (parent_stats->full_wh_ratio_total + stats->full_wh_ratio);      float mean =        stats->full_wh_ratio_total / static_cast<float>(parent_path_length+1);      stats->full_wh_ratio_var =        parent_stats->full_wh_ratio_var + pow(mean-stats->full_wh_ratio, 2);//.........这里部分代码省略.........

开发者ID:11110101，项目名称:tess-two，代码行数:101，

示例26: ASSERT_HOST

void UnicharAmbigs::LoadUnicharAmbigs(const UNICHARSET& encoder_set,                                      TFile *ambig_file,                                      int debug_level,                                      bool use_ambigs_for_adaption,                                      UNICHARSET *unicharset) {  int i, j;  UnicharIdVector *adaption_ambigs_entry;  if (debug_level) tprintf("Reading ambiguities/n");  int test_ambig_part_size;  int replacement_ambig_part_size;  // The space for buffer is allocated on the heap to avoid  // GCC frame size warning.  const int kBufferSize = 10 + 2 * kMaxAmbigStringSize;  char *buffer = new char[kBufferSize];  char replacement_string[kMaxAmbigStringSize];  UNICHAR_ID test_unichar_ids[MAX_AMBIG_SIZE + 1];  int line_num = 0;  int type = NOT_AMBIG;  // Determine the version of the ambigs file.  int version = 0;  ASSERT_HOST(ambig_file->FGets(buffer, kBufferSize) != NULL &&              strlen(buffer) > 0);  if (*buffer == 'v') {    version = static_cast<int>(strtol(buffer+1, NULL, 10));    ++line_num;  } else {    ambig_file->Rewind();  }  while (ambig_file->FGets(buffer, kBufferSize) != NULL) {    chomp_string(buffer);    if (debug_level > 2) tprintf("read line %s/n", buffer);    ++line_num;    if (!ParseAmbiguityLine(line_num, version, debug_level, encoder_set,                            buffer, &test_ambig_part_size, test_unichar_ids,                            &replacement_ambig_part_size,                            replacement_string, &type)) continue;    // Construct AmbigSpec and add it to the appropriate AmbigSpec_LIST.    AmbigSpec *ambig_spec = new AmbigSpec();    if (!InsertIntoTable((type == REPLACE_AMBIG) ? 
replace_ambigs_                                                 : dang_ambigs_,                         test_ambig_part_size, test_unichar_ids,                         replacement_ambig_part_size, replacement_string, type,                         ambig_spec, unicharset))      continue;    // Update one_to_one_definite_ambigs_.    if (test_ambig_part_size == 1 &&        replacement_ambig_part_size == 1 && type == DEFINITE_AMBIG) {      if (one_to_one_definite_ambigs_[test_unichar_ids[0]] == NULL) {        one_to_one_definite_ambigs_[test_unichar_ids[0]] = new UnicharIdVector();      }      one_to_one_definite_ambigs_[test_unichar_ids[0]]->push_back(          ambig_spec->correct_ngram_id);    }    // Update ambigs_for_adaption_.    if (use_ambigs_for_adaption) {      GenericVector<UNICHAR_ID> encoding;      // Silently ignore invalid strings, as before, so it is safe to use a      // universal ambigs file.      if (unicharset->encode_string(replacement_string, true, &encoding,                                    NULL, NULL)) {        for (i = 0; i < test_ambig_part_size; ++i) {          if (ambigs_for_adaption_[test_unichar_ids[i]] == NULL) {            ambigs_for_adaption_[test_unichar_ids[i]] = new UnicharIdVector();          }          adaption_ambigs_entry = ambigs_for_adaption_[test_unichar_ids[i]];          for (int r = 0; r < encoding.size(); ++r) {            UNICHAR_ID id_to_insert = encoding[r];            ASSERT_HOST(id_to_insert != INVALID_UNICHAR_ID);            // Add the new unichar id to adaption_ambigs_entry (only if the            // vector does not already contain it) keeping it in sorted order.            
for (j = 0; j < adaption_ambigs_entry->size() &&                 (*adaption_ambigs_entry)[j] > id_to_insert; ++j);            if (j < adaption_ambigs_entry->size()) {              if ((*adaption_ambigs_entry)[j] != id_to_insert) {                adaption_ambigs_entry->insert(id_to_insert, j);              }            } else {              adaption_ambigs_entry->push_back(id_to_insert);            }          }        }      }    }  }  delete[] buffer;  // Fill in reverse_ambigs_for_adaption from ambigs_for_adaption vector.  if (use_ambigs_for_adaption) {    for (i = 0; i < ambigs_for_adaption_.size(); ++i) {      adaption_ambigs_entry = ambigs_for_adaption_[i];      if (adaption_ambigs_entry == NULL) continue;      for (j = 0; j < adaption_ambigs_entry->size(); ++j) {        UNICHAR_ID ambig_id = (*adaption_ambigs_entry)[j];        if (reverse_ambigs_for_adaption_[ambig_id] == NULL) {          reverse_ambigs_for_adaption_[ambig_id] = new UnicharIdVector();        }        reverse_ambigs_for_adaption_[ambig_id]->push_back(i);//.........这里部分代码省略.........

开发者ID:vnvizitiu，项目名称:tesseract，代码行数:101，

示例27: tprintf

// Extracts the needed information from the CHAR_DESC_STRUCT.void TrainingSample::ExtractCharDesc(int int_feature_type,                                     int micro_type,                                     int cn_type,                                     int geo_type,                                     CHAR_DESC_STRUCT* char_desc) {    // Extract the INT features.    if (features_ != NULL) delete [] features_;    FEATURE_SET_STRUCT* char_features = char_desc->FeatureSets[int_feature_type];    if (char_features == NULL) {        tprintf("Error: no features to train on of type %s/n",                kIntFeatureType);        num_features_ = 0;        features_ = NULL;    } else {        num_features_ = char_features->NumFeatures;        features_ = new INT_FEATURE_STRUCT[num_features_];        for (int f = 0; f < num_features_; ++f) {            features_[f].X =                static_cast<uinT8>(char_features->Features[f]->Params[IntX]);            features_[f].Y =                static_cast<uinT8>(char_features->Features[f]->Params[IntY]);            features_[f].Theta =                static_cast<uinT8>(char_features->Features[f]->Params[IntDir]);            features_[f].CP_misses = 0;        }    }    // Extract the Micro features.    if (micro_features_ != NULL) delete [] micro_features_;    char_features = char_desc->FeatureSets[micro_type];    if (char_features == NULL) {        tprintf("Error: no features to train on of type %s/n",                kMicroFeatureType);        num_micro_features_ = 0;        micro_features_ = NULL;    } else {        num_micro_features_ = char_features->NumFeatures;        micro_features_ = new MicroFeature[num_micro_features_];        for (int f = 0; f < num_micro_features_; ++f) {            for (int d = 0; d < MFCount; ++d) {                micro_features_[f][d] = char_features->Features[f]->Params[d];            }        }    }    // Extract the CN feature.    
char_features = char_desc->FeatureSets[cn_type];    if (char_features == NULL) {        tprintf("Error: no CN feature to train on./n");    } else {        ASSERT_HOST(char_features->NumFeatures == 1);        cn_feature_[CharNormY] = char_features->Features[0]->Params[CharNormY];        cn_feature_[CharNormLength] =            char_features->Features[0]->Params[CharNormLength];        cn_feature_[CharNormRx] = char_features->Features[0]->Params[CharNormRx];        cn_feature_[CharNormRy] = char_features->Features[0]->Params[CharNormRy];    }    // Extract the Geo feature.    char_features = char_desc->FeatureSets[geo_type];    if (char_features == NULL) {        tprintf("Error: no Geo feature to train on./n");    } else {        ASSERT_HOST(char_features->NumFeatures == 1);        geo_feature_[GeoBottom] = char_features->Features[0]->Params[GeoBottom];        geo_feature_[GeoTop] = char_features->Features[0]->Params[GeoTop];        geo_feature_[GeoWidth] = char_features->Features[0]->Params[GeoWidth];    }    features_are_indexed_ = false;    features_are_mapped_ = false;}

开发者ID:EmuxEvans，项目名称:tesseract-ocr，代码行数:69，

示例28: ASSERT_HOST

// Classifies the given [training] sample, writing to results.// See shapeclassifier.h for a full description.// Default implementation aborts.int ShapeClassifier::ClassifySample(const TrainingSample& sample, Pix* page_pix,                           int debug, int keep_this,                           GenericVector<ShapeRating>* results) {  ASSERT_HOST("Must implement ClassifySample!" == NULL);  return 0;}

开发者ID:H-Plus-Time，项目名称:tesseract-emscripten，代码行数:9，

示例29: ASSERT_HOST

void UnicharAmbigs::LoadUnicharAmbigs(FILE *AmbigFile, inT64 end_offset,                                      UNICHARSET *unicharset) {  int i;  for (i = 0; i < unicharset->size(); ++i) {    replace_ambigs_.push_back(NULL);    dang_ambigs_.push_back(NULL);    one_to_one_definite_ambigs_.push_back(NULL);  }  if (global_ambigs_debug_level) tprintf("Reading ambiguities/n");  int TestAmbigPartSize;  int ReplacementAmbigPartSize;  // Maximum line size:  //   10 for sizes of ambigs, tabs, abmig type and newline  //   UNICHAR_LEN * (MAX_AMBIG_SIZE + 1) for each part of the ambig  // The space for buffer is allocated on the heap to avoid  // GCC frame size warning.  const int kMaxAmbigStringSize = UNICHAR_LEN * (MAX_AMBIG_SIZE + 1);  const int kBufferSize = 10 + 2 * kMaxAmbigStringSize;  char *buffer = new char[kBufferSize];  char ReplacementString[kMaxAmbigStringSize];  UNICHAR_ID TestUnicharIds[MAX_AMBIG_SIZE + 1];  int line_num = 0;  int type = NOT_AMBIG;  // Determine the version of the ambigs file.  int version = 0;  ASSERT_HOST(fgets(buffer, kBufferSize, AmbigFile) != NULL &&              strlen(buffer) > 0);  if (*buffer == 'v') {    version = static_cast<int>(strtol(buffer+1, NULL, 10));    ++line_num;  } else {    rewind(AmbigFile);  }  while ((end_offset < 0 || ftell(AmbigFile) < end_offset) &&         fgets(buffer, kBufferSize, AmbigFile) != NULL) {    chomp_string(buffer);    if (global_ambigs_debug_level > 2) tprintf("read line %s/n", buffer);    ++line_num;    if (!ParseAmbiguityLine(line_num, version, *unicharset, buffer,                            &TestAmbigPartSize, TestUnicharIds,                            &ReplacementAmbigPartSize,                            ReplacementString, &type)) continue;    // Construct AmbigSpec and add it to the appropriate AmbigSpec_LIST.    AmbigSpec *ambig_spec = new AmbigSpec();    InsertIntoTable((type == REPLACE_AMBIG) ? 
replace_ambigs_ : dang_ambigs_,                    TestAmbigPartSize, TestUnicharIds,                    ReplacementAmbigPartSize, ReplacementString, type,                    ambig_spec, unicharset);    // Update one_to_one_definite_ambigs_.    if (use_definite_ambigs_for_classifier && TestAmbigPartSize == 1 &&        ReplacementAmbigPartSize == 1 && type == DEFINITE_AMBIG) {      if (one_to_one_definite_ambigs_[TestUnicharIds[0]] == NULL) {        one_to_one_definite_ambigs_[TestUnicharIds[0]] = new UnicharIdVector();      }      one_to_one_definite_ambigs_[TestUnicharIds[0]]->push_back(          ambig_spec->correct_ngram_id);    }  }  delete[] buffer;  // Print what was read from the input file.  if (global_ambigs_debug_level > 2) {    for (int tbl = 0; tbl < 2; ++tbl) {      const UnicharAmbigsVector &print_table =        (tbl == 0) ? replace_ambigs_ : dang_ambigs_;      for (i = 0; i < print_table.size(); ++i) {        AmbigSpec_LIST *lst = print_table[i];        if (lst == NULL) continue;        if (!lst->empty()) {          tprintf("%s Ambiguities for %s:/n",                  (tbl == 0) ? "Replaceable" : "Dangerous",                  unicharset->debug_str(i).string());        }        AmbigSpec_IT lst_it(lst);        for (lst_it.mark_cycle_pt(); !lst_it.cycled_list(); lst_it.forward()) {          AmbigSpec *ambig_spec = lst_it.data();          tprintf("wrong_ngram:");          UnicharIdArrayUtils::print(ambig_spec->wrong_ngram, *unicharset);          tprintf("correct_fragments:");          UnicharIdArrayUtils::print(ambig_spec->correct_fragments, *unicharset);        }      }    }  }}

开发者ID:Appiah，项目名称:tesseractstuff，代码行数:87，

示例30: ASSERT_HOST

// Adds the dw_ in other to the dw_ is *this.void WeightMatrix::AddDeltas(const WeightMatrix& other) {  ASSERT_HOST(dw_.dim1() == other.dw_.dim1());  ASSERT_HOST(dw_.dim2() == other.dw_.dim2());  dw_ += other.dw_;}

开发者ID:bhanu475，项目名称:tesseract，代码行数:6，

注：本文中的ASSERT_HOST函数示例整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。

C++ ASSERT_INT_EQ函数代码示例
C++ ASSERT_GE函数代码示例