Logo Search packages:      
Sourcecode: ocrad version File versions  Download package

character_r12.cc

/*  GNU Ocrad - Optical Character Recognition program
    Copyright (C) 2003, 2004, 2005 Antonio Diaz Diaz.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include <cstdio>
#include <vector>
#include "common.h"
#include "rectangle.h"
#include "ucs.h"
#include "bitmap.h"
#include "block.h"
#include "character.h"
#include "profile.h"
#include "feats.h"


// Recognizes 2 block characters.
// ij!%:;=?|
//
void Character::recognize12( const Charset & charset, const Rectangle & charbox ) throw()
  {
  const Block & b1 = block( 0 );          // upper block
  const Block & b2 = block( 1 );          // lower block
  int a1 = b1.area();
  int a2 = b2.area();
  Features f1( b1 );
  Features f2( b2 );

  if( Ocrad::similar( a1, a2, 10 ) )
    {
    if( !b1.holes() && !b2.holes() &&
        2 * a1 > b1.size() && 2 * a2 > b2.size() )
      {
      if( width() > height() || Ocrad::similar( width(), height(), 40 ) )
        { add_guess( '=', 0 ); return; }
      if( Ocrad::similar( b1.width(), b1.height(), 20, 2 ) &&
          Ocrad::similar( b2.width(), b2.height(), 20, 2 ) )
        add_guess( ':', 0 );
      return;
      }
    return;
    }
  if( Ocrad::similar( a1, a2, 60 ) )
    {
    if( f1.test_solid( charbox ) == '.' )
      {
      if( f2.test_solid( charbox ) == '.' )
        { add_guess( ':', 0 ); return; }
      if( b2.height() > b1.height() && b2.top() > charbox.vcenter() )
        { add_guess( ';', 0 ); return; }
      }
    if( charset.enabled( Charset::iso_8859_15 ) ||
        charset.enabled( Charset::iso_8859_9 ) )
      {
      int code = f2.test_solid( charbox );
      if( code == '-' || code == '_' )
        { add_guess( UCS::PLUSMIN, 0 ); return; }
      }
    if( b1.includes_hcenter( b2 ) && b2.includes_hcenter( b1 ) )
      { if( b1.holes() && b2.holes() ) { add_guess( 'g', 0 ); return; } }
    if( b1.hcenter() < b2.hcenter() )
      {
      // Looks for merged 'fi'
      if( b2.height() > b2.width() && b1.hcenter() < b2.left() &&
          b1.includes_hcenter( b2 ) && 4 * b1.height() > 5 * b2.height() &&
          Ocrad::similar( b1.bottom()-b1.top(), b2.bottom()-b1.top(), 10 ) )
        {
        Character c2( new Block( b2 ) ); c2.recognize1( charset, charbox );
        if( c2.maybe('l') || c2.maybe('|') )
          {
          only_guess( 0, b1.left() ); add_guess( 'f', b2.left() );
          add_guess( 'i', b2.right() ); return;
          }
        }
      if( 2 * b1.height() > 3 * b2.height() &&
          b1.holes() == 1 && b2.holes() == 1 &&
          Ocrad::similar( b2.width(), b2.height(), 50 ) )
        { add_guess( '%', 0 ); return; }
      }
    }
  if( a1 < a2 )
    {
      {
      int code = f1.test_solid( charbox );            //FIXME all this
      if( code == '-' && 2 * b1.height() > b1.width() ) code = '.';
      else if( code == '\'' || code == '|' ) code = '.';
      if( !code && !b1.holes() && 
          2 * b1.height() < b2.height() && b1.width() <= b2.width() )
        {
        if( 10 * a1 >= 7 * b1.height() * b1.width() ) code = '.';
        else code = '\'';
        }
      if( !b2.holes() && ( code == '.' || code == '\'' ) )
        {
        // Looks for merged 'ri' or 'r'
        if( f2.bp.minima( b2.height() / 4 ) == 2 &&
            b2.top() > b1.bottom() && b2.hcenter() < b1.left() )
          {
          Character c2( new Block( b2 ) ); c2.recognize1( charset, charbox );
          if( c2.maybe('n') )
            {
            if( code == '.' && ( b1.left() < b2.hcenter() || b1.right() > b2.right() ) )
              { add_guess( 'n', 0 ); return; }        // FIXME remove dot
            int col, limit = b2.seek_right( b2.vcenter(), b2.hcenter() );
            for( col = b2.hcenter(); col <= limit; ++col )
              if( b2.seek_bottom( b2.vcenter(), col ) < b2.bottom() ) break;
            if( b2.left() < col && col < b2.right() )
              {
              only_guess( 0, b2.left() );
              if( charset.enabled( Charset::iso_8859_9 ) && f2.rp.istip() )
                { add_guess( 'T', col - 1 ); add_guess( UCS::CIDOT, b2.right() ); }
              else
                {
                add_guess( 'r', col - 1 );
                if( code == '.' ) add_guess( 'i', b2.right() );
                else add_guess( UCS::SIACUTE, b2.right() );
                }
              return;
              }
            }
          }

        if( code == '.' && f2.bp.minima( b2.height() / 4 ) == 1 &&
            b1.bottom() <= b2.top() )
          {
          int hdiff;
          if( b2.bottom_hook( &hdiff ) && std::abs( hdiff ) >= b2.height() / 2 )
            {
            if( hdiff > 0 && f2.rp.increasing( f2.rp.pos( 80 ) ) )
              { add_guess( 'j', 0 ); return; }
            if( hdiff < 0 )
              {
              if( charset.enabled( Charset::iso_8859_15 ) ||
                  charset.enabled( Charset::iso_8859_9 ) )
                if( f2.wp.max() > 2 * f1.wp.max() && f2.lp.minima() == 1 )
                  { add_guess( UCS::IQUEST, 0 ); return; }
              add_guess( 'i', 0 ); return;
              }
            }
          if( f2.tp.minima() == 1 )
            {
            if( Ocrad::similar( f1.wp.max(), f2.wp.max(), 20 ) )
              {
              if( charset.enabled( Charset::iso_8859_15 ) ||
                  charset.enabled( Charset::iso_8859_9 ) )
                if( !f2.lp.isctip() && f2.wp.max() >= f1.wp.max() &&
                    3 * f2.wp[f2.wp.pos(10)] < 2 * f1.wp.max() )
                  { add_guess( UCS::IEXCLAM, 0 ); return; }
              add_guess( 'i', 0 ); return;
              }
            if( 3 * f2.wp.max() > 4 * f1.wp.max() &&
                b2.seek_bottom( b2.vcenter(), b2.hpos( 10 ) ) < b2.bottom() &&
                f2.rp.increasing( f2.rp.pos( 75 ) ) &&
                ( b1.left() >= b2.hcenter() ||
                  b2.seek_top( b2.vcenter(), b2.hpos( 10 ) ) <= b2.top() ) )
              { add_guess( 'j', 0 ); return; }
            if( charset.enabled( Charset::iso_8859_9 ) && f2.rp.istip() )
              { add_guess( UCS::CIDOT, 0 ); return; }
            add_guess( 'i', 0 ); return;
            }
          }
        }
      }

    // upper block has no holes
    if( ( b1.bottom() < b2.vcenter() || 2 * a1 < a2 ) && !b1.holes() )
      {
      Character c( new Block( b2 ) ); c.recognize1( charset, charbox );
      if( c.guesses() )
        {
        int code = c.guess( 0 ).code;
        if( b1.bottom() < b2.vcenter() )
          {
          int atype = '\'';
          if( UCS::isvowel( code ) && 2 * b1.width() > 3 * b1.height() &&
              !f1.tp.iscpit() && f1.hp.iscpit() ) atype = ':';
          else if( f1.bp.minima() == 2 ) atype = '^';
          else if( std::min( b1.height(), b1.width() ) >= 6 &&
                   ( f1.rp.decreasing() || f1.tp.increasing() ) &&
                   ( f1.bp.decreasing() || f1.lp.increasing() ) ) atype = '`';
          code = UCS::compose( code, atype );
          if( charset.only( Charset::ascii ) )
            {
            if( UCS::base_letter( code ) == 'i' ) code = 'i';
            else code = c.guess( 0 ).code;
            }
          }
        if( code ) add_guess( code, 0 );
        }
      }
    return;
    }

  if( b1.bottom() <= b2.top() )
    {
    int code = f2.test_solid( charbox );
    if( !b1.holes() && ( code == '.' || 
          ( code && Ocrad::similar( b2.height(), b2.width(), 50 ) ) ) )
      {
      if( Ocrad::similar( b1.width(), b2.width(), 50 ) )
        { add_guess( '!', 0 ); return; }
      add_guess( '?', 0 ); return;
      }
    if( code == '-' || code == '_' )
      if( charset.enabled( Charset::iso_8859_15 ) ||
          charset.enabled( Charset::iso_8859_9 ) )
        {
        if( b1.holes() == 1 )
          {
          const Bitmap & h = b1.hole( 0 );
          if( Ocrad::similar( h.left() - b1.left(), b1.right() - h.right(), 40 ) )
            { add_guess( UCS::MASCORD, 0 ); return; }
          add_guess( UCS::FEMIORD, 0 ); return;
          }
        }
    }
  }

Generated by  Doxygen 1.6.0   Back to index