/* LibRCD Copyright (C) 2005-2018 Suren A. Chilingaryan This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 or later as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #include #include #include "../config.h" #define _LIBRCD_C #include "librcd.h" #define NF_VALUE -2 #define max(a,b) ((a>b)?a:b) #define min(a,b) ((a>1; wi=d; ki=d; ai=d; winptr=0; koiptr=0; altptr=0; do{ d>>=1; if(!ws){ if (wi>indexes2) wi-=d; else { winptr=enc_win+wi-1; if(a[0]==winptr->a){ if(a[1]==winptr->b){ ws=1; }else if(a[1]b){ wi-=d; }else{ //b>win[wi].b wi+=d; } }else if(a[0]a){ wi-=d; }else{ //a>win[wi].a wi+=d; } } } if(!ks){ if (ki>indexes2) ki-=d; else { koiptr=enc_koi+ki-1; if(a[0]==koiptr->a){ if(a[1]==koiptr->b){ ks=1; }else if(a[1]b){ ki-=d; }else{ //b>win[wi].b ki+=d; } }else if(a[0]a){ ki-=d; }else{ //a>win[wi].a ki+=d; } } } if(!as){ if (ai>indexes2) ai-=d; else { altptr=enc_alt+ai-1; if(a[0]==altptr->a){ if(a[1]==altptr->b){ as=1; }else if(a[1]b){ ai-=d; }else{ //b>win[wi].b ai+=d; } }else if(a[0]a){ ai-=d; }else{ //a>win[wi].a ai+=d; } } } }while(d); if (ws) *w=winptr; else *w=NULL; if (ks) *k=koiptr; else *k=NULL; if (as) *al=altptr; else *al=NULL; } static double calculate(double s, double m, double e) { return s+m+e; } static int is_win_charset2(const unsigned char *txt, int len){ const struct lng_stat2 *winptr, *koiptr,*altptr; double winstep,koistep,altstep,winestep,koiestep,altestep,winsstep,koisstep,altsstep; double winstat=0,koistat=0,altstat=0,winestat=0,koiestat=0,altestat=0,winsstat=0,koisstat=0,altsstat=0; long j; #ifdef _AUTO_DEBUG fprintf(stderr,"Word: %s\n",txt); #endif for(j=0;jsrate; else winsstep=NF_VALUE; if (koiptr) koisstep=koiptr->srate; else koisstep=NF_VALUE; if (altptr) altsstep=altptr->srate; else altsstep=NF_VALUE; winestep=0; koiestep=0; altestep=0; winstep=0; koistep=0; altstep=0; #ifdef _AUTO_DEBUG fprintf(stderr,", Win %lf, Koi %lf, Alt: %lf\n",winsstep,koisstep,altsstep); #endif } else if ((j==len-2)||(end_symbol(txt[j+2]))) { if (winptr) winestep=winptr->erate; else winestep=NF_VALUE; if (koiptr) koiestep=koiptr->erate; else koiestep=NF_VALUE; if (altptr) altestep=altptr->erate; else altestep=NF_VALUE; winsstep=0; koisstep=0; altsstep=0; winstep=0; koistep=0; altstep=0; #ifdef _AUTO_DEBUG fprintf(stderr,", Win %lf, Koi %lf, Alt %lf\n",winestep,koiestep,altestep); #endif } else { if (winptr) winstep=winptr->rate; else winstep=NF_VALUE; if (koiptr) koistep=koiptr->rate; else koistep=NF_VALUE; if (altptr) altstep=altptr->rate; else altstep=NF_VALUE; winsstep=0; winestep=0; koisstep=0; koiestep=0; altsstep=0; altestep=0; #ifdef _AUTO_DEBUG fprintf(stderr,", Win %lf, Koi %lf, Alt %lf\n",winstep,koistep,altstep); #endif } winstat+=winstep; koistat+=koistep; altstat+=altstep; winsstat+=winsstep; koisstat+=koisstep; altsstat+=altsstep; winestat+=winestep; koiestat+=koiestep; altestat+=altestep; } #ifdef _AUTO_DEBUG fprintf(stderr,"Start. Win: %lf, Koi: %lf, Alt: %lf\n",winsstat,koisstat,altsstat); fprintf(stderr,"Middle. Win: %lf, Koi: %lf, Alt: %lf\n",winstat,koistat,altstat); fprintf(stderr,"End. Win: %lf, Koi: %lf, Alt: %lf\n",winestat,koiestat,altestat); fprintf(stderr,"Final. Win: %lf, Koi: %lf, Alt: %lf\n",calculate(winsstat,winstat,winestat),calculate(koisstat,koistat,koiestat),calculate(altsstat,altstat,altestat)); #endif if ((calculate(altsstat,altstat,altestat)>calculate(koisstat,koistat,koiestat))&&(calculate(altsstat,altstat,altestat)>calculate(winsstat,winstat,winestat))) return 3; if (calculate(koisstat,koistat,koiestat)>calculate(winsstat,winstat,winestat)) return 1; return 0; } static int check_utf8(const unsigned char *buf, int len) { long i,j; int bytes=0,rflag=0; unsigned char tmp; int res=0; for (i=0;i0) { if ((buf[i]&0xC0)==0x80) { if (rflag) { tmp=buf[i]&0x3F; // Russian is 0x410-0x44F if ((rflag==1)&&(tmp>=0x10)) res++; else if ((rflag==2)&&(tmp<=0x0F)) res++; } bytes--; } else { res--; bytes=1-bytes; rflag=0; } } else { for (j=6;j>=0;j--) if ((buf[i]&bit(j))==0) break; if ((j==0)||(j==6)) { if ((j==6)&&(bytes<0)) bytes++; else res--; continue; } bytes=6-j; if (bytes==1) { // Cyrrilic D0-D3, Russian - D0-D1 if (buf[i]==0xD0) rflag=1; else if (buf[i]==0xD1) rflag=2; } } if ((buf[i]==0xD0)||(buf[i]==0xD1)) { if (i+1==len) break; } } return res; } /* In russian language we will have whole word consisting of >127 characters, with latin languages there is in every word besides umlauts should exist at least one standard latin character with code < 127. */ static int check_latin(const unsigned char *buf, int len) { long i; int cyr = 0; int latin = 0; for (i=0;i='a')&&(buf[i]<='z'))||((buf[i]>='A')&&(buf[i]<='Z'))) { // Latin character inside a word, so it probably isn't cyrillic word latin++; } else { // Treating as a word separator. if (cyr > 0) { if (!latin) return 0; if (cyr>latin) return 0; } cyr = 0; latin = 0; } } else { // Could be cyrillic word cyr++; } } if (cyr > 0) { if (!latin) return 0; if (cyr>latin) return 0; } // printf("C%u:L%u\n",cyr,latin); return 1; } rcd_russian_charset rcdGetRussianCharset(const char *buf,int len) { long l; l = len?len:strlen(buf); if (check_utf8(buf,l)>1) return RUSSIAN_CHARSET_UTF8; #ifdef DETECT_LATIN if (check_latin(buf,l)) return RUSSIAN_CHARSET_LATIN; #endif /* DETECT_LATIN */ return is_win_charset2(buf,l); } /* rcd_russian_charset rcdGetRussianCharset(const char *buf,int len) { int res; res = rcdGetRussianCharset1(buf, len); printf("%u: %s\n", res, (buf&&!len)?buf:"null"); return res; } */ /* Compatibility */ rcd_russian_charset get_russian_charset(const char *buf,int len) { return rcdGetRussianCharset(buf, len); }