summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Suren A. Chilingaryan <csa@dside.dyndns.org> 2013-04-26 09:03:42 +0200
committer Suren A. Chilingaryan <csa@dside.dyndns.org> 2013-04-26 09:03:42 +0200
commit 77707bc1f75c237d1e4ecdab3fbf567a8d0ac3e8 (patch)
tree 18d5dc9ba400ec95d58747666bd50a42f8a98d61
parent 0d488beb76dfc46883e23c52ccf4ac5d2f3726a4 (diff)
download librcc-77707bc1f75c237d1e4ecdab3fbf567a8d0ac3e8.tar.gz
librcc-77707bc1f75c237d1e4ecdab3fbf567a8d0ac3e8.tar.bz2
librcc-77707bc1f75c237d1e4ecdab3fbf567a8d0ac3e8.tar.xz
librcc-77707bc1f75c237d1e4ecdab3fbf567a8d0ac3e8.zip
Language autodetection: prefer English over languages without any non-parent word (English is the global parent)
-rw-r--r-- src/recode.c 12
1 files changed, 6 insertions, 6 deletions
diff --git a/src/recode.c b/src/recode.c
index ac6e11b..3b1bc02 100644
--- a/src/recode.c
+++ b/src/recode.c
@@ -172,12 +172,12 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c
mode = 0;
} else continue;
} else {
- if (mode) {
- if (utf8[j]>0x7F) english_word = 0;
- } else {
+ if (!mode) {
mode = j + 1;
english_word = 1;
}
+
+ if (utf8[j]>0x7F) english_word = 0;
}
}
@@ -200,7 +200,7 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c
words++;
}
-
+
if (english_mode) {
if (english_string) free(english_string);
@@ -256,7 +256,7 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c
return english_lang;
}
- if ((bestres > RCC_ACCEPTABLE_PROBABILITY)&&(bestlongest > RCC_ACCEPTABLE_LENGTH)) {
+ if ((bestres > RCC_ACCEPTABLE_PROBABILITY)&&(bestlongest > RCC_ACCEPTABLE_LENGTH)&&(bestown>0)) {
if (english_string) free(english_string);
if (retstring) *retstring = best_string;
else if (best_string) free(best_string);
@@ -265,7 +265,7 @@ static rcc_language_id rccDetectLanguageInternal(rcc_context ctx, rcc_class_id c
return bestlang;
}
- if ((is_english_string)&&(english_res > RCC_ACCEPTABLE_PROBABILITY)&&(english_longest > RCC_ACCEPTABLE_LENGTH)) {
+ if ((is_english_string)&&(((english_res > RCC_ACCEPTABLE_PROBABILITY)&&(english_longest > RCC_ACCEPTABLE_LENGTH))||(!bestown))) {
if (best_string) free(best_string);
if (retstring) *retstring = english_string;
else if (english_string) free(english_string);