summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSuren A. Chilingaryan <csa@dside.dyndns.org>2005-08-11 01:06:56 +0000
committerSuren A. Chilingaryan <csa@dside.dyndns.org>2005-08-11 01:06:56 +0000
commit3736c5f3635863e54ab2cc47860628d26855c749 (patch)
tree3c1dadec1b75557463fcc740429cceb6e948f998
parent63bf2a90a6d6fb0859e4c9dd9fcac85de9adc0f1 (diff)
downloadlibrcc-3736c5f3635863e54ab2cc47860628d26855c749.tar.gz
librcc-3736c5f3635863e54ab2cc47860628d26855c749.tar.bz2
librcc-3736c5f3635863e54ab2cc47860628d26855c749.tar.xz
librcc-3736c5f3635863e54ab2cc47860628d26855c749.zip
Transliteration and Documentation Update
- Fix: Autodetection of disabled charsets. - Fix: Cleanly terminate external process if parent thread terminated. - Transliteration for Russian, Ukrainian and using IConv. - Documentation Update.
-rw-r--r--INSTALL37
-rw-r--r--NEWS6
-rw-r--r--README162
-rw-r--r--ToDo63
-rw-r--r--examples/rcc-gtk-config.c2
-rw-r--r--external/rccexternal.c42
-rw-r--r--src/librcc.h10
-rw-r--r--src/lngconfig.c59
-rw-r--r--src/rccconfig.c11
-rw-r--r--src/rccconfig.h3
-rw-r--r--src/recode.c47
-rw-r--r--ui/rccnames.c2
12 files changed, 382 insertions, 62 deletions
diff --git a/INSTALL b/INSTALL
index e69de29..bad8d54 100644
--- a/INSTALL
+++ b/INSTALL
@@ -0,0 +1,37 @@
+Dependencies
+============
+ LibRCC depends on the LibXML2 library. However, it requires some other
+ libraries to provide certain services.
+
+ * LibRCD and Enca libraries are used to provide encoding autodetection.
+ * DB4 is used to cache translations and recodings.
+ * Aspell is required for language autodetection.
+ * LibTranslate is required for translation.
+ o Libtranslate uses online services to translate the text. In the
+ default version there is no way to limit translation time.
+ LibRCC will respect the maximum time for recoding in any case, but
+ nevertheless it is a good idea to use the patched version of Libtranslate,
+ which provides an API call for time-limited translation. The patch can be
+ downloaded from the RusXMMS(http://RusXMMS.sf.net) project page.
+
+ The configure script will report which options are available.
+
+
+Build
+=====
+ LibRCC utilizes standard GNU autoconf build system. To build LibRCC release
+ you should type:
+ ./configure
+ make
+
+ Installation can be performed using following command:
+ make install
+
+ The configure script accepts following options:
+ --enable-force-dynamic-engines:
+ Will force encoding autodetection to load Enca and LibRCD libraries
+ dynamically, rather than compiling them in.
+ --disable-libtranslate:
+ Will not compile support for language translation.
+ Even if Libtranslate library is available.
+ \ No newline at end of file
diff --git a/NEWS b/NEWS
index e69de29..127ef5d 100644
--- a/NEWS
+++ b/NEWS
@@ -0,0 +1,6 @@
+LibRCC-0.2.0:
+ + Language AutoDetection
+ + Language Translation
+ + Language Transliteration for Russian, Ukrainian and Languages supported by IConv.
+ + Support for per-class Charset Configuration
+
diff --git a/README b/README
index 6a47827..5a13638 100644
--- a/README
+++ b/README
@@ -1,7 +1,161 @@
+RusXMMS Project
+===============
-Language Autodetection
-----------------------
+Originally, the project was aimed to provide means to work with multiple encodings
+of the same language through adapting encoding of ID3 tags, M3U and PLS playlists
+(including file names) to local settings on-the-fly. Both the tag reading and
+writing back using any selected encoding was supported.
+Nowadays there is a library available providing the same functionality for almost
+any program with just a few lines of code. The library is not limited to ID3 tags,
+it can be useful for any program working with small titles or file names in
+different languages and encodings. The patches for several music players, ID3 tag
+libraries and some other programs are available on the project page.
+The Abilities of LibRCC Library
+===============================
-Translation
------------
+ * Language Autodetection
+ * On the fly translation between languages, using online-services!
+ * Encoding Autodetection for most European Languages
+ * Support for encoding detection plugins (besides Enca and LibRCD)
+ * Recoding/translation of multi-language playlists!
+ * Cache to speed-up re-recoding.
+ * Possibility to configure new languages and encodings.
+ * Shared configuration file. For example mentioned TagLib and LibID3 patches
+ do not have their own user interface, but will utilize the same recoding
+ configuration as XMMS.
+ * As well the separate program for configuration adjustment is available.
+ * GTK/GTK2 UI Library: you can add properties page to your GTK application
+ with 3 lines of code.
+ * Menu localization opportunity
+
+The Available Patches
+=====================
+
+ * RusXMMS: Visualization and editing of the whole range of ID3 tags using any
+ of the 8-bit or Unicode encodings. Support for playlists with non-English
+ filenames. The translation of foreign languages to English or the locale one is
+ supported as well. The embedded properties page. The patch makes XMMS the best
+ player to work with ID3 titles.
+ * TagLib: Visualization and editing of ID3 v.1 and v.2 titles. Any TagLib
+ based application will correctly work with ID3 tags out of the box. The
+ properties page can be added to application with several lines of code.
+ Additionally, after applying the patch, the 'tagwriter' program from the
+ TagLib examples can be used to convert titles of all your MP3 files to
+ unicode ID3 v.2 tags just using command: "tagwriter *.mp3".
+ * LibID3: Visualization of ID3 v.1 and v.2 titles. Any LibID3 based
+ application will correctly display (but not edit) ID3 tags out of the box.
+ * Mpg123: Visualization of ID3 v.1 titles.
+ * GFtp: Recoding file names between FTP servers using different encodings.
+ * Unzip: Recoding file names from Windows created archives.
+
+
+Gratitudes
+==========
+ * Me ;)
+ * Michael Shigorin - Ideas and great help in wiping bugs
+ * Dmitry A. Koptev - Slackware packages
+ * IPE, ForschungsZentrum, Karlsruhe
+ * CRD, Yerevan Physics Institute
+ * Georgian and Spanish winemakers :)
+
+Important Notes
+===============
+
+1. It makes much more sense to report problems here than to just claim nothing is
+ working on miscellaneous forums and mailing lists.
+2. If you want patches presented here to be included in the correspondent project
+ trees, please, ask authors. The same thing should be concerned about inclusion
+ of LibRCC and LibRCD in the official Gentoo portage tree.
+3. Most of the patches will modify configure.in and Makefile.am files, so the full
+ autoconfiguration should be performed.
+ You should run "aclocal; automake; autoconf" prior to using configure script.
+4. Output encoding normally must correspond to the current "LC_CTYPE" locale. If
+ you set it to another value without really knowing what you are doing,
+ it can cause problems.
+
+Preferences
+===========
+
+ * Current Language. The English, Russian, Ukrainian, Belarussian, Bulgarian,
+ Czech, Estonian, Croatian, Hungarian, Lithuanian, Latvian, Polish, Slovak,
+ Slovenian and Chinese are embedded in the library. To get other languages you
+ should configure them in the "/etc/rcc.xml" or in user-defined configuration
+ "~/.rcc/rcc.xml". By default the language will be determined using the LC_CTYPE
+ environment variable.
+ * Current encoding for supported encoding classes. For RusXMMS the following
+ classes are defined:
+ o ID3 Encoding
+ o ID3 v.2 Encoding (uses ID3 by default)
+ o PlayList Encoding (uses ID3 by default)
+ o Encoding for Filenames in the Playlists (defaults to FS encoding)
+ o FileSystem Encoding (uses locale encoding by default)
+ o Output Encoding (uses locale encoding by default)
+ The default encoding will be resolved using:
+ o The unicode encoding selected for english language.
+ o The encoding of the parent class if any.
+ o The unicode encoding defined by locale variable or any locale encoding
+ in the case of locale language is used.
+ o First available usable encoding.
+ * Encoding autodetection engine. First available is used by default.
+ * Mode for recoding cache:
+ o Off: Do not use recoding cache
+ o On: Use recoding cache to find out encoding and language
+ o ReLearn: Fill recoding cache with detected values
+ o Learn: Try to use recoding cache to find out encoding. If there is
+ no cached encoding for the current title, try to detect it and store it in the
+ cache.
+ * Autodetect File Name: If option is switched on the encoding of the file
+ will be resolved using search over file system.
+ * Autoengine Set Current Encoding: Encoding autodetection engine will
+ automatically set detected encoding to be used by default.
+ * Autodetect Language: Try to autodetect used language. Quite slow.
+ * Translate Text: Translate text from detected language to the locale
+ language. Very slow and requires an internet connection. Different modes are
+ available. In the full mode the string will be translated to the current
+ locale language. If translation to the locale language fails, the string will
+ be translated to English. With "Skip Related" and "Skip parrent" options the
+ translation between related languages will be omitted (A language is
+ considered to be a parent language when it is expected to have
+ words from that language in the strings of the child language. The English
+ language is considered to be a parent language for any other.).
+ With "Translate to English" option it is possible to translate all strings
+ to english.
+ * Since the translation is slow, it is possible to limit maximum time used to
+ recode/translate string. In this case if translation in the specified amount
+ of time is not finished, untranslated string will be returned. However, the
+ string will be queued for translation and in the next access the translated
+ and cached value will be returned.
+ * Additionally, for RusXMMS only it is possible to specify font used by shade
+ form of xmms playlist.
+
+Using Multi-Language Playlist
+=============================
+There are two ways of using multi-language playlists. The first is to use
+Language autodetection:
+
+ * The UTF-8 locale should be set.
+ * The LibRCC should be compiled with aspell support.
+ * The aspell dictionaries for all languages used should be installed.
+
+However, this is quite slow, and erroneous in the cases when mostly non-dictionary
+words are used. The second option is to use the recoding cache:
+
+ * The UTF-8 locale should be set.
+ * Select "Learn" mode for recoding caching policy in the preferences prior
+ to loading new files.
+ * Afterwards the titles for the loaded files will be recoded correctly
+ whenever the recoding caching is enabled.
+
+Using Language Translation
+==========================
+It is possible to translate titles to your language using libtranslate.
+LibRCC should be compiled with LibTranslate support (it is a good
+idea to use the patched libtranslate with the possibility to limit the maximum amount of
+time spent on translation) and you should have an internet connection. Since
+libtranslate utilizes online translation services, it takes a lot of time to get
+a translation. To solve this problem, translation queueing and caching are used.
+If translation does not finish in the allotted amount of time, an error will be returned to
+the caller and the string will be queued for translation. The translated string will be
+stored in the DB4 cache and will be returned to the caller on the following requests
+for translation.
diff --git a/ToDo b/ToDo
index 6c0cfa3..78abbaa 100644
--- a/ToDo
+++ b/ToDo
@@ -1,40 +1,39 @@
0.3.x:
- Buffer managment:
+ SetBufferSize ( 0 - autogrow )
- - Language autodetection and translation improvements
- + Look on ofline translation libraries and other possibilities to improove
- translation and language detection.
- + Implement ispell support
- + Configurable timeouts
- - Move all recoding functionality on rccConfig level
- - Revise locking subsystem
- - Libtranslate can leave translated message partly in old language. This causes problems
- because of recoding from UTF8 to Current language. (With UTF-8 encoding should be Okey).
- - Lating languages. If in the string all characters < 0x7F then we have one of the Latin
- languages?
- - Statistic approach of language detection.
- - LibRCD autolearning using db4
- + Charset detection
- + Language detection (same as charsets, but for UTF8...)
- * Consider word recognition based on probability
- + Autolearning is triggered by large enough dictionary words
- - Configurable common classes
+ - Move all recoding functionality on the rccConfig Level
+ - Revise Locking Subsystem
+ - Load class configurations from the XML files.
-1.x:
- - Common encodings:
- + Provide way to add to all languages several default Unicode encodings (UTF8, UTF16, UTF16BE)
- + Special type of classes to select only from Unicode encodings (or even just specified subset of encodings)
- + Special pluggable encodings. For example translate to english.
- * rccToEncoding(current_language, *new_language, buf, size)?
- * rccFromEncoding(current_language, utf8_language, buf, size)?
- * Code some options in charset name. (SpecialEncodingPrefix_Encoding_EncodingOptions)
- - Recoding options:
- + Skip Translation
- - Switch to Get/Ref/UnRef system
+
+0.4.x:
+ - Language and Encoding autodetection improvements.
+ + LibRCD should use DB4 with statistic for different languages
+ + The statistic should be gathered using:
+ * Aspell dictionaries.
+ * Special program getting text on the standard input.
+ * From LibRCC when language is precisely detected.
+ + The LibRCD engine should be used to fast language detection as well.
+ * Just analyze output UTF8 string
+ + Add ispell support
+ - Translation improvements
+ + Look if there are any offline translation libraries available.
+ + Use stardict (or other dictionary) to translate on per-word basis.
+ + Try to translate to first parent encoding if translation to the current one fails.
+ + Transliterate translation mode
+
+0.5.x:
+ - Special encoding.
+ + Instead of IConv call considered function.
+ * For example: Transliterate
+ * For example: Translate to English
+ + The options for encoding should be passed as a part of encoding name.
+ * Develop naming conventions
+ + Pluggable special encodings.
+
+1.0.x:
+ - Switch to Get/Ref/UnRef calls.
- Drop down 'Class' keywords in all 'ClassCharset' function. Make it default behaviour.
on request:
- Multibyte(not-UTF8) support for FS classes
- - If there are neccessity in western-european language relating.
- + Check for correctness between related western-european languages while
- invalid translation checking (rccTo). Can be done with rccSpeller.
diff --git a/examples/rcc-gtk-config.c b/examples/rcc-gtk-config.c
index 9a3f988..da73608 100644
--- a/examples/rcc-gtk-config.c
+++ b/examples/rcc-gtk-config.c
@@ -16,7 +16,7 @@ static rcc_class classes[] = {
{ "ftp", RCC_CLASS_STANDARD, NULL, NULL, "FTP Encoding", 0 },
{ "http", RCC_CLASS_STANDARD, NULL, NULL, "HTTP Encoding", 0 },
{ "ssh", RCC_CLASS_STANDARD, NULL, NULL, "SSH Encoding", 0 },
- { "out", RCC_CLASS_STANDARD, "LC_CTYPE", NULL, NULL, 0 },
+ { "out", RCC_CLASS_STANDARD, "LC_CTYPE", NULL, "Output Encoding", 0 },
{ NULL }
};
diff --git a/external/rccexternal.c b/external/rccexternal.c
index 47f628a..292ee5d 100644
--- a/external/rccexternal.c
+++ b/external/rccexternal.c
@@ -1,6 +1,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <errno.h>
#include "../config.h"
@@ -24,19 +25,37 @@
#ifdef HAVE_SYS_UN_H
# include <sys/un.h>
#endif /* HAVE_SYS_UN_H */
+#ifdef HAVE_SYS_TIME_H
+# include <sys/time.h>
+#endif /* HAVE_SYS_TIME_H */
+
+#ifdef HAVE_SIGNAL_H
+# include <signal.h>
+#endif /* HAVE_SIGNAL_H */
#include <glib/gthread.h>
#include "../src/rccexternal.h"
#include "rcclibtranslate.h"
+#define RCC_EXIT_CHECK_TIMEOUT 10 /* seconds */
+
int main() {
+#ifdef HAVE_SIGNAL_H
+ struct sigaction act;
+#endif /* HAVE_SIGNAL_H */
+ int err;
+ struct timeval tv;
+ fd_set fdcon;
+
int s, sd;
char addr[376];
const char *rcc_home_dir;
struct sockaddr_un mysock, clisock;
socklen_t socksize;
+
+ pid_t parentpid;
pid_t mypid;
unsigned char loopflag = 1;
@@ -49,6 +68,8 @@ int main() {
struct passwd *pw;
#endif /* HAVE_PWD_H */
+
+ parentpid = getppid();
mypid = getpid();
rcc_home_dir = getenv ("HOME");
@@ -78,13 +99,34 @@ int main() {
mysock.sun_path[sizeof(mysock.sun_path)-1]=0;
unlink(addr);
+
if (bind(s,(struct sockaddr*)&mysock,sizeof(mysock))==-1) return -1;
if (listen(s,1)<0) {
unlink(addr);
return -1;
}
+#ifdef HAVE_SIGNAL_H
+ act.sa_handler = SIG_IGN;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ sigaction(SIGPIPE,&act,NULL);
+ sigaction(SIGINT,&act,NULL);
+#endif /* HAVE_SIGNAL_H */
+
while (loopflag) {
+ tv.tv_sec = RCC_EXIT_CHECK_TIMEOUT;
+ tv.tv_usec = 0;
+
+ FD_ZERO(&fdcon);
+ FD_SET(s, &fdcon);
+
+ err = select(s+1, &fdcon, NULL, NULL, &tv);
+ if (err<=0) {
+ if (getppid() != parentpid) break;
+ continue;
+ }
+
sd = accept(s,(struct sockaddr*)&clisock,&socksize);
if (sd < 0) continue;
diff --git a/src/librcc.h b/src/librcc.h
index 9b064d1..98ca1a6 100644
--- a/src/librcc.h
+++ b/src/librcc.h
@@ -427,6 +427,7 @@ typedef int rcc_option_value;
typedef enum rcc_option_translate_t {
RCC_OPTION_TRANSLATE_OFF = 0, /**< Switch translation off. */
+ RCC_OPTION_TRANSLATE_TRANSLITERATE, /**< Transliterate data. */
RCC_OPTION_TRANSLATE_TO_ENGLISH, /**< Translate data to english language (Current language don't matter). */
RCC_OPTION_TRANSLATE_SKIP_RELATED, /**< Skip translation of the text's between related languages. */
RCC_OPTION_TRANSLATE_SKIP_PARRENT, /**< Skip translation of the text's from parrent languages (from english). */
@@ -821,7 +822,7 @@ rcc_charset_id rccConfigGetClassCharsetByName(rcc_language_config config, rcc_cl
* Checks if charset is disabled for the specified class.
* @param config is language configuration
* @param class_id is class id.
- * @param charset is charset name.
+ * @param charset_id is charset id.
* @return 1 if charset is disabled, 0 if charset is enabled, -1 in the case of error.
*/
int rccConfigIsDisabledCharset(rcc_language_config config, rcc_class_id class_id, rcc_charset_id charset_id);
@@ -885,10 +886,13 @@ const char *rccConfigGetSelectedCharsetName(rcc_language_config config, rcc_clas
/**
* Return current encoding_id. The default value will be resolved to paticular encoding id.
* The following procedure is used to detect default encoding:
+ * - If Unicode encoding selected for the same class english language. Return this encoding.
* - If the parrent class is defined in #defcharset, - return current encoding of parrent class.
- * - If the locale variable is defined in #defcharset and config language coincide with locale language, use locale encoding.
+ * - If the locale variable is defined in #defcharset and either config language coincides with locale language or unicode encoding is defined, use locale encoding.
* - If the default value for config language is defined in #defvalue return that default value.
- * - Return language with id 0. Normally this should be dummy language which indicates that RCC library is not used.
+ * - If the default value for all languages is defined in #defvalue return that default value.
+ * - If either config language coincides with locale language or unicode locale is used, return locale encoding.
+ * - Return the first non-disabled encoding in the list.
*
* @param config is language configuration
* @param class_id is encoding class
diff --git a/src/lngconfig.c b/src/lngconfig.c
index 20aff63..631abd1 100644
--- a/src/lngconfig.c
+++ b/src/lngconfig.c
@@ -567,9 +567,11 @@ const char *rccConfigGetSelectedCharsetName(rcc_language_config config, rcc_clas
}
rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_id class_id) {
+ rcc_language_config enconfig;
unsigned int i, max;
rcc_charset_id charset_id;
rcc_charset_id all_charset_id = (rcc_language_id)-1;
+ const char *charset;
rcc_class_default_charset *defcharset;
const char *lang;
@@ -582,10 +584,19 @@ rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_
const char *defvalue;
if ((!config)||(!config->ctx)||(class_id<0)||(class_id>=config->ctx->n_classes)) return -1;
-
+
charset_id = config->charset[class_id];
if (charset_id) return charset_id;
+ enconfig = rccGetConfigByName(config->ctx, rcc_english_language_sn);
+ if ((enconfig)&&(enconfig!=config)) {
+ charset_id = enconfig->charset[class_id];
+ if (charset_id) {
+ charset = rccConfigGetClassCharsetName(enconfig, class_id, charset_id);
+ if ((charset)&&(rccIsUnicode(charset))) return charset_id;
+ }
+ }
+
if (!config->language) return (rcc_charset_id)-1;
else language = config->language;
@@ -598,23 +609,27 @@ rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_
if (!strcmp(classes[i]->name, defvalue))
return rccConfigGetCurrentCharset(config, i);
}
- } else defvalue = config->ctx->locale_variable;
+ }
if (config->default_charset[class_id]) return config->default_charset[class_id];
if (cl->defvalue) {
charset_id = rccConfigGetLocaleClassCharset(config, class_id, defvalue);
if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) {
- config->default_charset[class_id] = charset_id;
- return charset_id;
+ if (!rccConfigIsDisabledCharset(config, class_id, charset_id)) {
+ config->default_charset[class_id] = charset_id;
+ return charset_id;
+ }
}
}
if (cl->defvalue) {
charset_id = rccConfigGetClassCharsetByName(config, class_id, defvalue);
if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) {
- config->default_charset[class_id] = charset_id;
- return charset_id;
+ if (!rccConfigIsDisabledCharset(config, class_id, charset_id)) {
+ config->default_charset[class_id] = charset_id;
+ return charset_id;
+ }
}
}
@@ -626,9 +641,17 @@ rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_
if (!strcasecmp(lang, defcharset[i].lang)) {
charset_id = rccConfigGetClassCharsetByName(config, class_id, defcharset[i].charset);
if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) {
- config->default_charset[class_id] = charset_id;
- return charset_id;
- } else break;
+ if (!rccConfigIsDisabledCharset(config, class_id, charset_id)) {
+ config->default_charset[class_id] = charset_id;
+ return charset_id;
+ } else {
+ all_charset_id = (rcc_charset_id)-1;
+ break;
+ }
+ } else {
+ all_charset_id = (rcc_charset_id)-1;
+ break;
+ }
} else if (!strcasecmp(rcc_default_all, defcharset[i].lang)) {
charset_id = rccConfigGetClassCharsetByName(config, class_id, defcharset[i].charset);
if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) {
@@ -638,20 +661,26 @@ rcc_charset_id rccConfigGetCurrentCharset(rcc_language_config config, rcc_class_
}
if (all_charset_id != (rcc_language_id)-1) {
- config->default_charset[class_id] = all_charset_id;
- return all_charset_id;
+ if (!rccConfigIsDisabledCharset(config, class_id, all_charset_id)) {
+ config->default_charset[class_id] = all_charset_id;
+ return all_charset_id;
+ }
}
}
- charset_id = rccConfigGetLocaleClassCharset(config, class_id, defvalue);
+ charset_id = rccConfigGetLocaleClassCharset(config, class_id, config->ctx->locale_variable);
if ((charset_id != 0)&&(charset_id != (rcc_charset_id)-1)) {
- config->default_charset[class_id] = charset_id;
- return charset_id;
+ if (!rccConfigIsDisabledCharset(config, class_id, charset_id)) {
+ config->default_charset[class_id] = charset_id;
+ return charset_id;
+ }
}
max = rccConfigGetClassCharsetNumber(config, class_id);
for (i = 1; i< max; i++)
- if (!rccConfigIsDisabledCharset(config, class_id, (rcc_charset_id)i)) return (rcc_charset_id)i;
+ if (!rccConfigIsDisabledCharset(config, class_id, (rcc_charset_id)i)) {
+ return (rcc_charset_id)i;
+ }
return (rcc_charset_id)-1;
}
diff --git a/src/rccconfig.c b/src/rccconfig.c
index 0752ee3..ae47a63 100644
--- a/src/rccconfig.c
+++ b/src/rccconfig.c
@@ -31,6 +31,8 @@ const char rcc_default_all[] = "all";
const char rcc_default_language_sn[] = "default";
const char rcc_disabled_language_sn[] = "Off";
const char rcc_english_language_sn[] = "en";
+const char rcc_russian_language_sn[] = "ru";
+const char rcc_ukrainian_language_sn[] = "uk";
const char rcc_disabled_engine_sn[] = "Off";
const char rcc_default_charset[] = "Default";
@@ -61,18 +63,18 @@ rcc_language rcc_default_languages_embeded[RCC_MAX_LANGUAGES + 1] = {
&rcc_default_engine,
NULL
}},
-{"en", {rcc_default_charset, rcc_utf8_charset, NULL}, {
+{rcc_english_language_sn, {rcc_default_charset, rcc_utf8_charset, "ISO8859-1", NULL}, {
&rcc_default_engine,
NULL
}},
-{"ru", {rcc_default_charset,"KOI8-R","CP1251",rcc_utf8_charset,"IBM866","MACCYRILLIC","ISO8859-5", NULL}, {
+{rcc_russian_language_sn, {rcc_default_charset,"KOI8-R","CP1251",rcc_utf8_charset,"IBM866","MACCYRILLIC","ISO8859-5", NULL}, {
&rcc_default_engine,
#ifdef RCC_RCD_SUPPORT
&rcc_russian_engine,
#endif /* RCC_RCD_SUPPORT */
NULL
}},
-{"uk", {rcc_default_charset,"KOI8-U","CP1251",rcc_utf8_charset,"IBM855","MACCYRILLIC","ISO8859-5","CP1125", NULL}, {
+{rcc_ukrainian_language_sn, {rcc_default_charset,"KOI8-U","CP1251",rcc_utf8_charset,"IBM855","MACCYRILLIC","ISO8859-5","CP1125", NULL}, {
&rcc_default_engine,
#ifdef RCC_RCD_SUPPORT
&rcc_ukrainian_engine,
@@ -129,11 +131,10 @@ rcc_language rcc_default_languages_embeded[RCC_MAX_LANGUAGES + 1] = {
}},
{NULL}
};
-
rcc_option_value_name rcc_sn_boolean[] = { "OFF", "ON", NULL };
rcc_option_value_name rcc_sn_learning[] = { "OFF", "ON", "RELEARN", "LEARN", NULL };
rcc_option_value_name rcc_sn_clo[] = { "ALL", "CONFIGURED_AND_AUTO", "CONFIGURED_ONLY", NULL };
-rcc_option_value_name rcc_sn_translate[] = { "OFF", "TO_ENGLISH", "SKIP_RELATED", "SKIP_PARRENT", "FULL", NULL };
+rcc_option_value_name rcc_sn_translate[] = { "OFF", "TRANSLITERATE", "TO_ENGLISH", "SKIP_RELATED", "SKIP_PARRENT", "FULL", NULL };
rcc_option_description rcc_option_descriptions[RCC_MAX_OPTIONS+1];
rcc_option_description rcc_option_descriptions_embeded[RCC_MAX_OPTIONS+1] = {
diff --git a/src/rccconfig.h b/src/rccconfig.h
index f7f70dd..8b5ac0d 100644
--- a/src/rccconfig.h
+++ b/src/rccconfig.h
@@ -10,6 +10,9 @@
extern const char rcc_default_all[];
extern const char rcc_default_language_sn[];
extern const char rcc_english_language_sn[];
+extern const char rcc_russian_language_sn[];
+extern const char rcc_ukrainian_language_sn[];
+
extern const char rcc_disabled_language_sn[];
extern const char rcc_disabled_engine_sn[];
diff --git a/src/recode.c b/src/recode.c
index a528481..9e19078 100644
--- a/src/recode.c
+++ b/src/recode.c
@@ -322,7 +322,9 @@ static char *rccRecodeTranslate(rcc_language_config *config, rcc_class_id class_
rcc_translate trans, entrans;
+ unsigned int i;
char *translated;
+ unsigned char change_case;
ctx = (*config)->ctx;
@@ -336,7 +338,7 @@ static char *rccRecodeTranslate(rcc_language_config *config, rcc_class_id class_
english_language_id = rccGetLanguageByName(ctx, rcc_english_language_sn);
- if (translate == RCC_OPTION_TRANSLATE_TO_ENGLISH) {
+ if ((translate == RCC_OPTION_TRANSLATE_TO_ENGLISH)||(translate == RCC_OPTION_TRANSLATE_TRANSLITERATE)) {
current_language_id = english_language_id ;
} else {
if (ctype == RCC_CLASS_TRANSLATE_LOCALE) {
@@ -356,6 +358,49 @@ static char *rccRecodeTranslate(rcc_language_config *config, rcc_class_id class_
if (rccConfigConfigure(curconfig)) return NULL;
+ if (translate == RCC_OPTION_TRANSLATE_TRANSLITERATE) {
+ if (!strcasecmp((*config)->language->sn, rcc_russian_language_sn)) {
+ translated = rccSizedRecodeCharsets(ctx, "UTF-8", "KOI8-R", utfstring, 0, NULL);
+ if (!translated) return NULL;
+ for (i=0;translated[i];i++) {
+ if (translated[i]&0x80) change_case = 1;
+ else change_case = 0;
+
+ translated[i]=translated[i]&0x7F;
+ if (change_case) {
+ if ((translated[i]<'Z')&&(translated[i]>'A'))
+ translated[i]=translated[i]-'A'+'a';
+ else if ((translated[i]<'z')&&(translated[i]>'a'))
+ translated[i]=translated[i]-'a'+'A';
+ }
+ }
+ *config = curconfig;
+ return translated;
+ }
+ if (!strcasecmp((*config)->language->sn, rcc_ukrainian_language_sn)) {
+ translated = rccSizedRecodeCharsets(ctx, "UTF-8", "KOI8-U", utfstring, 0, NULL);
+ if (!translated) return NULL;
+ for (i=0;translated[i];i++) {
+ if (translated[i]&0x80) change_case = 1;
+ else change_case = 0;
+
+ translated[i]=translated[i]&0x7F;
+ if (change_case) {
+ if ((translated[i]<'Z')&&(translated[i]>'A'))
+ translated[i]=translated[i]-'A'+'a';
+ else if ((translated[i]<'z')&&(translated[i]>'a'))
+ translated[i]=translated[i]-'a'+'A';
+ }
+ }
+ *config = curconfig;
+ return translated;
+ }
+
+ translated = rccSizedRecodeCharsets(ctx, "UTF-8", "US-ASCII//TRANSLIT", utfstring, 0, NULL);
+ if (translated) *config = curconfig;
+ return translated;
+ }
+
if (translate == RCC_OPTION_TRANSLATE_SKIP_RELATED) {
if (rccAreRelatedLanguages(curconfig, *config)) return NULL;
}
diff --git a/ui/rccnames.c b/ui/rccnames.c
index d3d54d7..7f4f912 100644
--- a/ui/rccnames.c
+++ b/ui/rccnames.c
@@ -32,7 +32,7 @@ rcc_name rcc_default_language_names_embeded[RCC_MAX_LANGUAGES+1] = {
rcc_option_value_name rcc_default_option_boolean_names[] = { "Off", "On", NULL };
rcc_option_value_name rcc_default_option_learning_names[] = { "Off", "On", "Relearn", "Learn", NULL };
rcc_option_value_name rcc_default_option_clo_names[] = { "All Languages", "Configured / AutoEngine", "Configured Only", NULL };
-rcc_option_value_name rcc_default_option_translate_names[] = { "Off", "Translate to English", "Skip Translation between Related Languages", "Skip Translation from Parrent Languages", "Full", NULL };
+rcc_option_value_name rcc_default_option_translate_names[] = { "Off", "Transliterate", "Translate to English", "Skip Translation between Related Languages", "Skip Translation from Parrent Languages", "Full", NULL };
rcc_option_name rcc_default_option_names[RCC_MAX_OPTIONS+1];
rcc_option_name rcc_default_option_names_embeded[RCC_MAX_OPTIONS+1] = {