[virt-tools-list] [libosinfo 7/8] rfc: Infer ISO language from label
Zeeshan Ali (Khattak)
zeeshanak at gnome.org
Wed Dec 5 17:00:41 UTC 2012
On Mon, Dec 3, 2012 at 1:23 PM, Christophe Fergeau <cfergeau at redhat.com> wrote:
> Now that libosinfo has an osinfo_db_identify_media method which
> modifies the media it was passed, we can generate properties which
> need information from the media stored in the OsinfoDB, and
> information from the actual media (ISO volume ID).
> This is useful to guess what languages are supported by a given
> Windows ISO: the end of the ISO volume ID has a language code, which
> we can translate to a locale identifier.
>
> This commit adds a lang-regex property to the OsinfoDB database to
> extract the language code from Windows ISO volume IDs, and
> then adds mapping tables to turn it into a locale identifier.
> ---
> data/oses/windows.xml.in | 2 +
> data/schemas/libosinfo.rng | 5 ++
> osinfo/libosinfo.syms | 4 +-
> osinfo/osinfo_db.c | 177 +++++++++++++++++++++++++++++++++++++++++++++
> osinfo/osinfo_loader.c | 4 +-
> osinfo/osinfo_media.c | 67 ++++++++++++++++-
> osinfo/osinfo_media.h | 3 +
> 7 files changed, 258 insertions(+), 4 deletions(-)
>
> diff --git a/data/oses/windows.xml.in b/data/oses/windows.xml.in
> index d09e873..e8c29f9 100644
> --- a/data/oses/windows.xml.in
> +++ b/data/oses/windows.xml.in
> @@ -739,12 +739,14 @@
> <iso>
> <volume-id>(HB1_CCPA_X86FRE|HRM_CCSA_X86FRE|HRM_CCSA_X86CHK|HRM_CCSNA_X86CHK|HRM_CCSNA_X86FRE|HRM_CENA_X86FREV|HRM_CENA_X86CHKV|HRM_CENNA_X86FREV|HRM_CENNA_X86CHKV|HRM_CPRA_X86FREV|HRM_CPRNA_X86FREV)_</volume-id>
> <publisher-id>MICROSOFT CORPORATION</publisher-id>
> + <lang-regex>[[:upper:][:digit:]_]*_([[:upper:]]*-[[:upper:]]*)</lang-regex>
> </iso>
> </media>
> <media arch="x86_64">
> <iso>
> <volume-id>(HB1_CCPA_X64FRE|HRM_CCSA_X64FRE|HRM_CCSA_X64CHK|HRM_CCSNA_X64FRE|HRM_CCSNA_X64CHK|HRM_CENNA_X64FREV|HRM_CENNA_X64CHKV|HRM_CENA_X64FREV|HRM_CENA_X64CHKV|HRM_CPRA_X64FREV|HRM_CPRNA_X64FREV)_</volume-id>
> <publisher-id>MICROSOFT CORPORATION</publisher-id>
> + <lang-regex>[[:upper:][:digit:]_]*_([[:upper:]]*-[[:upper:]]*)</lang-regex>
> </iso>
> </media>
>
> diff --git a/data/schemas/libosinfo.rng b/data/schemas/libosinfo.rng
> index 87635dd..36fa1a1 100644
> --- a/data/schemas/libosinfo.rng
> +++ b/data/schemas/libosinfo.rng
> @@ -281,6 +281,11 @@
> <text/>
> </element>
> </optional>
> + <optional>
> + <element name='lang-regex'>
> + <text/>
> + </element>
> + </optional>
> </interleave>
> </element>
> </define>
> diff --git a/osinfo/libosinfo.syms b/osinfo/libosinfo.syms
> index d45e58e..7c3efe1 100644
> --- a/osinfo/libosinfo.syms
> +++ b/osinfo/libosinfo.syms
> @@ -341,11 +341,11 @@ LIBOSINFO_0.2.2 {
> osinfo_install_config_set_target_disk;
> osinfo_install_config_get_script_disk;
> osinfo_install_config_set_script_disk;
> -
> osinfo_install_script_get_avatar_format;
> osinfo_install_script_get_path_format;
> -
> osinfo_install_script_get_product_key_format;
> +
> + osinfo_media_get_languages;
> } LIBOSINFO_0.2.1;
>
> /* Symbols in next release...
> diff --git a/osinfo/osinfo_db.c b/osinfo/osinfo_db.c
> index 46101d6..2c2eb5a 100644
> --- a/osinfo/osinfo_db.c
> +++ b/osinfo/osinfo_db.c
> @@ -38,6 +38,177 @@ G_DEFINE_TYPE (OsinfoDb, osinfo_db, G_TYPE_OBJECT);
> (((str) != NULL) && \
> g_regex_match_simple((pattern), (str), 0, 0)))
>
> +static gchar *get_raw_lang(const char *volume_id, const gchar *regex_str)
> +{
> + GRegex *regex;
> + GMatchInfo *match;
> + gboolean matched;
> + gchar *raw_lang = NULL;
> +
> + regex = g_regex_new(regex_str, G_REGEX_ANCHORED,
> + G_REGEX_MATCH_ANCHORED, NULL);
> + if (regex == NULL)
> + return NULL;
> +
> + matched = g_regex_match(regex, volume_id, G_REGEX_MATCH_ANCHORED, &match);
> + if (!matched || !g_match_info_matches(match))
> + goto end;
> + raw_lang = g_match_info_fetch(match, 1);
> + if (raw_lang == NULL)
> + goto end;
> +
> +end:
> + g_match_info_unref(match);
> + g_regex_unref(regex);
> +
> + return raw_lang;
> +}
> +
> +struct LanguageMapping {
> + const char *iso_label_lang;
> + const char *gettext_lang;
> +};
> +
> +static GHashTable *init_win_lang_map(void)
> +{
> + GHashTable *lang_map;
> + const struct LanguageMapping lang_table[] = {
> + /* ISO label strings up to Windows 7 */
> + { "EN", "en_US" },
> + { "AR", "ar_SA" },
> + { "BG", "bg_BG" },
> + { "HK", "zh_HK" },
> + { "CN", "zh_CN" },
> + { "TW", "zh_TW" },
> + { "HR", "hr_HR" },
> + { "CS", "cs_CZ" },
> + { "DA", "da_DK" },
> + { "NL", "nl_NL" },
> + { "ET", "et_EE" },
> + { "FI", "fi_FI" },
> + { "FR", "fr_FR" },
> + { "DE", "de_DE" },
> + { "EL", "el_GR" },
> + { "HE", "he_IL" },
> + { "HU", "hu_HU" },
> + { "IT", "it_IT" },
> + { "JA", "ja_JP" },
> + { "KO", "ko_KR" },
> + { "LV", "lv_LV" },
> + { "LT", "lt_LT" },
> + { "NO", "nb_NO" },
> + { "PL", "pl_PL" },
> + { "BR", "pt_BR" },
> + { "PT", "pt_PT" },
> + { "RO", "ro_RO" },
> + { "RU", "ru_RU" },
> + { "SRL", "sr_RS@latin" },
> + { "SK", "sk_SK" },
> + { "SL", "sl_SI" },
> + { "ES", "es_ES" },
> + { "SV", "sv_SE" },
> + { "TH", "th_TH" },
> + { "TR", "tr_TR" },
> + { "UK", "uk_UA" },
> +
> + /* starting from Windows 8, the ISO label contains both
> + * language and country code */
> + { "EN-US", "en_US" },
> + { "EN-GB", "en_GB" },
> + { "AR-SA", "ar_SA" },
> + { "BG-BG", "bg_BG" },
> + { "ZH-HK", "zh_HK" },
> + { "ZH-CN", "zh_CN" },
> + { "ZH-TW", "zh_TW" },
> + { "HR-HR", "hr_HR" },
> + { "CS-CZ", "cs_CZ" },
> + { "DA-DK", "da_DK" },
> + { "NL-NL", "nl_NL" },
> + { "ET-EE", "et_EE" },
> + { "FI-FI", "fi_FI" },
> + { "FR-FR", "fr_FR" },
> + { "DE-DE", "de_DE" },
> + { "EL-GR", "el_GR" },
> + { "HE-IL", "he_IL" },
> + { "HU-HU", "hu_HU" },
> + { "IT-IT", "it_IT" },
> + { "JA-JP", "ja_JP" },
> + { "KO-KR", "ko_KR" },
> + { "LV-LV", "lv_LV" },
> + { "LT-LT", "lt_LT" },
> + { "NB-NO", "nb_NO" },
> + { "PL-PL", "pl_PL" },
> + { "PT-BR", "pt_BR" },
> + { "PT-PT", "pt_PT" },
> + { "RO-RO", "ro_RO" },
> + { "RU-RU", "ru_RU" },
> + { "SR-LATN-CS", "sr_RS@latin" },
> + { "SK-SK", "sk_SK" },
> + { "SL-SI", "sl_SI" },
> + { "ES-ES", "es_ES" },
> + { "SV-SE", "sv_SE" },
> + { "TH-TH", "th_TH" },
> + { "TR-TR", "tr_TR" },
> + { "UK-UA", "uk_UA" },
> +
> + { "EU-ES", "eu_ES" }, //language pack
> + { "CA-ES", "ca_ES" }, //language pack
> + { "GL-ES", "gl_ES" }, //language pack
> + { "KY-KG", "ky_KG" }, //language pack
> +
> + { NULL, NULL }
> + };
It seems that all of these except one can be covered by a simple
's/-/_/' conversion, and thus do not need all this hard-coding.
The rest of the patch looks good now as a first implementation. We can
make use of the datamaps API here once that API is available.
--
Regards,
Zeeshan Ali (Khattak)
FSF member#5124
More information about the virt-tools-list
mailing list