Alexandre Julliard : make_unicode: Generate the character mapping tables in locale.nls.

Alexandre Julliard julliard at winehq.org
Mon Apr 25 16:30:31 CDT 2022


Module: wine
Branch: master
Commit: 9e6d0e459ff153dd62cda5000be487fd714c3957
URL:    https://source.winehq.org/git/wine.git/?a=commit;h=9e6d0e459ff153dd62cda5000be487fd714c3957

Author: Alexandre Julliard <julliard at winehq.org>
Date:   Mon Apr 25 11:52:14 2022 +0200

make_unicode: Generate the character mapping tables in locale.nls.

Signed-off-by: Alexandre Julliard <julliard at winehq.org>

---

 nls/locale.nls     | Bin 664228 -> 730526 bytes
 tools/make_unicode |  84 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 81 insertions(+), 3 deletions(-)

diff --git a/nls/locale.nls b/nls/locale.nls
index 2ab7ce55dc9..329224c92fc 100644
Binary files a/nls/locale.nls and b/nls/locale.nls differ
diff --git a/tools/make_unicode b/tools/make_unicode
index 520cbe0f1a5..f697eeaaeb4 100755
--- a/tools/make_unicode
+++ b/tools/make_unicode
@@ -26,6 +26,7 @@ use Encode;
 # base URLs for www.unicode.org files
 my $UNIVERSION = "14.0.0";
 my $UNIDATA  = "https://www.unicode.org/Public/$UNIVERSION/ucd/UCD.zip";
+my $UNIHAN   = "https://www.unicode.org/Public/$UNIVERSION/ucd/Unihan.zip";
 my $IDNADATA = "https://www.unicode.org/Public/idna/$UNIVERSION";
 my $JISDATA  = "https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS";
 my $KSCDATA  = "https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC";
@@ -1825,6 +1826,11 @@ my @uni2cp = ();
 my @tolower_table = ();
 my @toupper_table = ();
 my @digitmap_table = ();
+my @halfwidth_table = ();
+my @fullwidth_table = ();
+my @cjk_compat_table = ();
+my @chinese_traditional_table = ();
+my @chinese_simplified_table = ();
 my @category_table = ();
 my @initial_joining_table = ();
 my @direction_table = ();
@@ -2106,7 +2112,18 @@ sub load_data()
             $decomp_compat_table[$src] = \@seq;
         }
 
-        if ($decomp =~ /^<([a-zA-Z]+)>\s+([0-9a-fA-F]+)$/)
+        if ($decomp =~ /^<narrow>\s+([0-9a-fA-F]+)$/)
+        {
+            $halfwidth_table[hex $1] = $src;
+            $fullwidth_table[$src] = hex $1;
+        }
+        elsif ($decomp =~ /^<wide>\s+([0-9a-fA-F]+)$/)
+        {
+            next if hex $1 == 0x5c; # don't remap backslash
+            $fullwidth_table[hex $1] = $src;
+            $halfwidth_table[$src] = hex $1;
+        }
+        elsif ($decomp =~ /^<([a-zA-Z]+)>\s+([0-9a-fA-F]+)$/)
         {
             # decomposition of the form "<foo> 1234" -> use char if type is known
             if ($1 eq "isolated" || $1 eq "final" || $1 eq "initial" || $1 eq "medial")
@@ -2127,8 +2144,10 @@ sub load_data()
             }
             elsif ($decomp =~ /^([0-9a-fA-F]+)$/)
             {
+                my $dst = hex $1;
                 # Single char decomposition
-                $decomp_table[$src] = $decomp_compat_table[$src] = [ hex $1 ];
+                $decomp_table[$src] = $decomp_compat_table[$src] = [ $dst ];
+                $cjk_compat_table[$src] = $dst if $name =~ /^CJK COMPATIBILITY IDEOGRAPH/;
             }
         }
     }
@@ -2213,6 +2232,24 @@ sub load_data()
         }
     }
     close $IDNA;
+
+    # load the Unihan mappings
+
+    my $UNIHAN = open_data_file( $UNIHAN, "Unihan_Variants.txt" );
+    while (<$UNIHAN>)
+    {
+        s/\#.*//;  # remove comments
+        next if /^\s*$/;
+        if (/^U\+([0-9a-fA-F]+)\s+kTraditionalVariant\s+U\+([0-9a-fA-F]+)/)
+        {
+            $chinese_traditional_table[hex $1] = hex $2;
+        }
+        elsif (/^U\+([0-9a-fA-F]+)\s+kSimplifiedVariant\s+U\+([0-9a-fA-F]+)/)
+        {
+            $chinese_simplified_table[hex $1] = hex $2;
+        }
+    }
+    close $UNIHAN;
 }
 
 
@@ -5179,6 +5216,47 @@ sub build_locale_data()
 }
 
 
+################################################################
+# build the charmaps table for locale.nls: returns the concatenation of one dump_binary_case_table() result per mapping, in the order below
+sub build_charmaps_data()
+{
+    my $data = "";
+
+    # MAP_FOLDDIGITS (@digitmap_table is populated outside this function)
+    $data .= dump_binary_case_table( @digitmap_table );
+
+    # CJK compatibility map (load_data fills it with the single-char decomposition of each CJK COMPATIBILITY IDEOGRAPH)
+    $data .= dump_binary_case_table( @cjk_compat_table );
+
+    # LCMAP_HIRAGANA/KATAKANA: each table is indexed by source char and holds the target char, 0x60 apart
+    my (@hiragana_table, @katakana_table);
+    foreach my $ch (0x3041..0x3096, 0x309d..0x309e)  # hiragana letters, then hiragana iteration marks
+    {
+        $hiragana_table[$ch + 0x60] = $ch;  # katakana -> hiragana
+        $katakana_table[$ch] = $ch + 0x60;  # hiragana -> katakana
+    }
+    $data .= dump_binary_case_table( @hiragana_table ) . dump_binary_case_table( @katakana_table );
+
+    # LCMAP_HALFWIDTH/FULLWIDTH: hand-written entries on top of what load_data derived from <narrow>/<wide> decompositions
+    $halfwidth_table[0x2018] = 0x0027;  # left single quotation mark -> apostrophe
+    $halfwidth_table[0x2019] = 0x0027;  # right single quotation mark -> apostrophe
+    $halfwidth_table[0x201c] = 0x0022;  # left double quotation mark -> quotation mark
+    $halfwidth_table[0x201d] = 0x0022;  # right double quotation mark -> quotation mark
+    $halfwidth_table[0x309b] = 0xff9e;  # voiced sound mark -> halfwidth voiced sound mark
+    $halfwidth_table[0x309c] = 0xff9f;  # semi-voiced sound mark -> halfwidth semi-voiced sound mark
+    $fullwidth_table[0x309b] = 0x3099;  # voiced sound mark -> combining voiced sound mark
+    $fullwidth_table[0x309c] = 0x309a;  # semi-voiced sound mark -> combining semi-voiced sound mark
+    $data .= dump_binary_case_table( @halfwidth_table ) . dump_binary_case_table( @fullwidth_table );
+
+    # LCMAP_TRADITIONAL/SIMPLIFIED_CHINESE (load_data fills these from kTraditionalVariant/kSimplifiedVariant in Unihan_Variants.txt)
+    $data .= dump_binary_case_table( @chinese_traditional_table ) . dump_binary_case_table( @chinese_simplified_table );
+
+    # FIXME: some more unknown tables here (nothing further is appended to $data yet)
+
+    return $data;
+}
+
+
 ################################################################
 # build the geoids table for locale.nls
 sub build_geoids_data()
@@ -5237,7 +5315,7 @@ sub dump_locales($$)
     printf "Building $filename\n";
 
     my $locale_data = build_locale_data();
-    my $charmaps_data = "";  # FIXME
+    my $charmaps_data = build_charmaps_data();
     my $geoids_data = build_geoids_data();
     my $scripts_data = "";  # FIXME
 




More information about the wine-cvs mailing list