dovecot-2.0: unichar: Optimized 8bit character conversions.

dovecot at dovecot.org dovecot at dovecot.org
Sun Feb 28 14:13:37 EET 2010


details:   http://hg.dovecot.org/dovecot-2.0/rev/23858ce6422e
changeset: 10811:23858ce6422e
user:      Timo Sirainen <tss at iki.fi>
date:      Sun Feb 28 14:13:30 2010 +0200
description:
unichar: Optimized 8bit character conversions.

diffstat:

 src/lib/unichar.c     |  21 ++++++++++++++-------
 src/lib/unicodemap.pl |  32 ++++++++++++++++++++++++++++++--
 2 files changed, 44 insertions(+), 9 deletions(-)

diffs (120 lines):

diff -r de2798fbbae6 -r 23858ce6422e src/lib/unichar.c
--- a/src/lib/unichar.c	Sun Feb 28 13:41:53 2010 +0200
+++ b/src/lib/unichar.c	Sun Feb 28 14:13:30 2010 +0200
@@ -187,7 +187,9 @@
 {
 	unsigned int idx;
 
-	if (chr <= 0xffff) {
+	if (chr <= 0xff)
+		return titlecase8_map[chr];
+	else if (chr <= 0xffff) {
 		if (!uint16_find(titlecase16_keys, N_ELEMENTS(titlecase16_keys),
 				 chr, &idx))
 			return chr;
@@ -206,16 +208,21 @@
 {
 	unsigned int idx;
 
-	if (*chr <= 0xffff) {
+	if (*chr <= 0xff) {
+		if (uni8_decomp_map[*chr] == *chr)
+			return FALSE;
+		*chr = uni8_decomp_map[*chr];
+	} else if (*chr <= 0xffff) {
+		if (*chr < uni16_decomp_keys[0])
+			return FALSE;
+
 		if (!uint16_find(uni16_decomp_keys,
-				 N_ELEMENTS(uni16_decomp_keys),
-				 *chr, &idx))
+				 N_ELEMENTS(uni16_decomp_keys), *chr, &idx))
 			return FALSE;
 		*chr = uni16_decomp_values[idx];
 	} else {
 		if (!uint32_find(uni32_decomp_keys,
-				 N_ELEMENTS(uni32_decomp_keys),
-				 *chr, &idx))
+				 N_ELEMENTS(uni32_decomp_keys), *chr, &idx))
 			return FALSE;
 		*chr = uni32_decomp_values[idx];
 	}
@@ -247,7 +254,7 @@
 	const uint16_t *value;
 	unsigned int idx;
 
-	if (chr > 0xffff)
+	if (chr < multidecomp_keys[0] || chr > 0xffff)
 		return FALSE;
 
 	if (!uint32_find(multidecomp_keys, N_ELEMENTS(multidecomp_keys),
diff -r de2798fbbae6 -r 23858ce6422e src/lib/unicodemap.pl
--- a/src/lib/unicodemap.pl	Sun Feb 28 13:41:53 2010 +0200
+++ b/src/lib/unicodemap.pl	Sun Feb 28 14:13:30 2010 +0200
@@ -1,6 +1,7 @@
 #!/usr/bin/env perl
 use strict;
 
+my (%titlecase8, %uni8_decomp);
 my (@titlecase16_keys, @titlecase16_values);
 my (@titlecase32_keys, @titlecase32_values);
 my (@uni16_decomp_keys, @uni16_decomp_values);
@@ -18,7 +19,11 @@
     my $value = eval("0x$titlecode");
     if ($value == $code) { 
       # the same character, ignore
-    } elsif ($code <= 0xffff && $value <= 0xffff) {
+    } elsif ($code <= 0xff) {
+      die "Error: We've assumed 8bit keys have max. 16bit values" if ($value > 0xffff);
+      $titlecase8{$code} = $value;
+    } elsif ($code <= 0xffff) {
+      die "Error: We've assumed 16bit keys have max. 16bit values" if ($value > 0xffff);
       push @titlecase16_keys, $code;
       push @titlecase16_values, $value;
     } else {
@@ -35,7 +40,9 @@
 	print STDERR "Error: We've assumed decomposition codes are max. 16bit\n";
 	exit 1;
       }
-      if ($code <= 0xffff) {
+      if ($code <= 0xff) {
+        $uni8_decomp{$code} = $value;
+      } elsif ($code <= 0xffff) {
 	push @uni16_decomp_keys, $code;
 	push @uni16_decomp_values, $value;
       } else {
@@ -89,6 +96,23 @@
    NOTE: decompositions for characters having titlecase characters
    are not included, because we first translate everything to titlecase */\n";
 
+sub print_map8 {  # Expand a sparse 8bit mapping into a dense 256-entry list and print it.
+  my %map = %{$_[0]};  # $_[0]: hash reference, code point => mapped value
+  my @list;  # one entry per code point, in ascending order
+  for (my $i = 0; $i <= 0xff; $i++) {  # visit every code point 0x00..0xff
+    if (defined($map{$i})) {
+      push @list, $map{$i};
+    } else {
+      push @list, $i;  # no mapping recorded: the code point maps to itself
+    }
+  }
+  print_list(\@list);  # print_list is defined outside this hunk; presumably emits the comma-separated initializer body
+}
+
+print "static const uint16_t titlecase8_map[256] = {\n\t";
+print_map8(\%titlecase8);
+print "\n};\n";
+
 print "static const uint16_t titlecase16_keys[] = {\n\t";
 print_list(\@titlecase16_keys);
 print "\n};\n";
@@ -105,6 +129,10 @@
 print_list(\@titlecase32_values);
 print "\n};\n";
 
+print "static const uint16_t uni8_decomp_map[256] = {\n\t";
+print_map8(\%uni8_decomp);
+print "\n};\n";
+
 print "static const uint16_t uni16_decomp_keys[] = {\n\t";
 print_list(\@uni16_decomp_keys);
 print "\n};\n";


More information about the dovecot-cvs mailing list