dovecot-2.0: unichar: Optimized 8bit character conversions.
dovecot at dovecot.org
dovecot at dovecot.org
Sun Feb 28 14:13:37 EET 2010
details: http://hg.dovecot.org/dovecot-2.0/rev/23858ce6422e
changeset: 10811:23858ce6422e
user: Timo Sirainen <tss at iki.fi>
date: Sun Feb 28 14:13:30 2010 +0200
description:
unichar: Optimized 8bit character conversions.
diffstat:
src/lib/unichar.c | 21 ++++++++++++++-------
src/lib/unicodemap.pl | 32 ++++++++++++++++++++++++++++++--
2 files changed, 44 insertions(+), 9 deletions(-)
diffs (120 lines):
diff -r de2798fbbae6 -r 23858ce6422e src/lib/unichar.c
--- a/src/lib/unichar.c Sun Feb 28 13:41:53 2010 +0200
+++ b/src/lib/unichar.c Sun Feb 28 14:13:30 2010 +0200
@@ -187,7 +187,9 @@
{
unsigned int idx;
- if (chr <= 0xffff) {
+ if (chr <= 0xff)
+ return titlecase8_map[chr];
+ else if (chr <= 0xffff) {
if (!uint16_find(titlecase16_keys, N_ELEMENTS(titlecase16_keys),
chr, &idx))
return chr;
@@ -206,16 +208,21 @@
{
unsigned int idx;
- if (*chr <= 0xffff) {
+ if (*chr <= 0xff) {
+ if (uni8_decomp_map[*chr] == *chr)
+ return FALSE;
+ *chr = uni8_decomp_map[*chr];
+ } else if (*chr <= 0xffff) {
+ if (*chr < uni16_decomp_keys[0])
+ return FALSE;
+
if (!uint16_find(uni16_decomp_keys,
- N_ELEMENTS(uni16_decomp_keys),
- *chr, &idx))
+ N_ELEMENTS(uni16_decomp_keys), *chr, &idx))
return FALSE;
*chr = uni16_decomp_values[idx];
} else {
if (!uint32_find(uni32_decomp_keys,
- N_ELEMENTS(uni32_decomp_keys),
- *chr, &idx))
+ N_ELEMENTS(uni32_decomp_keys), *chr, &idx))
return FALSE;
*chr = uni32_decomp_values[idx];
}
@@ -247,7 +254,7 @@
const uint16_t *value;
unsigned int idx;
- if (chr > 0xffff)
+ if (chr < multidecomp_keys[0] || chr > 0xffff)
return FALSE;
if (!uint32_find(multidecomp_keys, N_ELEMENTS(multidecomp_keys),
diff -r de2798fbbae6 -r 23858ce6422e src/lib/unicodemap.pl
--- a/src/lib/unicodemap.pl Sun Feb 28 13:41:53 2010 +0200
+++ b/src/lib/unicodemap.pl Sun Feb 28 14:13:30 2010 +0200
@@ -1,6 +1,7 @@
#!/usr/bin/env perl
use strict;
+my (%titlecase8, %uni8_decomp);
my (@titlecase16_keys, @titlecase16_values);
my (@titlecase32_keys, @titlecase32_values);
my (@uni16_decomp_keys, @uni16_decomp_values);
@@ -18,7 +19,11 @@
my $value = eval("0x$titlecode");
if ($value == $code) {
# the same character, ignore
- } elsif ($code <= 0xffff && $value <= 0xffff) {
+ } elsif ($code <= 0xff) {
+ die "Error: We've assumed 8bit keys have max. 16bit values" if ($value > 0xffff);
+ $titlecase8{$code} = $value;
+ } elsif ($code <= 0xffff) {
+ die "Error: We've assumed 16bit keys have max. 16bit values" if ($value > 0xffff);
push @titlecase16_keys, $code;
push @titlecase16_values, $value;
} else {
@@ -35,7 +40,9 @@
print STDERR "Error: We've assumed decomposition codes are max. 16bit\n";
exit 1;
}
- if ($code <= 0xffff) {
+ if ($code <= 0xff) {
+ $uni8_decomp{$code} = $value;
+ } elsif ($code <= 0xffff) {
push @uni16_decomp_keys, $code;
push @uni16_decomp_values, $value;
} else {
@@ -89,6 +96,23 @@
NOTE: decompositions for characters having titlecase characters
are not included, because we first translate everything to titlecase */\n";
+sub print_map8 {
+ my %map = %{$_[0]};
+ my @list;
+ for (my $i = 0; $i <= 0xff; $i++) {
+ if (defined($map{$i})) {
+ push @list, $map{$i};
+ } else {
+ push @list, $i;
+ }
+ }
+ print_list(\@list);
+}
+
+print "static const uint16_t titlecase8_map[256] = {\n\t";
+print_map8(\%titlecase8);
+print "\n};\n";
+
print "static const uint16_t titlecase16_keys[] = {\n\t";
print_list(\@titlecase16_keys);
print "\n};\n";
@@ -105,6 +129,10 @@
print_list(\@titlecase32_values);
print "\n};\n";
+print "static const uint16_t uni8_decomp_map[256] = {\n\t";
+print_map8(\%uni8_decomp);
+print "\n};\n";
+
print "static const uint16_t uni16_decomp_keys[] = {\n\t";
print_list(\@uni16_decomp_keys);
print "\n};\n";
More information about the dovecot-cvs mailing list