1 2 3 4 5 6 7 8 9 10
$dbh->do(q( create table cc( c varchar(1) charset utf8mb4 )default charset utf8mb4 )); $dbh->do("set names utf8mb4"); $dbh->{mysql_enable_utf8mb4} = 1; $dbh->do("insert into cc(c)values(?)",{}, pack "U", 0x10CE2); print Dumper $dbh->selectrow_hashref("select hex(c),c,upper(c) from cc"); $dbh->do('drop table cc');
1
2
3
4
5
$VAR1 = {
'upper(c)' => "\x{10ce2}",
'hex(c)' => 'F090B3A2',
'c' => "\x{10ce2}"
};
1
2
3
4
5
6
show character set {
'Charset' => 'utf8mb4',
'Default collation' => 'utf8mb4_general_ci',
'Description' => 'UTF-8 Unicode',
'Maxlen' => '4'
},
QuoteFor collations of Unicode character sets, LOWER() and UPPER() work according to the Unicode Collation Algorithm (UCA) version in the collation name, if there is one, and UCA 4.0.0 if no version is specified. For example, utf8mb4_0900_ai_ci and utf8mb3_unicode_520_ci work according to UCA 9.0.0 and 5.2.0, respectively, whereas utf8mb3_unicode_ci works according to UCA 4.0.0. See Section 10.10.1, “Unicode Character Sets”.
This function is multibyte safe.
— https://dev.mysql.com/doc/refman/8.0/en/string-fun...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
MariaDB [testdb]> status
--------------
mysql Ver 15.1 Distrib 10.5.19-MariaDB, for debian-linux-gnu (x86_64) using EditLine wrapper
Connection id: 33
Current database: testdb
Current user: test@localhost
SSL: Not in use
Current pager: stdout
Using outfile: ''
Using delimiter: ;
Server: MariaDB
Server version: 10.5.19-MariaDB-0+deb11u2 Debian 11
Protocol version: 10
Connection: Localhost via UNIX socket
Server characterset: utf8mb4
Db characterset: utf8mb4
Client characterset: utf8mb4
Conn. characterset: utf8mb4
UNIX socket: /run/mysqld/mysqld.sock
Uptime: 8 min 32 sec
Threads: 1 Questions: 77 Slow queries: 0 Opens: 35 Open tables: 27 Queries per second avg: 0.150
--------------
MariaDB [testdb]>
QuoteDie MySQL-Entwickler haben diesen Fehler nie behoben. Sie haben 2010 einen Workaround veröffentlicht: einen neuen Zeichensatz namens „utf8mb4“.
QuoteDer Name „utf8“ war immer ein Fehler. Es ist ein proprietärer Zeichensatz. Es schuf neue Probleme, und es löste nicht das Problem, das es zu lösen bedeutete.
Quote| utf8mb4 | UTF-8 Unicode | utf8mb4_0900_ai_ci | 4 |
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
[testdb]> SHOW COLLATION WHERE Charset LIKE 'utf8mb4%';
+------------------------------+---------+------+---------+----------+---------+
| Collation | Charset | Id | Default | Compiled | Sortlen |
+------------------------------+---------+------+---------+----------+---------+
| utf8mb4_general_ci | utf8mb4 | 45 | Yes | Yes | 1 |
| utf8mb4_bin | utf8mb4 | 46 | | Yes | 1 |
| utf8mb4_unicode_ci | utf8mb4 | 224 | | Yes | 8 |
| utf8mb4_icelandic_ci | utf8mb4 | 225 | | Yes | 8 |
| utf8mb4_latvian_ci | utf8mb4 | 226 | | Yes | 8 |
| utf8mb4_romanian_ci | utf8mb4 | 227 | | Yes | 8 |
| utf8mb4_slovenian_ci | utf8mb4 | 228 | | Yes | 8 |
| utf8mb4_polish_ci | utf8mb4 | 229 | | Yes | 8 |
| utf8mb4_estonian_ci | utf8mb4 | 230 | | Yes | 8 |
| utf8mb4_spanish_ci | utf8mb4 | 231 | | Yes | 8 |
| utf8mb4_swedish_ci | utf8mb4 | 232 | | Yes | 8 |
| utf8mb4_turkish_ci | utf8mb4 | 233 | | Yes | 8 |
| utf8mb4_czech_ci | utf8mb4 | 234 | | Yes | 8 |
| utf8mb4_danish_ci | utf8mb4 | 235 | | Yes | 8 |
| utf8mb4_lithuanian_ci | utf8mb4 | 236 | | Yes | 8 |
| utf8mb4_slovak_ci | utf8mb4 | 237 | | Yes | 8 |
| utf8mb4_spanish2_ci | utf8mb4 | 238 | | Yes | 8 |
| utf8mb4_roman_ci | utf8mb4 | 239 | | Yes | 8 |
| utf8mb4_persian_ci | utf8mb4 | 240 | | Yes | 8 |
| utf8mb4_esperanto_ci | utf8mb4 | 241 | | Yes | 8 |
| utf8mb4_hungarian_ci | utf8mb4 | 242 | | Yes | 8 |
| utf8mb4_sinhala_ci | utf8mb4 | 243 | | Yes | 8 |
| utf8mb4_german2_ci | utf8mb4 | 244 | | Yes | 8 |
| utf8mb4_croatian_mysql561_ci | utf8mb4 | 245 | | Yes | 8 |
| utf8mb4_unicode_520_ci | utf8mb4 | 246 | | Yes | 8 |
| utf8mb4_vietnamese_ci | utf8mb4 | 247 | | Yes | 8 |
| utf8mb4_croatian_ci | utf8mb4 | 608 | | Yes | 8 |
| utf8mb4_myanmar_ci | utf8mb4 | 609 | | Yes | 8 |
| utf8mb4_thai_520_w2 | utf8mb4 | 610 | | Yes | 4 |
| utf8mb4_general_nopad_ci | utf8mb4 | 1069 | | Yes | 1 |
| utf8mb4_nopad_bin | utf8mb4 | 1070 | | Yes | 1 |
| utf8mb4_unicode_nopad_ci | utf8mb4 | 1248 | | Yes | 8 |
| utf8mb4_unicode_520_nopad_ci | utf8mb4 | 1270 | | Yes | 8 |
+------------------------------+---------+------+---------+----------+---------+
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
$ docker exec -it mysql-test bash
bash-4.4# mysql -u root -p
Enter password:
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 9
Server version: 8.0.33 MySQL Community Server - GPL
Copyright (c) 2000, 2023, Oracle and/or its affiliates.
Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
mysql> SHOW COLLATION WHERE Charset LIKE 'utf8mb4%';
+----------------------------+---------+-----+---------+----------+---------+---------------+
| Collation | Charset | Id | Default | Compiled | Sortlen | Pad_attribute |
+----------------------------+---------+-----+---------+----------+---------+---------------+
| utf8mb4_0900_ai_ci | utf8mb4 | 255 | Yes | Yes | 0 | NO PAD |
| utf8mb4_0900_as_ci | utf8mb4 | 305 | | Yes | 0 | NO PAD |
| utf8mb4_0900_as_cs | utf8mb4 | 278 | | Yes | 0 | NO PAD |
| utf8mb4_0900_bin | utf8mb4 | 309 | | Yes | 1 | NO PAD |
| utf8mb4_bg_0900_ai_ci | utf8mb4 | 318 | | Yes | 0 | NO PAD |
| utf8mb4_bg_0900_as_cs | utf8mb4 | 319 | | Yes | 0 | NO PAD |
| utf8mb4_bin | utf8mb4 | 46 | | Yes | 1 | PAD SPACE |
| utf8mb4_bs_0900_ai_ci | utf8mb4 | 316 | | Yes | 0 | NO PAD |
| utf8mb4_bs_0900_as_cs | utf8mb4 | 317 | | Yes | 0 | NO PAD |
| utf8mb4_croatian_ci | utf8mb4 | 245 | | Yes | 8 | PAD SPACE |
| utf8mb4_cs_0900_ai_ci | utf8mb4 | 266 | | Yes | 0 | NO PAD |
| utf8mb4_cs_0900_as_cs | utf8mb4 | 289 | | Yes | 0 | NO PAD |
| utf8mb4_czech_ci | utf8mb4 | 234 | | Yes | 8 | PAD SPACE |
| utf8mb4_danish_ci | utf8mb4 | 235 | | Yes | 8 | PAD SPACE |
| utf8mb4_da_0900_ai_ci | utf8mb4 | 267 | | Yes | 0 | NO PAD |
| utf8mb4_da_0900_as_cs | utf8mb4 | 290 | | Yes | 0 | NO PAD |
| utf8mb4_de_pb_0900_ai_ci | utf8mb4 | 256 | | Yes | 0 | NO PAD |
| utf8mb4_de_pb_0900_as_cs | utf8mb4 | 279 | | Yes | 0 | NO PAD |
| utf8mb4_eo_0900_ai_ci | utf8mb4 | 273 | | Yes | 0 | NO PAD |
| utf8mb4_eo_0900_as_cs | utf8mb4 | 296 | | Yes | 0 | NO PAD |
| utf8mb4_esperanto_ci | utf8mb4 | 241 | | Yes | 8 | PAD SPACE |
| utf8mb4_estonian_ci | utf8mb4 | 230 | | Yes | 8 | PAD SPACE |
| utf8mb4_es_0900_ai_ci | utf8mb4 | 263 | | Yes | 0 | NO PAD |
| utf8mb4_es_0900_as_cs | utf8mb4 | 286 | | Yes | 0 | NO PAD |
| utf8mb4_es_trad_0900_ai_ci | utf8mb4 | 270 | | Yes | 0 | NO PAD |
| utf8mb4_es_trad_0900_as_cs | utf8mb4 | 293 | | Yes | 0 | NO PAD |
| utf8mb4_et_0900_ai_ci | utf8mb4 | 262 | | Yes | 0 | NO PAD |
| utf8mb4_et_0900_as_cs | utf8mb4 | 285 | | Yes | 0 | NO PAD |
| utf8mb4_general_ci | utf8mb4 | 45 | | Yes | 1 | PAD SPACE |
| utf8mb4_german2_ci | utf8mb4 | 244 | | Yes | 8 | PAD SPACE |
| utf8mb4_gl_0900_ai_ci | utf8mb4 | 320 | | Yes | 0 | NO PAD |
| utf8mb4_gl_0900_as_cs | utf8mb4 | 321 | | Yes | 0 | NO PAD |
| utf8mb4_hr_0900_ai_ci | utf8mb4 | 275 | | Yes | 0 | NO PAD |
| utf8mb4_hr_0900_as_cs | utf8mb4 | 298 | | Yes | 0 | NO PAD |
| utf8mb4_hungarian_ci | utf8mb4 | 242 | | Yes | 8 | PAD SPACE |
| utf8mb4_hu_0900_ai_ci | utf8mb4 | 274 | | Yes | 0 | NO PAD |
| utf8mb4_hu_0900_as_cs | utf8mb4 | 297 | | Yes | 0 | NO PAD |
| utf8mb4_icelandic_ci | utf8mb4 | 225 | | Yes | 8 | PAD SPACE |
| utf8mb4_is_0900_ai_ci | utf8mb4 | 257 | | Yes | 0 | NO PAD |
| utf8mb4_is_0900_as_cs | utf8mb4 | 280 | | Yes | 0 | NO PAD |
| utf8mb4_ja_0900_as_cs | utf8mb4 | 303 | | Yes | 0 | NO PAD |
| utf8mb4_ja_0900_as_cs_ks | utf8mb4 | 304 | | Yes | 24 | NO PAD |
| utf8mb4_latvian_ci | utf8mb4 | 226 | | Yes | 8 | PAD SPACE |
| utf8mb4_la_0900_ai_ci | utf8mb4 | 271 | | Yes | 0 | NO PAD |
| utf8mb4_la_0900_as_cs | utf8mb4 | 294 | | Yes | 0 | NO PAD |
| utf8mb4_lithuanian_ci | utf8mb4 | 236 | | Yes | 8 | PAD SPACE |
| utf8mb4_lt_0900_ai_ci | utf8mb4 | 268 | | Yes | 0 | NO PAD |
| utf8mb4_lt_0900_as_cs | utf8mb4 | 291 | | Yes | 0 | NO PAD |
| utf8mb4_lv_0900_ai_ci | utf8mb4 | 258 | | Yes | 0 | NO PAD |
| utf8mb4_lv_0900_as_cs | utf8mb4 | 281 | | Yes | 0 | NO PAD |
| utf8mb4_mn_cyrl_0900_ai_ci | utf8mb4 | 322 | | Yes | 0 | NO PAD |
| utf8mb4_mn_cyrl_0900_as_cs | utf8mb4 | 323 | | Yes | 0 | NO PAD |
| utf8mb4_nb_0900_ai_ci | utf8mb4 | 310 | | Yes | 0 | NO PAD |
| utf8mb4_nb_0900_as_cs | utf8mb4 | 311 | | Yes | 0 | NO PAD |
| utf8mb4_nn_0900_ai_ci | utf8mb4 | 312 | | Yes | 0 | NO PAD |
| utf8mb4_nn_0900_as_cs | utf8mb4 | 313 | | Yes | 0 | NO PAD |
| utf8mb4_persian_ci | utf8mb4 | 240 | | Yes | 8 | PAD SPACE |
| utf8mb4_pl_0900_ai_ci | utf8mb4 | 261 | | Yes | 0 | NO PAD |
| utf8mb4_pl_0900_as_cs | utf8mb4 | 284 | | Yes | 0 | NO PAD |
| utf8mb4_polish_ci | utf8mb4 | 229 | | Yes | 8 | PAD SPACE |
| utf8mb4_romanian_ci | utf8mb4 | 227 | | Yes | 8 | PAD SPACE |
| utf8mb4_roman_ci | utf8mb4 | 239 | | Yes | 8 | PAD SPACE |
| utf8mb4_ro_0900_ai_ci | utf8mb4 | 259 | | Yes | 0 | NO PAD |
| utf8mb4_ro_0900_as_cs | utf8mb4 | 282 | | Yes | 0 | NO PAD |
| utf8mb4_ru_0900_ai_ci | utf8mb4 | 306 | | Yes | 0 | NO PAD |
| utf8mb4_ru_0900_as_cs | utf8mb4 | 307 | | Yes | 0 | NO PAD |
| utf8mb4_sinhala_ci | utf8mb4 | 243 | | Yes | 8 | PAD SPACE |
| utf8mb4_sk_0900_ai_ci | utf8mb4 | 269 | | Yes | 0 | NO PAD |
| utf8mb4_sk_0900_as_cs | utf8mb4 | 292 | | Yes | 0 | NO PAD |
| utf8mb4_slovak_ci | utf8mb4 | 237 | | Yes | 8 | PAD SPACE |
| utf8mb4_slovenian_ci | utf8mb4 | 228 | | Yes | 8 | PAD SPACE |
| utf8mb4_sl_0900_ai_ci | utf8mb4 | 260 | | Yes | 0 | NO PAD |
| utf8mb4_sl_0900_as_cs | utf8mb4 | 283 | | Yes | 0 | NO PAD |
| utf8mb4_spanish2_ci | utf8mb4 | 238 | | Yes | 8 | PAD SPACE |
| utf8mb4_spanish_ci | utf8mb4 | 231 | | Yes | 8 | PAD SPACE |
| utf8mb4_sr_latn_0900_ai_ci | utf8mb4 | 314 | | Yes | 0 | NO PAD |
| utf8mb4_sr_latn_0900_as_cs | utf8mb4 | 315 | | Yes | 0 | NO PAD |
| utf8mb4_sv_0900_ai_ci | utf8mb4 | 264 | | Yes | 0 | NO PAD |
| utf8mb4_sv_0900_as_cs | utf8mb4 | 287 | | Yes | 0 | NO PAD |
| utf8mb4_swedish_ci | utf8mb4 | 232 | | Yes | 8 | PAD SPACE |
| utf8mb4_tr_0900_ai_ci | utf8mb4 | 265 | | Yes | 0 | NO PAD |
| utf8mb4_tr_0900_as_cs | utf8mb4 | 288 | | Yes | 0 | NO PAD |
| utf8mb4_turkish_ci | utf8mb4 | 233 | | Yes | 8 | PAD SPACE |
| utf8mb4_unicode_520_ci | utf8mb4 | 246 | | Yes | 8 | PAD SPACE |
| utf8mb4_unicode_ci | utf8mb4 | 224 | | Yes | 8 | PAD SPACE |
| utf8mb4_vietnamese_ci | utf8mb4 | 247 | | Yes | 8 | PAD SPACE |
| utf8mb4_vi_0900_ai_ci | utf8mb4 | 277 | | Yes | 0 | NO PAD |
| utf8mb4_vi_0900_as_cs | utf8mb4 | 300 | | Yes | 0 | NO PAD |
| utf8mb4_zh_0900_as_cs | utf8mb4 | 308 | | Yes | 0 | NO PAD |
+----------------------------+---------+-----+---------+----------+---------+---------------+
89 rows in set (0.01 sec)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
root@ute:~# mysql
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 9
Server version: 8.0.33-0ubuntu0.22.04.2 (Ubuntu)
mysql> select c,hex(c),upper(c),hex(upper(c)) from t_utf8mb4_0900_ai_ci;
+------+--------+----------+---------------+
| c | hex(c) | upper(c) | hex(upper(c)) |
+------+--------+----------+---------------+
| ä | C3A4 | Ä | C384 |
| ß | C39F | ß | C39F |
+------+--------+----------+---------------+
2 rows in set (0,00 sec)
1
2
3
4
5
Python 3.10.8 (tags/v3.10.8:aaaf517, Oct 11 2022, 16:50:30) [MSC v.1933 64 bit (AMD64)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> 'ß'.upper()
'SS'
>>>
1
2
3
4
5
6
7
8
9
10
11
~ # php -v
PHP 7.4.33 (cli) (built: Nov 2 2022 16:00:55) ( ZTS Visual C++ 2017 x64 )
Copyright (c) The PHP Group
Zend Engine v3.4.0, Copyright (c) Zend Technologies
~ # php -a
Interactive shell
php > print mb_strtoupper("äöüß");
ÄÖÜSS
php >
1
2
3
4
14:42:40.572 "ä".toLocaleUpperCase()
14:42:40.592 'Ä'
14:42:47.739 "ß".toLocaleUpperCase()
14:42:47.757 'SS'
select character_name, code_value from unicodedata where character_name like "cyril%" and lowercase_mapping > '' order by codepoint desc
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
$dbh->do(q( create table cc( c varchar(1) charset utf8mb4 collate utf8mb4_hungarian_ci )default charset utf8mb4 )); $dbh->do("set names utf8mb4 collate utf8mb4_hungarian_ci"); $dbh->{mysql_enable_utf8mb4} = 1; $dbh->do("insert into cc(c)values(?)",{}, pack "U", 0x10CE2); print Dumper $dbh->selectrow_hashref("select hex(c),c,upper(c) from cc"); #print Dumper $dbh->selectall_arrayref("show variables like 'character_set%'",{Slice=>{}}); $dbh->do('drop table cc'); $VAR1 = { 'c' => "\x{10ce2}", 'hex(c)' => 'F090B3A2', 'upper(c)' => "\x{10ce2}" };