diff --git a/README.md b/README.md index f8d8d32..0964816 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Also, going through the MySQL external library *blocks* the Crystal thread using ## Status -This driver is a work in progress. +This driver is a work in progress. It implements mysql's binary protocol to create prepared statements. Contributions are most welcome. @@ -77,3 +77,17 @@ Then use the example above changing the `DB.open` line to ```crystal DB.open "mysql://test:yourpassword@localhost/test" do |db| ``` + +### Connection URI + +The connection string has the following syntax: + +``` +mysql://[user[:[password]]@]host[:port][/schema][?param1=value1&param2=value2] +``` + +Connection query params: + +- encoding: The collation & charset (character set) to use during the connection. + If empty or not defined, it will be set to `utf8_general_ci`. + The list of available collations is defined in [`MySql::Collations::COLLATIONS_IDS_BY_NAME`](src/mysql/collations.cr) diff --git a/spec/driver_spec.cr b/spec/driver_spec.cr index 428d3e8..adc369e 100644 --- a/spec/driver_spec.cr +++ b/spec/driver_spec.cr @@ -37,6 +37,33 @@ describe Driver do end end + it "should connect with default encoding & collation for the connection set to utf8" do + with_db do |db| + db.exec "DROP DATABASE IF EXISTS crystal_mysql_test" + db.exec "CREATE DATABASE crystal_mysql_test" + + # By default, the encoding for the DB connection is set to utf8_general_ci + DB.open "mysql://crystal_test:secret@#{database_host}/crystal_mysql_test" do |db| + db.scalar("SELECT @@collation_connection").should eq("utf8_general_ci") + db.scalar("SELECT @@character_set_connection").should eq("utf8") + end + db.exec "DROP DATABASE IF EXISTS crystal_mysql_test" + end + end + + it "should connect with requested encoding" do + with_db do |db| + db.exec "DROP DATABASE IF EXISTS crystal_mysql_test" + db.exec "CREATE DATABASE crystal_mysql_test" + + DB.open 
"mysql://crystal_test:secret@#{database_host}/crystal_mysql_test?encoding=utf8mb4_unicode_520_ci" do |db| + db.scalar("SELECT @@collation_connection").should eq("utf8mb4_unicode_520_ci") + db.scalar("SELECT @@character_set_connection").should eq("utf8mb4") + end + db.exec "DROP DATABASE IF EXISTS crystal_mysql_test" + end + end + it "create and drop test database" do sql = "SELECT count(*) FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = 'crystal_mysql_test'" diff --git a/src/mysql/collations.cr b/src/mysql/collations.cr new file mode 100644 index 0000000..86d091e --- /dev/null +++ b/src/mysql/collations.cr @@ -0,0 +1,159 @@ +module MySql::Collations + # Available collations mapped to the internal ID. + # Handshake packet have only 1 byte for collation_id. + # Only collations with ID > 255 are used during the handshake + # The list of collation is from this SQL query: + # SELECT ID, COLLATION_NAME FROM information_schema.COLLATIONS WHERE ID <= 255 ORDER BY ID; + # + # ucs2, utf16, and utf32 are excluded since they cannot be set as connection charset. 
+ # https://dev.mysql.com/doc/refman/5.7/en/charset-connection.html#charset-connection-impermissible-client-charset + COLLATIONS_IDS_BY_NAME = { + "big5_chinese_ci": 1, + "latin2_czech_cs": 2, + "dec8_swedish_ci": 3, + "cp850_general_ci": 4, + "latin1_german1_ci": 5, + "hp8_english_ci": 6, + "koi8r_general_ci": 7, + "latin1_swedish_ci": 8, + "latin2_general_ci": 9, + "swe7_swedish_ci": 10, + "ascii_general_ci": 11, + "ujis_japanese_ci": 12, + "sjis_japanese_ci": 13, + "cp1251_bulgarian_ci": 14, + "latin1_danish_ci": 15, + "hebrew_general_ci": 16, + "tis620_thai_ci": 18, + "euckr_korean_ci": 19, + "latin7_estonian_cs": 20, + "latin2_hungarian_ci": 21, + "koi8u_general_ci": 22, + "cp1251_ukrainian_ci": 23, + "gb2312_chinese_ci": 24, + "greek_general_ci": 25, + "cp1250_general_ci": 26, + "latin2_croatian_ci": 27, + "gbk_chinese_ci": 28, + "cp1257_lithuanian_ci": 29, + "latin5_turkish_ci": 30, + "latin1_german2_ci": 31, + "armscii8_general_ci": 32, + "utf8_general_ci": 33, + "cp1250_czech_cs": 34, + "cp866_general_ci": 36, + "keybcs2_general_ci": 37, + "macce_general_ci": 38, + "macroman_general_ci": 39, + "cp852_general_ci": 40, + "latin7_general_ci": 41, + "latin7_general_cs": 42, + "macce_bin": 43, + "cp1250_croatian_ci": 44, + "utf8mb4_general_ci": 45, + "utf8mb4_bin": 46, + "latin1_bin": 47, + "latin1_general_ci": 48, + "latin1_general_cs": 49, + "cp1251_bin": 50, + "cp1251_general_ci": 51, + "cp1251_general_cs": 52, + "macroman_bin": 53, + "cp1256_general_ci": 57, + "cp1257_bin": 58, + "cp1257_general_ci": 59, + "binary": 63, + "armscii8_bin": 64, + "ascii_bin": 65, + "cp1250_bin": 66, + "cp1256_bin": 67, + "cp866_bin": 68, + "dec8_bin": 69, + "greek_bin": 70, + "hebrew_bin": 71, + "hp8_bin": 72, + "keybcs2_bin": 73, + "koi8r_bin": 74, + "koi8u_bin": 75, + "utf8_tolower_ci": 76, + "latin2_bin": 77, + "latin5_bin": 78, + "latin7_bin": 79, + "cp850_bin": 80, + "cp852_bin": 81, + "swe7_bin": 82, + "utf8_bin": 83, + "big5_bin": 84, + "euckr_bin": 85, + "gb2312_bin": 
86, + "gbk_bin": 87, + "sjis_bin": 88, + "tis620_bin": 89, + "ujis_bin": 91, + "geostd8_general_ci": 92, + "geostd8_bin": 93, + "latin1_spanish_ci": 94, + "cp932_japanese_ci": 95, + "cp932_bin": 96, + "eucjpms_japanese_ci": 97, + "eucjpms_bin": 98, + "cp1250_polish_ci": 99, + "utf8_unicode_ci": 192, + "utf8_icelandic_ci": 193, + "utf8_latvian_ci": 194, + "utf8_romanian_ci": 195, + "utf8_slovenian_ci": 196, + "utf8_polish_ci": 197, + "utf8_estonian_ci": 198, + "utf8_spanish_ci": 199, + "utf8_swedish_ci": 200, + "utf8_turkish_ci": 201, + "utf8_czech_ci": 202, + "utf8_danish_ci": 203, + "utf8_lithuanian_ci": 204, + "utf8_slovak_ci": 205, + "utf8_spanish2_ci": 206, + "utf8_roman_ci": 207, + "utf8_persian_ci": 208, + "utf8_esperanto_ci": 209, + "utf8_hungarian_ci": 210, + "utf8_sinhala_ci": 211, + "utf8_german2_ci": 212, + "utf8_croatian_ci": 213, + "utf8_unicode_520_ci": 214, + "utf8_vietnamese_ci": 215, + "utf8_general_mysql500_ci": 223, + "utf8mb4_unicode_ci": 224, + "utf8mb4_icelandic_ci": 225, + "utf8mb4_latvian_ci": 226, + "utf8mb4_romanian_ci": 227, + "utf8mb4_slovenian_ci": 228, + "utf8mb4_polish_ci": 229, + "utf8mb4_estonian_ci": 230, + "utf8mb4_spanish_ci": 231, + "utf8mb4_swedish_ci": 232, + "utf8mb4_turkish_ci": 233, + "utf8mb4_czech_ci": 234, + "utf8mb4_danish_ci": 235, + "utf8mb4_lithuanian_ci": 236, + "utf8mb4_slovak_ci": 237, + "utf8mb4_spanish2_ci": 238, + "utf8mb4_roman_ci": 239, + "utf8mb4_persian_ci": 240, + "utf8mb4_esperanto_ci": 241, + "utf8mb4_hungarian_ci": 242, + "utf8mb4_sinhala_ci": 243, + "utf8mb4_german2_ci": 244, + "utf8mb4_croatian_ci": 245, + "utf8mb4_unicode_520_ci": 246, + "utf8mb4_vietnamese_ci": 247, + } + + def self.default_collation + "utf8_general_ci" + end + + def self.id_for_collation(collation : String) + return COLLATIONS_IDS_BY_NAME.fetch collation, 0 + end +end diff --git a/src/mysql/connection.cr b/src/mysql/connection.cr index 0f30acd..53db73b 100644 --- a/src/mysql/connection.cr +++ b/src/mysql/connection.cr @@ -11,6 
+11,9 @@ class MySql::Connection < DB::Connection username = context.uri.user password = context.uri.password + charset = context.uri.query_params.fetch "encoding", Collations.default_collation + charset_id = Collations.id_for_collation(charset).to_u8 + path = context.uri.path if path && path.size > 1 initial_catalog = path[1..-1] @@ -22,7 +25,7 @@ class MySql::Connection < DB::Connection handshake = read_packet(Protocol::HandshakeV10) write_packet(1) do |packet| - Protocol::HandshakeResponse41.new(username, password, initial_catalog, handshake.auth_plugin_data).write(packet) + Protocol::HandshakeResponse41.new(username, password, initial_catalog, handshake.auth_plugin_data, charset_id).write(packet) end read_ok_or_err do |packet, status| diff --git a/src/mysql/packets.cr b/src/mysql/packets.cr index c462920..4895a83 100644 --- a/src/mysql/packets.cr +++ b/src/mysql/packets.cr @@ -3,8 +3,9 @@ require "openssl/sha1" module MySql::Protocol struct HandshakeV10 getter auth_plugin_data : Bytes + getter charset : UInt8 - def initialize(@auth_plugin_data) + def initialize(@auth_plugin_data, @charset) end def self.read(packet : MySql::ReadPacket) @@ -28,7 +29,7 @@ module MySql::Protocol packet.read_byte! packet.read_string - HandshakeV10.new(auth_data) + HandshakeV10.new(auth_data, charset) end end @@ -59,7 +60,7 @@ module MySql::Protocol CLIENT_SESSION_TRACK = 0x00800000 CLIENT_DEPRECATE_EOF = 0x01000000 - def initialize(@username : String?, @password : String?, @initial_catalog : String?, @auth_plugin_data : Bytes) + def initialize(@username : String?, @password : String?, @initial_catalog : String?, @auth_plugin_data : Bytes, @charset : UInt8) end def write(packet : MySql::WritePacket) @@ -72,7 +73,7 @@ module MySql::Protocol packet.write_bytes caps, IO::ByteFormat::LittleEndian packet.write_bytes 0x00000000u32, IO::ByteFormat::LittleEndian - packet.write_byte 0x21u8 # utf8_general_ci + packet.write_byte @charset 23.times { packet.write_byte 0_u8 } packet << @username