1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
// Copyright 2016 Simon Sapin. // // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! This Rust crate implements IDNA //! [per the WHATWG URL Standard](https://url.spec.whatwg.org/#idna). //! //! It also exposes the underlying algorithms from [*Unicode IDNA Compatibility Processing* //! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/) //! and [Punycode (RFC 3492)](https://tools.ietf.org/html/rfc3492). //! //! Quoting from [UTS #46’s introduction](http://www.unicode.org/reports/tr46/#Introduction): //! //! > Initially, domain names were restricted to ASCII characters. //! > A system was introduced in 2003 for internationalized domain names (IDN). //! > This system is called Internationalizing Domain Names for Applications, //! > or IDNA2003 for short. //! > This mechanism supports IDNs by means of a client software transformation //! > into a format known as Punycode. //! > A revision of IDNA was approved in 2010 (IDNA2008). //! > This revision has a number of incompatibilities with IDNA2003. //! > //! > The incompatibilities force implementers of client software, //! > such as browsers and emailers, //! > to face difficult choices during the transition period //! > as registries shift from IDNA2003 to IDNA2008. //! > This document specifies a mechanism //! > that minimizes the impact of this transition for client software, //! > allowing client software to access domains that are valid under either system. #[macro_use] extern crate matches; extern crate unicode_bidi; extern crate unicode_normalization; pub mod punycode; pub mod uts46; /// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm. /// /// Return the ASCII representation a domain name, /// normalizing characters (upper-case to lower-case and other kinds of equivalence) /// and using Punycode as necessary. /// /// This process may fail. pub fn domain_to_ascii(domain: &str) -> Result<String, uts46::Errors> { uts46::to_ascii(domain, uts46::Flags { use_std3_ascii_rules: false, transitional_processing: true, // XXX: switch when Firefox does verify_dns_length: false, }) } /// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm. /// /// Return the Unicode representation of a domain name, /// normalizing characters (upper-case to lower-case and other kinds of equivalence) /// and decoding Punycode as necessary. /// /// This may indicate [syntax violations](https://url.spec.whatwg.org/#syntax-violation) /// but always returns a string for the mapped domain. pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) { uts46::to_unicode(domain, uts46::Flags { use_std3_ascii_rules: false, // Unused: transitional_processing: true, verify_dns_length: false, }) }