| 1 | ## common/xmpp_stringprep.py |
|---|
| 2 | ## |
|---|
| 3 | ## Contributors for this file: |
|---|
| 4 | ## - Yann Le Boulanger <asterix@lagaule.org> |
|---|
| 5 | ## - Nikos Kouremenos <kourem@gmail.com> |
|---|
| 6 | ## |
|---|
| 7 | ## Copyright (C) 2001-2005 Twisted Matrix Laboratories. |
|---|
| 8 | ## Copyright (C) 2005 Gajim Team |
|---|
| 9 | ## |
|---|
| 10 | ## This program is free software; you can redistribute it and/or modify |
|---|
| 11 | ## it under the terms of the GNU General Public License as published |
|---|
| 12 | ## by the Free Software Foundation; version 2 only. |
|---|
| 13 | ## |
|---|
| 14 | ## This program is distributed in the hope that it will be useful, |
|---|
| 15 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 16 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 17 | ## GNU General Public License for more details. |
|---|
| 18 | ## |
|---|
| 19 | |
|---|
| 20 | import sys, warnings |
|---|
| 21 | |
|---|
# The real stringprep machinery is only imported on Python 2.3.2 or newer.
# Older interpreters get a reduced stand-in for ``idna`` and the module-level
# flag ``crippled`` is set so that simplified profiles are built further down.
if sys.version_info >= (2, 3, 2):
    import stringprep
    import unicodedata
    from encodings import idna

    crippled = False

else:
    import re

    class IDNA:
        """ Reduced stand-in exposing the two C{idna} names this module uses. """

        # Pattern matching the four code points accepted as label separators.
        dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]")

        def nameprep(self, label):
            """ Return C{label} lower-cased; no other preparation is done. """
            return label.lower()

    idna = IDNA()

    crippled = True

    warnings.warn("Accented and non-Western Jabber IDs will not be properly "
                  "case-folded with this version of Python, resulting in "
                  "incorrect protocol-level behavior. It is strongly "
                  "recommended you upgrade to Python 2.3.2 or newer if you "
                  "intend to use Twisted's Jabber support.")

# The helper modules were needed only for the check above; drop them from
# the module namespace.
del sys, warnings
| 48 | |
|---|
class ILookupTable:
    """ Interface for character lookup classes.

    An implementation answers, for a single character, whether that
    character belongs to the table it represents.
    """

    def lookup(self, c):
        """ Return whether character is in this table.

        @param c: the character to test.
        @return: a true value if C{c} is in the table, a false value
            otherwise.
        """
| 54 | |
|---|
class IMappingTable:
    """ Interface for character mapping classes.

    An implementation translates a single character into its replacement.
    """

    def map(self, c):
        """ Return mapping for character.

        @param c: the character to map.
        @return: the replacement for C{c}. L{Profile.map} drops the
            character from its output when the result is C{None}.
        """
| 60 | |
|---|
class LookupTableFromFunction:
    """ Lookup table whose membership test is a caller-supplied function. """

    __implements__ = ILookupTable

    def __init__(self, in_table_function):
        """
        @param in_table_function: callable taking one character; it becomes
            this instance's C{lookup} attribute.
        """
        # Stored as an instance attribute, so lookup(c) invokes the supplied
        # function directly without a wrapping method.
        self.lookup = in_table_function
| 67 | |
|---|
class LookupTable:
    """ Lookup table backed by a container of characters. """

    __implements__ = ILookupTable

    def __init__(self, table):
        """
        @param table: container of the characters that are in the table; it
            only needs to support the C{in} operator.
        """
        self._table = table

    def lookup(self, c):
        """ Return whether C{c} is contained in the wrapped table. """
        return c in self._table
| 77 | |
|---|
class MappingTableFromFunction:
    """ Mapping table whose mapping is a caller-supplied function. """

    __implements__ = IMappingTable

    def __init__(self, map_table_function):
        """
        @param map_table_function: callable taking one character and
            returning its mapping; it becomes this instance's C{map}
            attribute.
        """
        # Stored as an instance attribute, so map(c) invokes the supplied
        # function directly without a wrapping method.
        self.map = map_table_function
| 84 | |
|---|
class EmptyMappingTable:
    """ Mapping table that maps every member of a table to nothing.

    Characters for which the membership function is true map to C{None};
    all other characters map to themselves.
    """

    __implements__ = IMappingTable

    def __init__(self, in_table_function):
        """
        @param in_table_function: callable taking one character and returning
            a true value when that character belongs to the table.
        """
        self._in_table_function = in_table_function

    def map(self, c):
        """ Return C{None} for table members, C{c} itself otherwise. """
        is_member = self._in_table_function(c)
        if not is_member:
            return c
        return None
| 97 | |
|---|
class Profile:
    """ A configurable stringprep profile.

    Preparing a string maps every character through the mapping tables,
    optionally applies NFKC normalization, and then rejects the result if it
    contains prohibited characters, unassigned code points, or an invalid
    combination of bidirectional characters.

    Normalization and the unassigned/bidirectional checks use the
    module-level C{unicodedata} and C{stringprep} modules, so they must be
    switched off where those modules were not imported.
    """

    def __init__(self, mappings=None, normalize=True, prohibiteds=None,
                 check_unassigneds=True, check_bidi=True):
        """
        @param mappings: sequence of objects with a C{map(c)} method, tried
            in order for each character. C{None} means no mappings.
        @param normalize: whether to apply NFKC normalization after mapping.
        @param prohibiteds: sequence of objects with a C{lookup(c)} method;
            a character found in any of them is rejected. C{None} means no
            prohibited tables.
        @param check_unassigneds: whether to reject unassigned code points.
        @param check_bidi: whether to enforce the bidirectional rules.
        """
        # A fresh list is created per instance: a list literal used as the
        # default value would be one object shared by every profile built
        # without that argument.
        if mappings is None:
            mappings = []
        if prohibiteds is None:
            prohibiteds = []

        self.mappings = mappings
        self.normalize = normalize
        self.prohibiteds = prohibiteds
        self.do_check_unassigneds = check_unassigneds
        self.do_check_bidi = check_bidi

    def prepare(self, string):
        """ Return C{string} prepared according to this profile.

        @param string: the unicode string to prepare.
        @return: the mapped (and, if enabled, normalized) string.
        @raise UnicodeError: if one of the enabled checks rejects the string.
        """
        result = self.map(string)
        if self.normalize:
            result = unicodedata.normalize("NFKC", result)
        # All checks run on the mapped and normalized form, not on the input.
        self.check_prohibiteds(result)
        if self.do_check_unassigneds:
            self.check_unassigneds(result)
        if self.do_check_bidi:
            self.check_bidirectionals(result)
        return result

    def map(self, string):
        """ Return C{string} with every character passed through the mappings.

        @param string: the unicode string to map.
        @return: the concatenation of the mapped characters.
        """
        result = []

        for c in string:
            result_c = c

            # Each table is given the original character; the first table
            # whose answer differs from it decides the outcome.
            for mapping in self.mappings:
                result_c = mapping.map(c)
                if result_c != c:
                    break

            # A mapping of None means the character is dropped.
            if result_c is not None:
                result.append(result_c)

        return u"".join(result)

    def check_prohibiteds(self, string):
        """ Raise C{UnicodeError} if any character is in a prohibited table.

        @param string: the string to check.
        @raise UnicodeError: naming the first prohibited character found.
        """
        for c in string:
            for table in self.prohibiteds:
                if table.lookup(c):
                    raise UnicodeError("Invalid character %s" % repr(c))

    def check_unassigneds(self, string):
        """ Raise C{UnicodeError} if any character is in stringprep table A.1.

        @param string: the string to check.
        @raise UnicodeError: naming the first unassigned code point found.
        """
        for c in string:
            if stringprep.in_table_a1(c):
                raise UnicodeError("Unassigned code point %s" % repr(c))

    def check_bidirectionals(self, string):
        """ Enforce the bidirectional requirements on C{string}.

        @param string: the string to check.
        @raise UnicodeError: if the string mixes table D.1 (RandALCat) and
            table D.2 (LCat) characters, or contains a D.1 character without
            both starting and ending with one.
        """
        found_LCat = False
        found_RandALCat = False

        for c in string:
            if stringprep.in_table_d1(c):
                found_RandALCat = True
            if stringprep.in_table_d2(c):
                found_LCat = True

        if found_LCat and found_RandALCat:
            raise UnicodeError("Violation of BIDI Requirement 2")

        # found_RandALCat implies at least one character, so indexing the
        # first and last characters here is safe.
        if found_RandALCat and not (stringprep.in_table_d1(string[0]) and
                                    stringprep.in_table_d1(string[-1])):
            raise UnicodeError("Violation of BIDI Requirement 3")
| 161 | |
|---|
| 162 | |
|---|
| 163 | class NamePrep: |
|---|
| 164 | """ Implements preparation of internationalized domain names. |
|---|
| 165 | |
|---|
| 166 | This class implements preparing internationalized domain names using the |
|---|
| 167 | rules defined in RFC 3491, section 4 (Conversion operations). |
|---|
| 168 | |
|---|
| 169 | We do not perform step 4 since we deal with unicode representations of |
|---|
| 170 | domain names and do not convert from or to ASCII representations using |
|---|
| 171 | punycode encoding. When such a conversion is needed, the L{idna} standard |
|---|
| 172 | library provides the C{ToUnicode()} and C{ToASCII()} functions. Note that |
|---|
| 173 | L{idna} itself assumes UseSTD3ASCIIRules to be false. |
|---|
| 174 | |
|---|
| 175 | The following steps are performed by C{prepare()}: |
|---|
| 176 | |
|---|
| 177 | * Split the domain name in labels at the dots (RFC 3490, 3.1) |
|---|
| 178 | * Apply nameprep proper on each label (RFC 3491) |
|---|
| 179 | * Enforce the restrictions on ASCII characters in host names by |
|---|
| 180 | assuming STD3ASCIIRules to be true. (STD 3) |
|---|
| 181 | * Rejoin the labels using the label separator U+002E (full stop). |
|---|
| 182 | """ |
|---|
| 183 | |
|---|
| 184 | # Prohibited characters. |
|---|
| 185 | prohibiteds = [unichr(n) for n in range(0x00, 0x2c + 1) + |
|---|
| 186 | range(0x2e, 0x2f + 1) + |
|---|
| 187 | range(0x3a, 0x40 + 1) + |
|---|
| 188 | range(0x5b, 0x60 + 1) + |
|---|
| 189 | range(0x7b, 0x7f + 1) ] |
|---|
| 190 | |
|---|
| 191 | def prepare(self, string): |
|---|
| 192 | result = [] |
|---|
| 193 | |
|---|
| 194 | labels = idna.dots.split(string) |
|---|
| 195 | |
|---|
| 196 | if labels and len(labels[-1]) == 0: |
|---|
| 197 | trailing_dot = '.' |
|---|
| 198 | del labels[-1] |
|---|
| 199 | else: |
|---|
| 200 | trailing_dot = '' |
|---|
| 201 | |
|---|
| 202 | for label in labels: |
|---|
| 203 | result.append(self.nameprep(label)) |
|---|
| 204 | |
|---|
| 205 | return ".".join(result)+trailing_dot |
|---|
| 206 | |
|---|
| 207 | def check_prohibiteds(self, string): |
|---|
| 208 | for c in string: |
|---|
| 209 | if c in self.prohibiteds: |
|---|
| 210 | raise UnicodeError, "Invalid character %s" % repr(c) |
|---|
| 211 | |
|---|
| 212 | def nameprep(self, label): |
|---|
| 213 | label = idna.nameprep(label) |
|---|
| 214 | self.check_prohibiteds(label) |
|---|
| 215 | if label[0] == '-': |
|---|
| 216 | raise UnicodeError, "Invalid leading hyphen-minus" |
|---|
| 217 | if label[-1] == '-': |
|---|
| 218 | raise UnicodeError, "Invalid trailing hyphen-minus" |
|---|
| 219 | return label |
|---|
| 220 | |
|---|
# Build the module-level profiles. Which variant is built depends on the
# ``crippled`` flag set at import time.
if crippled:
    # Reduced profiles: plain lower-casing for node identifiers, and no
    # normalization, unassigned-code-point check or bidirectional check,
    # since those rely on modules that were not imported in this mode.
    case_map = MappingTableFromFunction(lambda c: c.lower())
    nodeprep = Profile(mappings=[case_map],
                       normalize=False,
                       prohibiteds=[LookupTable([u' ', u'"', u'&', u"'", u'/',
                                                 u':', u'<', u'>', u'@'])],
                       check_unassigneds=False,
                       check_bidi=False)

    resourceprep = Profile(normalize=False,
                           check_unassigneds=False,
                           check_bidi=False)

else:
    # Prohibition tables, one wrapper per stringprep.in_table_c* predicate.
    C_11 = LookupTableFromFunction(stringprep.in_table_c11)
    C_12 = LookupTableFromFunction(stringprep.in_table_c12)
    C_21 = LookupTableFromFunction(stringprep.in_table_c21)
    C_22 = LookupTableFromFunction(stringprep.in_table_c22)
    C_3 = LookupTableFromFunction(stringprep.in_table_c3)
    C_4 = LookupTableFromFunction(stringprep.in_table_c4)
    C_5 = LookupTableFromFunction(stringprep.in_table_c5)
    C_6 = LookupTableFromFunction(stringprep.in_table_c6)
    C_7 = LookupTableFromFunction(stringprep.in_table_c7)
    C_8 = LookupTableFromFunction(stringprep.in_table_c8)
    C_9 = LookupTableFromFunction(stringprep.in_table_c9)

    # Mapping tables: B_1 removes the characters of table B.1, B_2 applies
    # the mapping of table B.2.
    B_1 = EmptyMappingTable(stringprep.in_table_b1)
    B_2 = MappingTableFromFunction(stringprep.map_table_b2)

    # Node identifiers: both mappings, all C tables, plus the characters
    # listed explicitly below.
    nodeprep = Profile(mappings=[B_1, B_2],
                       prohibiteds=[C_11, C_12, C_21, C_22,
                                    C_3, C_4, C_5, C_6, C_7, C_8, C_9,
                                    LookupTable([u'"', u'&', u"'", u'/',
                                                 u':', u'<', u'>', u'@'])])

    # Resource identifiers: only the B.1 mapping, and table C.1.1 is not
    # prohibited.
    resourceprep = Profile(mappings=[B_1,],
                           prohibiteds=[C_12, C_21, C_22,
                                        C_3, C_4, C_5, C_6, C_7, C_8, C_9])

# Domain names use the same preparer in both modes.
nameprep = NamePrep()