Remove special-use domains and just don't support them at all

HypothesisWorks · Feb 5, 2023 · 7dea288
1 parent a3bd3b8
commit 7dea288
Showing 1 changed file with 5 additions and 13 deletions.
diff --git a/hypothesis-python/src/hypothesis/provisional.py b/hypothesis-python/src/hypothesis/provisional.py
@@ -41,10 +41,10 @@
     ).splitlines()
 
 assert _tlds[0].startswith("#")
-TOP_LEVEL_DOMAINS = ["COM"] + sorted(_tlds[1:], key=len)
 
-# https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.txt
-TOP_LEVEL_DOMAINS_SPECIAL_USE = ["ARPA", "INVALID", "LOCAL", "LOCALHOST", "ONION", "TEST"]
+# Remove the special-use domain name "ARPA" from the list of TLDs. For more
+# discussion see https://github.com/HypothesisWorks/hypothesis/pull/3572
+TOP_LEVEL_DOMAINS = ["COM"] + sorted(filter(lambda tld: tld != "ARPA", _tlds[1:]), key=len)
 
 class DomainNameStrategy(st.SearchStrategy):
     @staticmethod
@@ -61,7 +61,7 @@ def clean_inputs(minimum, maximum, value, variable_name):
             )
         return value
 
-    def __init__(self, max_length=None, max_element_length=None, allow_special_use=True):
+    def __init__(self, max_length=None, max_element_length=None):
         """
         A strategy for :rfc:`1035` fully qualified domain names.
 
@@ -95,21 +95,13 @@ def __init__(self, max_length=None, max_element_length=None, allow_special_use=T
                 maximum_center_character_pattern_repetitions,
             )
 
-        # By default we allow special-use domain names; if the user prefers
-        # to omit special-use domain names then we filter them from the list
-        # of original top-level domain names before we draw from it.
-        if allow_special_use:
-            self.tlds = TOP_LEVEL_DOMAINS
-        else:
-            self.tlds = [tld for tld in TOP_LEVEL_DOMAINS if tld not in TOP_LEVEL_DOMAINS_SPECIAL_USE]
-
     def do_draw(self, data):
         # 1 - Select a valid top-level domain (TLD) name
         # 2 - Check that the number of characters in our selected TLD won't
         # prevent us from generating at least a 1 character subdomain.
         # 3 - Randomize the TLD between upper and lower case characters.
         domain = data.draw(
-            st.sampled_from(self.tlds)
+            st.sampled_from(TOP_LEVEL_DOMAINS)
             .filter(lambda tld: len(tld) + 2 <= self.max_length)
             .flatmap(
                 lambda tld: st.tuples(