Vanilla AppleScript email address validation

I need to ‘sanitise’ large quantities of text by sometimes removing email addresses entirely and sometimes replacing the “@” with something that makes the address more difficult for online harvesters to identify. While working on these projects the comments of BBS members have been very helpful … thanks.

One comment … about how “heinously difficult” it was to check the validity of an email address (some awfully complex REGEXPs were being discussed at the time) … piqued my curiosity. If it works as intended, the attached script should check the input text for compliance with the email-address-format ‘standards’ (RFCs 2821 and 2822).

I have no doubt that the structure of the script, and probably the way it does lots of things, is awkward and unorthodox (comes from having no idea what is actually orthodox) … but it seems to work. I’d appreciate advice on ways to tidy this up, and reports of any incorrect results it may produce. For example, I’ve built a repeat loop, right at the start, to allow the “Try another” button on the results dialog to restart the whole script again. Surely there’s a tidier way of doing this … I tried a number of things but this is the only way I achieved the desired result.

As for utility … this beast is no more effective (in terms of false positives or false negatives), for my purpose, than a very simple algorithm checking for six or more characters, at least one “at” (@) and at least one dot (.). Oh well, I think I’ve learnt a lot about AppleScript, and email address formatting, in the process :rolleyes:

As for the REGEXP … I am now not sure that it’s actually possible to build a single REGEXP that can do all of this, but I haven’t dissected the ex-parrot monster to see what it actually does (http://www.ex-parrot.com/~pdw/Mail-RFC822-Address.html).

Cheers
Dougal


-- Email address RFC compliance test

-- A script to try and check the compliance of an email address, 
-- against RFCs 2821 & 2822, using vanilla AppleScript only ... no shell, 
-- no PERL, and no regexps.
-- Script does not check whether the test address is real, or whether the 
-- domain (or the top-level domain) of the test address is real ... it only 
-- tests for compliance with the RFC provisions

-- Main loop. Every pass prompts for one address, splits it into its local-part
-- and its domain, runs the checks that match the format of each part and then
-- reports the verdict. concluding_response() stores the dialog button that was
-- pressed in the global ContinueOption, which is what keeps (or stops) this loop.
-- The handlers communicate their results through globals, so the status and
-- format variables below are shared with them.
global LocalPartComplianceStatus

set ContinueOption to "Try another"

repeat while ContinueOption = "Try another"
	set ContinueOption to "unknown"
	
	define_variables_and_lists()
	
	-- start every pass from a clean slate so nothing survives from the previous address
	set LocalPart to ""
	set Domain to ""
	set LocalPartFormat to "unknown"
	set DomainFormat to "unknown"
	set LocalPartComplianceStatus to "unknown"
	set DomainComplianceStatus to "unknown"
	set DotAtomComplianceStatus to "unknown"
	
	address_to_test(Test_Addresses)
	
	-- Is the Addr-spec too long to comply?
	set AddrSpecLength to length of PossibleAddress
	if AddrSpecLength > 320 then
		set ComplianceStatus to "failed"
		set FailReason to "the test address is " & AddrSpecLength & " characters in length. A compliant email address cannot exceed 320 characters: 64 for the local part; 255 for the domain; and one for the @. (Ref: RFC2821, 4.5.3.1)"
		concluding_response(PossibleAddress, ComplianceStatus, FailReason)
	else
		-- split the address at the @ and see what comes out
		set text item delimiters to "@"
		set AddressMainParts to text items of PossibleAddress
		set text item delimiters to space
		
		if (count of AddressMainParts) < 2 then
			-- first option: there's no @-split
			set ComplianceStatus to "failed"
			set FailReason to "the test address does not contain an \"at\" (@) character. All compliant email addresses have the form local-part@domain ... and so always contain at least one @ character. (Ref: RFC2822, 3.4.1; RFC2821, 4.1.2.)"
			concluding_response(PossibleAddress, ComplianceStatus, FailReason)
			
		else if (count of AddressMainParts) = 2 then
			-- second option: the @-split results in two parts
			set LocalPart to item 1 of AddressMainParts
			set Domain to item 2 of AddressMainParts
			
			-- What format is the local-part?
			if LocalPart does not contain "\"" then
				set LocalPartFormat to "dot-atom"
			else if (length of LocalPart) > 1 and LocalPart starts with "\"" and LocalPart ends with "\"" then
				-- a lone double-quote both starts and ends with a quote, hence the length test
				set LocalPartFormat to "quoted-string"
			else
				set ComplianceStatus to "failed"
				set FailReason to "the local-part of the address contains a double-quote character (\") but is not as part of a quoted-string. (Ref: RFC2822, 3.4.1 & 3.2.5; RFC2821, 4.1.2.)"
				concluding_response(PossibleAddress, ComplianceStatus, FailReason)
			end if
			
			-- What format is the domain?
			if Domain starts with "[" and Domain ends with "]" then
				set DomainFormat to "domain-literal"
			else
				set DomainFormat to "dot-atom" -- it may not actually be but that is tested later
			end if
			
		else
			-- third option: the address contains more than one @ character, which is
			-- only legal inside a quoted-string local-part or a domain-literal domain
			if PossibleAddress starts with "\"" and PossibleAddress contains "\"@" then
				set LocalPartFormat to "quoted-string"
			else
				set LocalPartFormat to "dot-atom"
			end if
			set MightBeDomainLiteral to (PossibleAddress ends with "]" and PossibleAddress contains "@[")
			
			if LocalPartFormat = "dot-atom" and not MightBeDomainLiteral then
				-- the address can't have more than one @ and be in dot-atom@dot-atom format
				set ComplianceStatus to "failed"
				set FailReason to "the test address contains more than one \"at\" (@). Additional @ characters can only exist in a compliant address if they're contained within a quoted-string format local-part or a domain-literal format domain. The local-part of the test addres is not in quoted-string format and the domain is not in domain-literal format.  (Ref: RFC2822, 3.4.1 & 3.2.5; RFC2821, 4.1.2.)."
				concluding_response(PossibleAddress, ComplianceStatus, FailReason)
			else
				-- extract the local-part now that its format is known
				if LocalPartFormat = "quoted-string" then
					set text item delimiters to "\"@"
					set LocalPart to (item 1 of (text items of PossibleAddress)) & "\""
				else
					set text item delimiters to "@"
					set LocalPart to item 1 of (text items of PossibleAddress)
				end if
				set text item delimiters to space
				
				-- The domain is everything that follows the @ directly after the local-part.
				-- Taking the remainder (instead of splitting the address a second time)
				-- guarantees that local-part & "@" & domain is exactly the address under
				-- test, so a stray extra @ ends up in the domain and is caught there.
				set DomainStart to (length of LocalPart) + 2
				if DomainStart > AddrSpecLength then
					set Domain to ""
				else
					set Domain to text DomainStart thru -1 of PossibleAddress
				end if
				
				if Domain starts with "[" and Domain ends with "]" then
					set DomainFormat to "domain-literal"
				else
					set DomainFormat to "dot-atom"
				end if
			end if
		end if
	end if
	
	-- sanity checks on the two extracted parts, before the character-level tests
	if ComplianceStatus is not "failed" then
		if LocalPart is "" then
			set ComplianceStatus to "failed"
			set FailReason to "the local-part of the test address (the part in front of the @) is empty. All compliant email addresses have the form local-part@domain and neither part can be empty. (Ref: RFC2822, 3.4.1; RFC2821, 4.1.2.)"
			concluding_response(PossibleAddress, ComplianceStatus, FailReason)
		else if Domain is "" then
			set ComplianceStatus to "failed"
			set FailReason to "the domain of the test address (the part after the @) is empty. All compliant email addresses have the form local-part@domain and neither part can be empty. (Ref: RFC2822, 3.4.1; RFC2821, 4.1.2.)"
			concluding_response(PossibleAddress, ComplianceStatus, FailReason)
		else if LocalPartFormat = "quoted-string" and (length of LocalPart) < 2 then
			set ComplianceStatus to "failed"
			set FailReason to "the local-part of the address contains a double-quote character (\") but is not as part of a quoted-string. (Ref: RFC2822, 3.4.1 & 3.2.5; RFC2821, 4.1.2.)"
			concluding_response(PossibleAddress, ComplianceStatus, FailReason)
		else if (length of LocalPart) > 64 then
			set ComplianceStatus to "failed"
			set FailReason to "the local-part of the test address is " & (length of LocalPart) & " characters in length. The local-part of a compliant email address cannot exceed 64 characters. (Ref: RFC2821, 4.5.3.1)"
			concluding_response(PossibleAddress, ComplianceStatus, FailReason)
		else if (length of Domain) > 255 then
			set ComplianceStatus to "failed"
			set FailReason to "the domain of the test address is " & (length of Domain) & " characters in length. The domain of a compliant email address cannot exceed 255 characters. (Ref: RFC2821, 4.5.3.1)"
			concluding_response(PossibleAddress, ComplianceStatus, FailReason)
		end if
	end if
	
	-- by now the test address has either been rejected or the local-part and domain 
	-- have been identified, extracted, and tentatively categorised 
	-- (but domain has not yet been checked as IPv4 address-literal)
	
	-- if local-part appears to be in the dot-atom format then check that it's compliant
	if ComplianceStatus is not "failed" and (LocalPartFormat = "dot-atom") then
		set TestTextSet to RFC2822_atext & {"."}
		set TestPart to "local-part"
		dot_atomiser(PossibleAddress, LocalPart, TestTextSet, TestPart)
		
		if DotAtomComplianceStatus = "passed" then
			set LocalPartComplianceStatus to "passed"
		end if
	end if
	
	-- if local-part appears to be in the quoted-string format then check that it's compliant
	-- (the handler sets LocalPartComplianceStatus itself)
	if ComplianceStatus is not "failed" and (LocalPartFormat = "quoted-string") then
		check_quoted_string(LocalPart, RFC2822_text, RFC2822_qtext, PossibleAddress)
	end if
	
	-- if appropriate check that the dot-atom format of the domain is compliant
	if ComplianceStatus is not "failed" and (DomainFormat = "dot-atom") then
		set TestTextSet to RFC2822_atext & {"."}
		set DotAtomComplianceStatus to "unknown"
		set TestPart to "domain"
		dot_atomiser(PossibleAddress, Domain, TestTextSet, TestPart)
		
		if DotAtomComplianceStatus = "passed" then
			-- need to also ensure that domain's final dot has two or more characters after it
			set text item delimiters to "."
			set TopLevelDomain to last item of (text items of Domain) as text
			set text item delimiters to space
			if length of TopLevelDomain < 2 then
				set FailReason to "the domain of the test address appears to be in the dot-atom format but there is only one character after the last dot (.). All Top Level Domains are two or more characters in length. (Ref: RFC1591, 3.2.4 & 3.4.1)"
				set DotAtomComplianceStatus to "failed"
				set DomainComplianceStatus to "failed"
				set ComplianceStatus to "failed"
				concluding_response(PossibleAddress, ComplianceStatus, FailReason)
			else
				set DomainComplianceStatus to "passed"
				-- could check the validity of the TLD here ... if you really wanted to
				-- it may be marginally easier to use a whois check rather than try 
				-- and integrate the provisions of ISO3166 etc
			end if
		end if
	end if
	
	-- if the domain is in the domain-literal format check that it's compliant ... then, 
	-- if it's domain-literal compliant, check to see whether it's in address-literal format
	--  ... then, if it's address-literal, check that that is also compliant
	if ComplianceStatus is not "failed" and (DomainFormat = "domain-literal") then
		check_domain_literal(Domain, RFC2822_text, RFC2822_dtext, PossibleAddress)
		-- check for address-literal (DomainCharacters and OldDomain are globals
		-- left behind by check_domain_literal)
		if DomainComplianceStatus is "passed" then
			check_address_literal(DomainCharacters, ASCII_DIGIT, PossibleAddress, OldDomain)
		end if
	end if
	
	
	-- mop-up and just in case: every failure above has already shown its own dialog,
	-- so only the "passed" and the "fell through the cracks" outcomes are reported here
	if (ComplianceStatus is not equal to "failed") then
		if LocalPartComplianceStatus = "passed" and DomainComplianceStatus = "passed" then
			set ComplianceStatus to "passed"
			set FailReason to ""
			concluding_response(PossibleAddress, ComplianceStatus, FailReason)
		else
			set ComplianceStatus to "neither passed nor failed"
			set FailReason to "the test address hasn't yet been subjected to a complete battery of evaluation tests ... or, alternatively, I've missed a possibility in writing this script."
			concluding_response(PossibleAddress, ComplianceStatus, FailReason)
		end if
	end if
	
end repeat

-----------------------------------------------------------------------------------------------------
--- Subroutines ----------------------------------------------------------------------------------- 


-- Asks the user for the address to evaluate.
-- Test_Addresses: list of sample addresses; one is picked at random and offered
--   as the default answer of the dialog.
-- Result: the text the user entered is stored in the global PossibleAddress.
-- Pressing the dialog's Cancel button is not handled here.
on address_to_test(Test_Addresses)
	global PossibleAddress
	set SuggestedAddress to (some item of Test_Addresses) as text
	set PossibleAddress to the text returned of (display dialog "Enter the address you want tested," & return & "or use the test address supplied: " default answer SuggestedAddress)
end address_to_test

-----------------------------------------------------------------------------------------------------


-- Shows the verdict for one address and records what the user wants to do next.
-- PossibleAddress: the address that was evaluated (only used in the message).
-- ComplianceStatus: "failed", "passed", or anything else (reported as
--   "neither passed nor failed").
-- FailReason: explanation appended to the message for every status except "passed".
-- Result: the name of the button pressed is stored in the global ContinueOption.
on concluding_response(PossibleAddress, ComplianceStatus, FailReason)
	global ContinueOption
	set ContinueOption to "unknown"
	
	if ComplianceStatus = "failed" then
		set DialogIcon to stop
		set Explanation to "This was because " & FailReason & return & "This was the first non-compliance identified. It is possible that " & PossibleAddress & " has additional non-compliances that were not checked during this evaluation."
	else if ComplianceStatus = "passed" then
		set DialogIcon to note
		set Explanation to return & "Although it has passed this compliance evaluation this does not mean that " & PossibleAddress & " is an actual email address. Other methods should be utilised to determine that."
	else
		set DialogIcon to caution
		set ComplianceStatus to "neither passed nor failed"
		-- keep an explanation supplied by the caller; fall back to the generic
		-- text only when nothing meaningful was passed in
		if FailReason is "" or FailReason is "unknown" then
			set FailReason to "the test address hasn't yet been subjected to a complete battery of evaluation tests."
		end if
		set Explanation to "This was because " & FailReason
	end if
	
	set DialogToDisplay to "The test address (" & PossibleAddress & ") has " & ComplianceStatus & " this evaluation." & return & return & Explanation
	set ContinueOption to the button returned of (display dialog DialogToDisplay buttons {"Cancel", "Try another"} default button "Try another" with icon DialogIcon)
	
end concluding_response


-----------------------------------------------------------------------------------------------------

-- Checks one part of the address against the dot-atom rules.
-- PossibleAddress: the complete address (only used when reporting a failure).
-- TestString: the local-part or the domain to be checked.
-- TestTextSet: list of the single characters that are allowed in TestString.
-- TestPart: the name of the part ("local-part" or "domain"), used in messages.
-- Result: the global DotAtomComplianceStatus becomes "passed" or "failed". On a
-- failure the globals ComplianceStatus and FailReason are set as well and the
-- failure is reported through concluding_response().
on dot_atomiser(PossibleAddress, TestString, TestTextSet, TestPart)
	global DotAtomComplianceStatus
	global ComplianceStatus
	global FailReason
	
	set DotAtomComplianceStatus to "unknown"
	
	if TestString is "" then
		-- must be tested first: asking for the first character of an empty text is an error
		set DotAtomComplianceStatus to "failed"
		set ComplianceStatus to "failed"
		set FailReason to "the " & TestPart & " of the test address is empty. The RFC2822 dot-atom format requires at least one 'atext' character. (Ref: RFC2822, 3.2.4 & 3.4.1)"
		concluding_response(PossibleAddress, ComplianceStatus, FailReason)
	else if TestString contains ".." then
		-- check for two or more dots in a row
		set DotAtomComplianceStatus to "failed"
		set ComplianceStatus to "failed"
		set FailReason to "the " & TestPart & " of the test address appears to be in the dot-atom format but contains two or more dots (.) in a row. The RFC2822 dot-atom format requires  only 'atext' characters between each dot. (Ref: RFC2822, 3.2.4 & 3.4.1)"
		concluding_response(PossibleAddress, ComplianceStatus, FailReason)
	else if TestString starts with "." or TestString ends with "." then
		set DotAtomComplianceStatus to "failed"
		set ComplianceStatus to "failed"
		set FailReason to "the " & TestPart & " of the test address appears to be in the dot-atom format but contains a dot (.) at its start and / or finish. The RFC2822 dot-atom format requires 'atext' characters at the start and end of a dot-atom sequence. (Ref: RFC2822, 3.2.4 & 3.4.1)"
		concluding_response(PossibleAddress, ComplianceStatus, FailReason)
	else
		-- check that every character belongs to the permitted set;
		-- only the first offender is reported
		set TestCharacters to characters of TestString
		repeat with n from 1 to count of TestCharacters
			set TestCharacter to item n of TestCharacters
			if TestTextSet does not contain TestCharacter then
				set DotAtomComplianceStatus to "failed"
				set ComplianceStatus to "failed"
				set FailReason to "the " & TestPart & " of the test address appears to be in the dot-atom format but contains characters that do not belong in the RFC2822 'atext' subset of ASCII (Ref: RFC2822, 3.4.1 & 3.2.5)." & return & "The offending item is character number " & n & " (" & TestCharacter & ") of the " & TestPart & " (" & TestString & ")."
				concluding_response(PossibleAddress, ComplianceStatus, FailReason)
				exit repeat
			end if
		end repeat
		if DotAtomComplianceStatus is not "failed" then
			set DotAtomComplianceStatus to "passed"
		end if
	end if
	
end dot_atomiser


------------------------------------------------------------------------------------------------------

-- Checks a local-part that is written as a quoted-string.
-- LocalPart: the local-part including its opening and closing double-quote.
-- RFC2822_text: list of the characters allowed after a backslash (quoted-pair).
-- RFC2822_qtext: list of the characters allowed without a backslash.
-- PossibleAddress: the complete address (only used when reporting a failure).
-- Result: the global LocalPartComplianceStatus becomes "passed" or "failed". On a
-- failure the globals QuotedStringComplianceStatus, ComplianceStatus and
-- FailReason are set as well and the failure is reported through
-- concluding_response(). The globals OldLocalPart, TestCharacter and x are
-- written as a side effect.
on check_quoted_string(LocalPart, RFC2822_text, RFC2822_qtext, PossibleAddress)
	
	global OldLocalPart
	global QuotedStringComplianceStatus
	global LocalPartComplianceStatus
	global ComplianceStatus
	global TestCharacter
	global x
	global FailReason
	
	set QuotedStringComplianceStatus to "unknown"
	set LocalPartComplianceStatus to "unknown"
	set OldLocalPart to LocalPart
	
	-- anything shorter than two characters cannot hold both DQUOTEs
	if (length of LocalPart) < 2 then
		set QuotedStringComplianceStatus to "failed"
		set LocalPartComplianceStatus to "failed"
		set ComplianceStatus to "failed"
		set FailReason to "the local-part of the test address (" & OldLocalPart & ") is not enclosed in a pair of double-quote characters (\") and so is not a quoted-string. (Ref: RFC2822, 3.4.1 & 3.2.5; RFC2821, 4.1.2.)"
		concluding_response(PossibleAddress, ComplianceStatus, FailReason)
		return
	end if
	
	-- remove the DQUOTEs at each end of the quoted-string; an empty pair ("")
	-- has no content, which the quoted-string grammar allows
	if (length of LocalPart) = 2 then
		set LocalPartCharacters to {}
	else
		set LocalPartCharacters to characters 2 thru -2 of LocalPart
	end if
	
	-- Walk the content once. EscapePending is true while the previous character
	-- was a backslash that opened a quoted-pair; the character that follows it is
	-- consumed by the pair, so an escaped backslash cannot escape anything itself.
	set EscapePending to false
	repeat with n from 1 to count of LocalPartCharacters
		set TestCharacter to item n of LocalPartCharacters
		
		if EscapePending then
			-- second half of a quoted-pair: any 'text' character is acceptable
			set EscapePending to false
			if RFC2822_text does not contain TestCharacter then
				set x to (n + 1) -- position within the local-part including its opening quote
				set QuotedStringComplianceStatus to "failed"
				set LocalPartComplianceStatus to "failed"
				set ComplianceStatus to "failed"
				set FailReason to "the local-part of the test address appears to be in the quoted-string format but contains characters that do not belong in the RFC2822 'text' subset of ASCII as part of a quoted-pair (Ref: RFC2822, 3.4.1 & 3.2.5)." & return & "The offending item is character number " & x & " (" & TestCharacter & ") of the local-part (" & OldLocalPart & ")."
				concluding_response(PossibleAddress, ComplianceStatus, FailReason)
				exit repeat
			end if
		else if TestCharacter = "\\" then
			set EscapePending to true
		else if RFC2822_qtext does not contain TestCharacter then
			set x to (n + 1)
			set QuotedStringComplianceStatus to "failed"
			set LocalPartComplianceStatus to "failed"
			set ComplianceStatus to "failed"
			set FailReason to "the local-part of the test address appears to be in the quoted-string format but contains characters that do not belong in the RFC2822 'qtext' subset of ASCII and are not part of a quoted-pair (Ref: RFC2822, 3.4.1 & 3.2.5)." & return & "The offending item is character number " & x & " (" & TestCharacter & ") of the local-part (" & OldLocalPart & ")."
			concluding_response(PossibleAddress, ComplianceStatus, FailReason)
			exit repeat
		end if
	end repeat
	
	-- a backslash left open at the end would be escaping the closing DQUOTE
	if QuotedStringComplianceStatus is not "failed" and EscapePending then
		set QuotedStringComplianceStatus to "failed"
		set LocalPartComplianceStatus to "failed"
		set ComplianceStatus to "failed"
		set FailReason to "the local-part of the test address (" & OldLocalPart & ") appears to be in the quoted-string format but ends with a backslash (\\) that escapes its closing double-quote, which leaves the quoted-string unterminated (Ref: RFC2822, 3.2.2 & 3.2.5)."
		concluding_response(PossibleAddress, ComplianceStatus, FailReason)
	end if
	
	if QuotedStringComplianceStatus is not "failed" and LocalPartComplianceStatus is not "failed" then
		set LocalPartComplianceStatus to "passed"
	end if
end check_quoted_string


-----------------------------------------------------------------------------------------------------

-- Checks a domain that is written as a domain-literal.
-- Domain: the domain including its opening and closing square bracket.
-- RFC2822_text: list of the characters allowed after a backslash (quoted-pair).
-- RFC2822_dtext: list of the characters allowed without a backslash.
-- PossibleAddress: the complete address (only used when reporting a failure).
-- Result: the global DomainComplianceStatus becomes "passed" or "failed". On a
-- failure the globals DomainLiteralComplianceStatus, ComplianceStatus and
-- FailReason are set as well and the failure is reported through
-- concluding_response(). The globals OldDomain (the domain as received) and
-- DomainCharacters (the characters between the brackets) are left behind for
-- the caller; TestCharacter and x are written as a side effect.
on check_domain_literal(Domain, RFC2822_text, RFC2822_dtext, PossibleAddress)
	
	global OldDomain
	global DomainLiteralComplianceStatus
	global DomainComplianceStatus
	global ComplianceStatus
	global TestCharacter
	global x
	global FailReason
	global DomainCharacters
	
	set DomainLiteralComplianceStatus to "unknown"
	set DomainComplianceStatus to "unknown"
	set OldDomain to Domain
	
	-- there has to be something between the brackets: RFC2821 expects an address
	-- inside a literal, and a range taken from an empty pair would be meaningless
	if (length of Domain) < 3 then
		set DomainCharacters to {}
		set DomainLiteralComplianceStatus to "failed"
		set DomainComplianceStatus to "failed"
		set ComplianceStatus to "failed"
		set FailReason to "the domain of the test address (" & OldDomain & ") appears to be in the domain-literal format but there is nothing between its square brackets. (Ref: RFC2821, 4.1.3)"
		concluding_response(PossibleAddress, ComplianceStatus, FailReason)
		return
	end if
	
	-- remove the square brackets at each end
	set DomainCharacters to characters 2 thru -2 of Domain
	
	-- Walk the content once. EscapePending is true while the previous character
	-- was a backslash that opened a quoted-pair; the character that follows it is
	-- consumed by the pair, so an escaped backslash cannot escape anything itself.
	set EscapePending to false
	repeat with n from 1 to count of DomainCharacters
		set TestCharacter to item n of DomainCharacters
		
		if EscapePending then
			-- second half of a quoted-pair: any 'text' character is acceptable
			set EscapePending to false
			if RFC2822_text does not contain TestCharacter then
				set x to (n + 1) -- position within the domain including its opening bracket
				set DomainLiteralComplianceStatus to "failed"
				set DomainComplianceStatus to "failed"
				set ComplianceStatus to "failed"
				set FailReason to "the domain of the test address appears to be in the domain-literal format but contains characters that do not belong in the RFC2822 'text' subset of ASCII as part of a quoted-pair (Ref: RFC2822, 3.4.1 & 3.2.5)." & return & "The offending item is character number " & x & " (" & TestCharacter & ") of the domain (" & OldDomain & ")."
				concluding_response(PossibleAddress, ComplianceStatus, FailReason)
				exit repeat
			end if
		else if TestCharacter = "\\" then
			set EscapePending to true
		else if RFC2822_dtext does not contain TestCharacter then
			set x to (n + 1)
			set DomainLiteralComplianceStatus to "failed"
			set DomainComplianceStatus to "failed"
			set ComplianceStatus to "failed"
			set FailReason to "the domain of the test address appears to be in the domain-literal format but contains characters that do not belong in the RFC2822 'dtext' subset of ASCII and are not part of a quoted-pair (Ref: RFC2822, 3.4.1 & 3.2.5)." & return & "The offending item is character number " & x & " (" & TestCharacter & ") of the domain (" & OldDomain & ")."
			concluding_response(PossibleAddress, ComplianceStatus, FailReason)
			exit repeat
		end if
	end repeat
	
	-- a backslash left open at the end would be escaping the closing bracket
	if DomainLiteralComplianceStatus is not "failed" and EscapePending then
		set DomainLiteralComplianceStatus to "failed"
		set DomainComplianceStatus to "failed"
		set ComplianceStatus to "failed"
		set FailReason to "the domain of the test address (" & OldDomain & ") appears to be in the domain-literal format but ends with a backslash (\\) that escapes its closing square bracket, which leaves the domain-literal unterminated (Ref: RFC2822, 3.2.2 & 3.4.1)."
		concluding_response(PossibleAddress, ComplianceStatus, FailReason)
	end if
	
	if DomainLiteralComplianceStatus is not "failed" and DomainComplianceStatus is not "failed" then
		set DomainComplianceStatus to "passed"
	end if
	
end check_domain_literal


-----------------------------------------------------------------------------------------------------

-- Decides whether the content of a domain-literal is an IPv4 address-literal
-- and, if it is, checks the four octets.
-- DomainCharacters: list of the characters between the square brackets.
-- ASCII_DIGIT: list of the digit characters.
-- PossibleAddress: the complete address (only used when reporting a failure).
-- OldDomain: the domain including its brackets (only used in messages).
-- Result: the global DomainFormat becomes "address-literal" when the content
-- consists of digits and dots only, otherwise "domain-literal" (and nothing
-- else is checked). For an address-literal the global DomainComplianceStatus
-- becomes "passed" or "failed"; on a failure the globals ComplianceStatus and
-- FailReason are set as well and the failure is reported through
-- concluding_response().
on check_address_literal(DomainCharacters, ASCII_DIGIT, PossibleAddress, OldDomain)
	global DomainFormat
	global ComplianceStatus
	global DomainComplianceStatus
	global FailReason
	
	-- nothing to do for an address that has already failed, or for empty content
	if ComplianceStatus is "failed" then return
	if (count of DomainCharacters) = 0 then return
	
	-- The format can only be decided after looking at every character: a single
	-- character that is neither a digit nor a dot makes it a general domain-literal.
	set TestTextSet to ASCII_DIGIT & {"."}
	repeat with n from 1 to count of DomainCharacters
		set TestCharacter to item n of DomainCharacters
		if TestTextSet does not contain TestCharacter then
			set DomainFormat to "domain-literal"
			return
		end if
	end repeat
	
	set DomainFormat to "address-literal"
	set AddressLiteralComplianceStatus to "unknown"
	
	-- reform the address-literal and extract the octets
	set SavedDelimiters to text item delimiters
	set text item delimiters to ""
	set Domain to DomainCharacters as text
	set text item delimiters to "."
	set DomainOctets to text items of Domain
	set text item delimiters to SavedDelimiters
	
	-- check that there are four octets and that they're all less than 256
	if (count of DomainOctets) is not 4 then
		set AddressLiteralComplianceStatus to "failed"
		set DomainComplianceStatus to "failed"
		set ComplianceStatus to "failed"
		set FailReason to "the domain of the test address (" & OldDomain & ") appears to be in the address-literal format but does not contain only four 'octets' of integers separated by dots (.) (Ref: RFC2821, 4.1.3)."
		concluding_response(PossibleAddress, ComplianceStatus, FailReason)
	else
		repeat with n from 1 to 4
			set OctetText to item n of DomainOctets
			set OctetLength to length of OctetText
			if OctetLength < 1 or OctetLength > 3 then
				-- tested before the coercion: an empty octet cannot be made into an integer
				set AddressLiteralComplianceStatus to "failed"
				set DomainComplianceStatus to "failed"
				set ComplianceStatus to "failed"
				set FailReason to "the domain of the test address (" & OldDomain & ") appears to be in the address-literal format but 'octet' number " & n & " (" & OctetText & ") is not made up of one to three digits (Ref: RFC2821, 4.1.3)."
				concluding_response(PossibleAddress, ComplianceStatus, FailReason)
				exit repeat
			end if
			
			set TestItem to OctetText as integer
			if TestItem > 255 then
				set AddressLiteralComplianceStatus to "failed"
				set DomainComplianceStatus to "failed"
				set ComplianceStatus to "failed"
				set FailReason to "the domain of the test address (" & OldDomain & ") appears to be in the address-literal format but contains 'octets' of integers that are greater than 255 (Ref: RFC2821, 4.1.3)." & "The offending item is 'octet' number " & n & " of  the domain: (" & TestItem & ")."
				concluding_response(PossibleAddress, ComplianceStatus, FailReason)
				exit repeat
			end if
		end repeat
	end if
	
	if AddressLiteralComplianceStatus is not "failed" then
		set AddressLiteralComplianceStatus to "passed"
		set DomainComplianceStatus to "passed"
	end if
end check_address_literal


-----------------------------------------------------------------------------------------------------

-- (Re)initialises the shared state for one evaluation pass.
-- Sets the globals ComplianceStatus and FailReason to "unknown", fills the
-- global Test_Addresses with the sample addresses and builds the character
-- lists that the checking handlers need: ASCII_DIGIT, RFC2822_text,
-- RFC2822_atext, RFC2822_ctext, RFC2822_dtext and RFC2822_qtext.
-- Every list holds one-character texts.
on define_variables_and_lists()
	
	global ComplianceStatus
	set ComplianceStatus to "unknown"
	
	global FailReason
	set FailReason to "unknown"
	
	global Test_Addresses
	set Test_Addresses to {"Foo.Bar@[1.2.3.4]", "8pbn20$oqv$1@example.com", "Foo.Bar@[111.222.333.444]", "Foo.Bar@[12345.67890]", "Foo.Bar@example.com", "\"Foo\"Bar\"@example.com", "\"Foo\\\"Bar\"@example.com", "Foo.Bar@exa[mple.com", "Foo.Bar@[exa[mple.com]", "Foo.Bar@[ex\\[mple.com]", "Foo.Bar@example.c", "#!/bin/sh++@[1.2.3.4]", "Foo..Bar@example.com", "Foo.Bar@.example.com", "\"Foo.Bar\"@example.com", "\"Foo@Bar@[example.com", "Foo.Bar@[ex@mple.com]", "\"Foo.B@r\"@[ex@mple.com]", "\"Foo.B@r\"@example.com", "Foo.Bar@example.com", "\"F@t.Foo.Bar\"@[ex@mple.com]", "Foo.Bar[at]example.com", "Foo.Bar@example-isp-name.com", "Foo.Bar@example_isp_name.com", "Foo.Bar@example.com", "foo+bar@example.com", "\\<@example.com", "\"foo\\,bar\"@example.com", "\"foo+bar@example.com"}
	
	
	-- 	ALPHA	=	%d65-90 / %d97-122
	local ASCII_ALPHA
	set ASCII_ALPHA to {"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"} & {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"}
	
	-- 	DIGIT	=	%d48-57
	global ASCII_DIGIT
	set ASCII_DIGIT to {"1", "2", "3", "4", "5", "6", "7", "8", "9", "0"}
	
	-- 	control characters	=	%d1-31 / %d127
	-- built in code order: 1-8, 11, 12, 14-31 and finally 127;
	-- tab (9), line feed (10) and carriage return (13) are left out here
	local ASCII_controls_without_whitespace
	set ASCII_controls_without_whitespace to {}
	repeat with CodePoint from 1 to 31
		if {9, 10, 13} does not contain CodePoint then
			set end of ASCII_controls_without_whitespace to (ASCII character CodePoint)
		end if
	end repeat
	set end of ASCII_controls_without_whitespace to (ASCII character 127)
	
	local ASCII_controls_without_CR_LF
	set ASCII_controls_without_CR_LF to ASCII_controls_without_whitespace & {ASCII character 9}
	
	local ASCII_control_characters
	set ASCII_control_characters to ASCII_controls_without_whitespace & {ASCII character 9, ASCII character 10, ASCII character 13}
	
	-- the printable characters that are neither letters, digits, specials nor space.
	-- DEL (127) is a control character and must not be listed here, because this
	-- list goes straight into 'atext'
	local ASCII_rest_of_noncontrol_characters_except -- specials and space
	set ASCII_rest_of_noncontrol_characters_except to {"!", "#", "$", "%", "&", "'", "*", "+", "-", "/", "=", "?", "^", "_", "`", "{", "|", "}", "~"}
	
	local RFC2822_Specials
	set RFC2822_Specials to {"(", ")", "<", ">", "[", "]", ":", ";", "@", "\\", ",", ".", "\""}
	
	-- 	rest of noncontrol characters	=	%d32-47 /	; Non ALPHA, non DIGIT,
	-- 									%d58-64 /	;  and non Control
	-- 									%d91-96 /	;  Character
	-- 									%d123-126
	local ASCII_rest_of_noncontrol_characters
	set ASCII_rest_of_noncontrol_characters to ASCII_rest_of_noncontrol_characters_except & RFC2822_Specials & {space}
	
	--	NO-WS-CTL	=	%d1-8 /		; US-ASCII control characters
	--					%d11 /		;  that do not include the
	--					%d12 /		;  carriage return (13), line feed (10),
	--					%d14-31 /	;  and white space characters (9)
	--					%d127
	local RFC2822_NO_WS_CTL
	set RFC2822_NO_WS_CTL to ASCII_controls_without_whitespace
	
	--	text		=	%d1-9 /		; Characters excluding CR and LF
	--					%d11 /
	--					%d12 /
	--					%d14-127
	-- (127 arrives through the control-character list)
	global RFC2822_text
	set RFC2822_text to ASCII_ALPHA & ASCII_DIGIT & ASCII_rest_of_noncontrol_characters & ASCII_controls_without_CR_LF
	
	-- 	atext	=	ALPHA / DIGIT /	; Any character except controls,
	--				"!" / "#" /		;  SP, and specials.
	--				"$" / "%" /		;  Used for atoms
	--				"&" / "'" /
	--				"*" / "+" /
	--				"-" / "/" /
	--				"=" / "?" /
	--				"^" / "_" /
	--				"`" / "{" /
	--				"|" / "}" /
	--				"~"
	global RFC2822_atext
	set RFC2822_atext to ASCII_ALPHA & ASCII_DIGIT & ASCII_rest_of_noncontrol_characters_except
	
	-- 	ctext	=	NO-WS-CTL /		; Non white space controls
	--				%d33-39 /		; The rest of the US-ASCII
	--				%d42-91 /		;  characters not including "(",
	--				%d93-126		;  ")", or "\"
	global RFC2822_ctext
	set RFC2822_ctext to RFC2822_NO_WS_CTL & RFC2822_atext & {"<", ">", "[", "]", ":", ";", "@", ",", ".", "\""}
	
	--	dtext	=	NO-WS-CTL /		; Non white space controls
	--				%d33-90 /		; The rest of the US-ASCII
	--				%d94-126		;  characters not including "[",
	-- 								;  "]", or "\"
	global RFC2822_dtext
	set RFC2822_dtext to RFC2822_NO_WS_CTL & RFC2822_atext & {"(", ")", "<", ">", ":", ";", "@", ",", ".", "\""}
	
	-- 	qtext	=	NO-WS-CTL /		; Non white space controls
	-- 				%d33 /			; The rest of the US-ASCII
	--				%d35-91 /		;  characters not including "\"
	--				%d93-126		;  or the quote character
	global RFC2822_qtext
	set RFC2822_qtext to RFC2822_NO_WS_CTL & RFC2822_atext & {"(", ")", "<", ">", "[", "]", ":", ";", "@", ",", "."}
	
end define_variables_and_lists

Wow; a monster script! :lol:

Moving to Code Exchange.