Web Page Error Checker...a bit slow...

Feel like this is a bit brute-force, so maybe someone else can suggest something simpler/faster.

PREMISE: Given a web page open in Firefox (Safari is not an option unless it can really shave time off the script’s execution), return:

–CSS Validation Results (through proxy server to W3C)…css_results
–HTML Validation Results (W3C validator installed internal to our proxy)…html_results
–Description metadata (if present)…seo_description
–Keyword metadata (if present)…seo_keywords
–Verify if Google Analytics are used…seo_analytics
–Get window title…seo_title

I acknowledge the script is a bit rough; I’ve been throwing it together between other projects (and doing actual web QA using current methods). Eventually the results will be displayed on a panel (using FaceSpan, which is why there are currently results but no UI display of them), and the script will be initiated from an X-Keys button.

--
--INITIALIZE SCRIPT
--
-- Remember the delimiters in force at launch so the exit section can restore them.
set apple_TID to AppleScript's text item delimiters -- save previous value
-- NOTE(review): desktop_path is not referenced anywhere in the visible script.
set desktop_path to path to desktop as text
set temporay_items_path to path to temporary items
-- Coerce the alias to text BEFORE concatenating. "&" binds tighter than "as",
-- so `alias & "name" as text` builds a two-item list and joins it with whatever
-- text item delimiters happen to be active; delimiters left over from an earlier
-- aborted run would then be spliced into the middle of the temp-file path.
set temp_html to (temporay_items_path as text) & "web_helper_html_temp.txt"
-- Validator endpoints; the percent-encoded page URL is appended after "uri=".
set ccs_html_validator to "http://validator.someaddress.com/check?verbose=1&uri="
set ccs_css_validator to "http://jigsaw.w3.org/css-validator/validator?profile=css21&warning=0&uri="
-- Placeholder proxy address; replace with the real proxy before running.
set my_proxy to "http://123.456.789.123:80"
--


--
--MAIN SCRIPT
--

--get URL from Firefox
--
-- Bring Firefox to the front, then drive its menus through System Events
-- (GUI scripting) to put the current page's URL on the clipboard.
tell application "Firefox" to activate
tell application "System Events" to tell process "Firefox"
	-- Open Location: moves focus to the URL bar and selects its contents
	click menu item 3 of menu 1 of menu bar item 3 of menu bar 1
	delay 0.1
	-- Copy: places the selected URL on the clipboard
	click menu item 5 of menu 1 of menu bar item 4 of menu bar 1
	-- short pause so the clipboard is populated before it is read
	delay 0.1
end tell

--store URL
--
set location_raw to the clipboard
-- shell-safe form, used when the page itself is fetched with curl
set location_quoted to quoted form of location_raw

--percent-encode the URL so it can be passed as the validators' "uri=" value
--
-- Every character that has meaning inside a query string is swapped, not just
-- ":" and "/": an unencoded "&", "?", "=", "#" or "+" in the page URL would
-- otherwise be read by the validator as part of ITS OWN query string.
-- "%" must be handled first so the escapes added by later swaps are not
-- themselves re-encoded.
set location_w3c to location_raw
repeat with swap_pair in {{"%", "%25"}, {":", "%3A"}, {"/", "%2F"}, {"?", "%3F"}, {"&", "%26"}, {"=", "%3D"}, {"#", "%23"}, {"+", "%2B"}, {" ", "%20"}}
	set {raw_character, encoded_character} to contents of swap_pair
	-- split on the raw character...
	set text item delimiters to raw_character
	set url_pieces to every text item of location_w3c
	-- ...and rejoin with its percent-escape
	set text item delimiters to encoded_character
	set location_w3c to url_pieces as string
end repeat


--get page web code
--
-- Download the page source; location_quoted is already shell-quoted.
set raw_html to (do shell script "/usr/bin/curl " & location_quoted)

--write to temp file
--
set temp_file_ref to open for access file temp_html with write permission
try
	-- Truncate first: a temp file left behind by an earlier aborted run would
	-- otherwise be appended to, mixing two pages' source in one file.
	set eof of temp_file_ref to 0
	write (raw_html as text) to temp_file_ref
	close access temp_file_ref
on error write_error_message number write_error_number
	-- Never leave the file open: a leaked handle makes the next
	-- "open for access" on the same file fail.
	try
		close access temp_file_ref
	end try
	error write_error_message number write_error_number
end try


--get window title with TIDs
--
-- Extract the text between <TITLE> and </TITLE>.
-- Guarded like the keywords/description sections: a page with no title
-- element makes "text item 2" fail, which previously aborted the whole script
-- (leaving the delimiters changed and the temp file undeleted).
try
	set text item delimiters to "<TITLE>"
	set check_raw_title to text item 2 of raw_html
	set text item delimiters to "</TITLE>"
	set seo_title to text item 1 of check_raw_title
on error
	set seo_title to "(no title found)"
end try

--get keywords with TIDs
--
-- Extract the content attribute of the keywords meta tag.
-- First attempt expects double-quoted attributes, the fallback single-quoted.
-- The value is cut at the first matching closing quote rather than at "\" />":
-- a quoted attribute value cannot contain its own quote character, so this is
-- exact, and it also handles tags closed with ">" or "/>" (no space), which
-- previously returned the rest of the line as garbage.
try
	set text item delimiters to "<meta name=\"keywords\" content=\""
	set check_raw_keywords to text item 2 of raw_html
	-- only the first line after the marker is considered
	set check_raw_keywords to paragraph 1 of check_raw_keywords
	set text item delimiters to "\""
	set seo_keywords to text item 1 of check_raw_keywords
on error
	try
		set text item delimiters to "<meta name='keywords' content='"
		set check_raw_keywords to text item 2 of raw_html
		set check_raw_keywords to paragraph 1 of check_raw_keywords
		set text item delimiters to "'"
		set seo_keywords to text item 1 of check_raw_keywords
	on error
		-- neither quoting style matched
		set seo_keywords to "(no keywords found)"
	end try
end try

--get description with TIDs
--
-- Extract the content attribute of the description meta tag.
-- First attempt expects double-quoted attributes, the fallback single-quoted.
-- The value is cut at the first matching closing quote rather than at "\" />":
-- a quoted attribute value cannot contain its own quote character, so this is
-- exact, and it also handles tags closed with ">" or "/>" (no space), which
-- previously returned the rest of the line as garbage.
try
	set text item delimiters to "<meta name=\"description\" content=\""
	set check_raw_description to text item 2 of raw_html
	-- only the first line after the marker is considered
	set check_raw_description to paragraph 1 of check_raw_description
	set text item delimiters to "\""
	set seo_description to text item 1 of check_raw_description
on error
	try
		set text item delimiters to "<meta name='description' content='"
		set check_raw_description to text item 2 of raw_html
		set check_raw_description to paragraph 1 of check_raw_description
		set text item delimiters to "'"
		set seo_description to text item 1 of check_raw_description
	on error
		-- neither quoting style matched
		set seo_description to "(no description found)"
	end try
end try

--do Google Analytics exist?
--
-- The page source is already in memory in raw_html, so test it directly
-- instead of launching grep on the temp file. This removes a shell round-trip
-- and the text-versus-integer comparison of grep's count output.
-- The match is now literal: grep treated the "." characters as wildcards.
if raw_html contains "google-analytics.com/ga.js" then
	set seo_analytics to "YES"
else
	set seo_analytics to "NO"
end if


--get HTML validation
--
--get results page web code
-- Build the shell-quoted validator request URL for this page.
set validator_submit_html to quoted form of (ccs_html_validator & location_w3c as string)
-- A failed request (validator down, no network) is reported in the result
-- instead of aborting the script before the exit section can clean up.
try
	set raw_validation_results_html to (do shell script "/usr/bin/curl " & validator_submit_html)
on error
	set raw_validation_results_html to missing value
end try
--get results with TIDs
if raw_validation_results_html is missing value then
	set html_results to "(validator unreachable)"
else
	try
		-- Split on the phrase alone. The previous delimiter also embedded a
		-- line break and six spaces, which only matches if the script's line
		-- ending agrees with the one "do shell script" returns.
		set text item delimiters to "Failed validation, "
		set check_raw_html_results to text item 2 of raw_validation_results_html
		-- " Error" matches both "1 Error" and "12 Errors"
		set text item delimiters to " Error"
		set html_results to text item 1 of check_raw_html_results
	on error
		-- failure phrase not present: no errors reported
		set html_results to "0"
	end try
end if


--get CSS validation
--
--get results page web code
-- Build the shell-quoted validator request URL for this page.
set validator_submit_css to quoted form of (ccs_css_validator & location_w3c as string)
-- Only pass -x when a proxy is configured, so the script also works on
-- networks without one (set my_proxy to "" to go direct). The proxy address
-- is shell-quoted like every other value handed to the shell.
if my_proxy is "" then
	set proxy_option to ""
else
	set proxy_option to "-x " & (quoted form of my_proxy) & " "
end if
-- A failed request (bad proxy, no network) is reported in the result
-- instead of aborting the script before the exit section can clean up.
try
	set raw_validation_results_css to (do shell script "/usr/bin/curl " & proxy_option & validator_submit_css)
on error
	set raw_validation_results_css to missing value
end try
--get results with TIDs
if raw_validation_results_css is missing value then
	set css_results to "(validator unreachable)"
else
	try
		-- the error count sits between ">Errors (" and ")</a></li>"
		set text item delimiters to ">Errors ("
		set check_raw_css_results to text item 2 of raw_validation_results_css
		set text item delimiters to ")</a></li>"
		set css_results to text item 1 of check_raw_css_results
	on error
		-- no error-count link present: no errors reported
		set css_results to "0"
	end try
end if


--
--EXIT SCRIPT
--
-- Put back the text item delimiters saved at the top of the script.
set AppleScript's text item delimiters to apple_TID
-- Remove the temp file outright. Finder's "delete" only moves it to the Trash
-- (and errors if the file is missing); rm -f removes it and tolerates absence.
do shell script "/bin/rm -f " & quoted form of POSIX path of temp_html

Euhm, I’m getting an error at the cURL command (line 144):

You have to insert your proxy address…or edit that line of code quick to eliminate the need for the proxy. :wink:

Unfortunately for external addresses, curl has to go through the corporate proxy server. :frowning: