Feel like this is a bit brute-force, so maybe someone else can suggest something simpler/faster.
PREMISE: Given a web page open in Firefox (Safari is not an option unless it can really shave time off the script’s execution), return:
–CSS Validation Results (through proxy server to W3C)…css_results
–HTML Validation Results (W3C validator installed internal to our proxy)…html_results
–Description metadata (if present)…seo_description
–Keyword metadata (if present)…seo_keywords
–Verify if Google Analytics are used…seo_analytics
–Get window title…seo_title
I acknowledge the script is a bit rough; I've been throwing it together between other projects (and doing actual web QA using current methods). Eventually the results will be displayed on a panel (using FaceSpan, which is why the script currently produces results but has no UI to display them), and the script will be initiated from an X-Keys button.
--
--INITIALIZE SCRIPT
--
-- Remember the delimiters in force at launch so the EXIT section can restore them.
set apple_TID to AppleScript's text item delimiters -- save previous value
set desktop_path to path to desktop as text
set temporay_items_path to path to temporary items
-- Coerce the alias to text BEFORE concatenating. "alias & text" builds a two-item
-- list, and coercing that list to text joins it with the current text item
-- delimiters, so any non-default delimiter (e.g. one left behind by an earlier
-- aborted run) would end up in the middle of the path.
set temp_html to (temporay_items_path as text) & "web_helper_html_temp.txt"
-- Validator endpoints; the percent-encoded page URL is appended to each one.
set ccs_html_validator to "http://validator.someaddress.com/check?verbose=1&uri="
set ccs_css_validator to "http://jigsaw.w3.org/css-validator/validator?profile=css21&warning=0&uri="
-- Proxy handed to curl's -x option for the CSS validator request.
set my_proxy to "http://123.456.789.123:80"
--
--
--MAIN SCRIPT
--
--copy the frontmost page's URL out of Firefox by driving its menus
--
tell application "Firefox" to activate
tell application "System Events" to tell process "Firefox"
	tell menu bar 1
		click menu item 3 of menu 1 of menu bar item 3 --Open Location (puts focus in the URL bar)
		delay 0.1
		click menu item 5 of menu 1 of menu bar item 4 --Copy (URL lands on the clipboard)
		delay 0.1
	end tell
end tell
--store URL
--
--ask for the clipboard as text so later text operations get plain text
set location_raw to the clipboard as text
set location_quoted to quoted form of location_raw
--percent-encode the URL so it survives as the value of the validators' "uri=" parameter
--(previously only ":" and "/" were escaped, so a URL with a query string or fragment
--was cut short or merged into the validator's own parameters)
--"%" is handled first so the escapes added by the later passes are not encoded again
set location_w3c to location_raw
repeat with swap_pair in {{"%", "%25"}, {":", "%3A"}, {"/", "%2F"}, {"?", "%3F"}, {"&", "%26"}, {"=", "%3D"}, {"#", "%23"}, {"+", "%2B"}, {" ", "%20"}}
	set {raw_character, escaped_character} to contents of swap_pair
	--split on the raw character, then rejoin with its escape
	set text item delimiters to raw_character
	set url_pieces to every text item of location_w3c
	set text item delimiters to escaped_character
	set location_w3c to url_pieces as string
end repeat
--get page web code
--
set raw_html to (do shell script "/usr/bin/curl " & location_quoted)
--write to temp file (read later by the Google Analytics grep)
--
set temp_file_ref to open for access file temp_html with write permission
try
	--truncate first: if an earlier run aborted before cleanup, its temp file is still
	--there and writing at eof would append this page to the old one
	set eof temp_file_ref to 0
	write (raw_html as text) to temp_file_ref
	close access temp_file_ref
on error write_error_message number write_error_number
	--never leave the file open; then pass the original error on unchanged
	close access temp_file_ref
	error write_error_message number write_error_number
end try
--get window title with TIDs
--
--guarded like the keywords/description lookups: a page without a <title> element
--used to abort the whole script here ("can't get text item 2")
try
	set text item delimiters to "<TITLE>"
	set check_raw_title to text item 2 of raw_html
	set text item delimiters to "</TITLE>"
	set seo_title to text item 1 of check_raw_title
on error
	set seo_title to "(no title found)"
end try
--get keywords with TIDs
--
--try the double-quoted form of the tag first, then the single-quoted form
--the value now ends at its closing quote character instead of at the literal
--"\" />" sequence, so tags closed with ">" or " >" are handled too (they used to
--return the rest of the line as the "keywords")
set seo_keywords to "(no keywords found)"
repeat with quote_ref in {"\"", "'"}
	set quote_mark to contents of quote_ref
	try
		set text item delimiters to "<meta name=" & quote_mark & "keywords" & quote_mark & " content=" & quote_mark
		--errors (and falls through to the next quote style) when the tag is absent
		set check_raw_keywords to text item 2 of raw_html
		set check_raw_keywords to paragraph 1 of check_raw_keywords
		set text item delimiters to quote_mark
		set seo_keywords to text item 1 of check_raw_keywords
		exit repeat
	end try
end repeat
--get description with TIDs
--
--try the double-quoted form of the tag first, then the single-quoted form
--the value now ends at its closing quote character instead of at the literal
--"\" />" sequence, so tags closed with ">" or " >" are handled too (they used to
--return the rest of the line as the "description")
set seo_description to "(no description found)"
repeat with quote_ref in {"\"", "'"}
	set quote_mark to contents of quote_ref
	try
		set text item delimiters to "<meta name=" & quote_mark & "description" & quote_mark & " content=" & quote_mark
		--errors (and falls through to the next quote style) when the tag is absent
		set check_raw_description to text item 2 of raw_html
		set check_raw_description to paragraph 1 of check_raw_description
		set text item delimiters to quote_mark
		set seo_description to text item 1 of check_raw_description
		exit repeat
	end try
end repeat
--do Google Analytics exist?
--
--count the lines of the saved page that mention the ga.js loader
--  --fixed-strings: match the text literally ("." is otherwise a regex wildcard)
--  the count is coerced to an integer so the test below compares numbers,
--  not a text value against a number
--grep exits non-zero when nothing matches, which do shell script reports as an
--error; that case (and any other failure) is treated as "not found"
try
	set grep_result to (do shell script "/usr/bin/grep --count --fixed-strings 'google-analytics.com/ga.js' " & quoted form of POSIX path of temp_html) as integer
on error
	set grep_result to 0
end try
--
if grep_result ≥ 1 then
	set seo_analytics to "YES"
else
	set seo_analytics to "NO"
end if
--get HTML validation
--
--get results page web code
set validator_submit_html to quoted form of (ccs_html_validator & location_w3c as string)
set raw_validation_results_html to (do shell script "/usr/bin/curl " & validator_submit_html)
--get results with TIDs
--the two markers are matched separately rather than as one delimiter with a line
--break embedded in it: that line break had to be byte-identical to the line endings
--of the do shell script result, and on a mismatch the page was silently reported as
--having 0 errors
try
	set text item delimiters to "class=\"invalid\">"
	set check_raw_html_results to text item 2 of raw_validation_results_html
	set text item delimiters to "Failed validation, "
	set check_raw_html_results to text item 2 of check_raw_html_results
	set text item delimiters to " Errors"
	set html_results to text item 1 of check_raw_html_results
on error
	--neither marker found: the report is taken to mean the page passed
	set html_results to "0"
end try
--get CSS validation
--
--fetch the CSS validator's report for the page, going out through the proxy
set validator_submit_css to quoted form of (ccs_css_validator & location_w3c as string)
set css_curl_command to "/usr/bin/curl -x" & my_proxy & " " & validator_submit_css
set raw_validation_results_css to do shell script css_curl_command
--pull N out of ">Errors (N)</a></li>"; if that marker is missing the count stays "0"
set css_results to "0"
try
	set text item delimiters to ">Errors ("
	set check_raw_css_results to text item 2 of raw_validation_results_css
	set text item delimiters to ")</a></li>"
	set css_results to text item 1 of check_raw_css_results
end try
--
--EXIT SCRIPT
--
--put back the delimiters that were saved during initialization
set AppleScript's text item delimiters to apple_TID
--remove the scratch file outright: Finder's "delete" only moves it to the Trash
--(one more copy per run) and errors if the file is missing; rm -f does neither
do shell script "/bin/rm -f " & quoted form of POSIX path of temp_html