meonkeys commented on code in PR #56: URL: https://github.com/apache/fineract-site/pull/56#discussion_r2990719750
########## scripts/run_whimsy_checks.rb: ########## @@ -0,0 +1,239 @@ +#!/usr/bin/env ruby +# Replicates exactly how whimsy's site-scan.rb uses sitestandards.rb +# Source: https://github.com/apache/whimsy/blob/master/tools/site-scan.rb +# Source: https://github.com/apache/whimsy/blob/master/lib/whimsy/sitestandards.rb + +require 'net/http' +require 'nokogiri' +require 'uri' + +######################################################################### +# Exact copy of SiteStandards module from sitestandards.rb +######################################################################### +module SiteStandards + CHECK_TEXT = 'text' + CHECK_CAPTURE = 'capture' + CHECK_VALIDATE = 'validate' + CHECK_TYPE = 'type' + CHECK_POLICY = 'policy' + CHECK_DOC = 'doc' + + COMMON_CHECKS = { + 'foundation' => { + CHECK_TEXT => %r{apache|asf|foundation}i, + CHECK_CAPTURE => %r{^(https?:)?//(www\.)?apache\.org/?$}, + CHECK_VALIDATE => %r{apache|asf|foundation}i, + CHECK_TYPE => 'text', + CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#navigation', + CHECK_DOC => 'All projects must feature some prominent link back to the main ASF homepage at http://www.apache.org/', + }, + 'events' => { + CHECK_TEXT => nil, + CHECK_CAPTURE => %r{apachecon\.com/event-images|events\.apache\.org|apache\.org/events/current-event}, + CHECK_VALIDATE => %r{^https?://((www\.)?apache\.org/events/current-event|events\.apache\.org|www\.apachecon\.com/event-images/snippet\.js)}, + CHECK_TYPE => 'href', + CHECK_POLICY => 'https://www.apachecon.com/event-images/', + CHECK_DOC => 'Projects SHOULD include a link to any current CommunityOverCode event.', + }, + 'license' => { + CHECK_TEXT => /^license$/, + CHECK_CAPTURE => %r{apache\.org}, + CHECK_VALIDATE => %r{^https?://.*apache.org/licenses/?$}, + CHECK_TYPE => 'href', + CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#navigation', + CHECK_DOC => 'There should be a "License" (*not* "Licenses") navigation link which points to: 
http[s]://www.apache.org/licenses[/]. (Do not link to sub-pages)', + }, + 'thanks' => { + CHECK_TEXT => /\A(sponsors|thanks!?|thanks to our sponsors)\z/, + CHECK_CAPTURE => /\A(sponsors|thanks!?|thanks to our sponsors)\z/, + CHECK_VALIDATE => %r{^https?://.*apache.org/foundation/(thanks|sponsors)}, + CHECK_TYPE => 'href', + CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#navigation', + CHECK_DOC => '"Sponsors", "Thanks" or "Thanks to our Sponsors" should link to: http://www.apache.org/foundation/thanks.html or sponsors.html', + }, + 'security' => { + CHECK_TEXT => /security/, + CHECK_CAPTURE => /security/, + CHECK_VALIDATE => %r{^(https?://.*apache.org|[^:]*)/.*[Ss]ecurity}, + CHECK_TYPE => 'href', + CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#navigation', + CHECK_DOC => '"Security" should link to a project-specific page or http://www.apache.org/security/', + }, + 'sponsorship' => { + CHECK_TEXT => %r{sponsorship|\bdonate\b|sponsor\sapache|sponsoring\sapache|\bsponsor\b}, + CHECK_CAPTURE => %r{sponsorship|\bdonate\b|sponsor\sapache|sponsoring\sapache|\bsponsor\b}, + CHECK_VALIDATE => %r{^https?://.*apache.org/foundation/sponsorship}, + CHECK_TYPE => 'href', + CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#navigation', + CHECK_DOC => '"Sponsorship", "Sponsor Apache", or "Donate" should link to: http://www.apache.org/foundation/sponsorship.html', + }, + 'trademarks' => { + CHECK_TEXT => %r{\btrademarks\b}, + CHECK_CAPTURE => %r{\btrademarks\b}, + CHECK_VALIDATE => %r{trademarks of [Tt]he Apache Software Foundation}, + CHECK_TYPE => 'text', + CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs#attributions', + CHECK_DOC => 'All project homepages must feature a prominent trademark attribution.', + }, + 'copyright' => { + CHECK_TEXT => %r{((Copyright|©).*apache|apache.*(Copyright|©))}i, + CHECK_CAPTURE => %r{(Copyright|©)}i, + CHECK_VALIDATE => %r{((Copyright|©).*apache|apache.*(Copyright|©))}i, + CHECK_TYPE => 
'text', + CHECK_POLICY => 'https://www.apache.org/legal/src-headers.html#headers', + CHECK_DOC => 'All website content SHOULD include a copyright notice for the ASF.', + }, + 'privacy' => { + CHECK_TEXT => %r{Privacy Policy}i, + CHECK_CAPTURE => %r{(Privacy)}i, + CHECK_VALIDATE => %r{\Ahttps://privacy\.apache\.org/policies/privacy-policy-public\.html\z + | + \Ahttps?://(?:www\.)?apache\.org/foundation/policies/privacy\.html\z + }ix, + CHECK_TYPE => 'href', + CHECK_POLICY => 'https://www.apache.org/foundation/marks/pmcs.html#navigation', + CHECK_DOC => 'All websites must link to the Privacy Policy.', + }, + } +end + +######################################################################### +# Exact copy of helper functions from site-scan.rb +######################################################################### +def squash(text) + text.scrub.gsub(/[[:space:]]+/, ' ').strip +end + +def get_link_text(anode) + bits = [] + anode.traverse do |node| + if node.name == 'text' + bits << node.text unless node.parent.name == 'span' and + node.parent.attribute('class')&.value&.end_with? 'sr-only' + end + end + squash(bits.join(' ')) +end + +def save_events(data, value) + prev = data[:events] + if prev and prev != value + puts "Events: already have '#{prev}', not storing '#{value}'" + else + data[:events] = value + end +end + +<<<<<<< HEAD Review Comment: Looks like a merge conflict marker. I think this is breaking the build. ########## .github/workflows/whimsy-daily-check.yml: ########## @@ -0,0 +1,63 @@ +name: Daily Whimsy Check + +on: + schedule: + - cron: '0 8 * * *' # runs every day at 8am UTC + workflow_dispatch: + +jobs: + whimsy-check: + runs-on: ubuntu-latest + steps: + - name: Check whimsy site-scan.json for fineract errors + run: | + echo "Fetching whimsy site-scan.json..." + DATA=$(curl -s https://whimsy.apache.org/public/site-scan.json | jq '.fineract') + + echo "Whimsy scan results for fineract:" + echo "$DATA" | jq . 
+ + # Check all fields from sitestandards.rb COMMON_CHECKS + FAILED=0 + + check_field() { + FIELD=$1 + VALUE=$(echo "$DATA" | jq -r ".$FIELD") + if [ "$VALUE" = "null" ] || [ -z "$VALUE" ]; then + echo "FAIL: $FIELD is null/missing" + FAILED=1 + else + echo "PASS: $FIELD = $VALUE" + fi + } + + check_field "uri" + check_field "foundation" + check_field "events" + check_field "license" + check_field "thanks" + check_field "security" + check_field "sponsorship" + check_field "trademarks" + check_field "copyright" + check_field "privacy" + check_field "resources" + check_field "image" + check_field "csp_check" Review Comment: I think the csp check is done differently. The whimsy test passes if the `csp` property matches, exactly: `default-src 'self' data: blob: 'unsafe-inline' 'unsafe-eval' https://www.apachecon.com/ https://www.communityovercode.org/ https://*.apache.org/ https://apache.org/ https://*.scarf.sh/ ; script-src 'self' data: blob: 'unsafe-inline' 'unsafe-eval' https://www.apachecon.com/ https://www.communityovercode.org/ https://*.apache.org/ https://apache.org/ https://*.scarf.sh/ ; style-src 'self' data: blob: 'unsafe-inline' 'unsafe-eval' https://www.apachecon.com/ https://www.communityovercode.org/ https://*.apache.org/ https://apache.org/ https://*.scarf.sh/ ; frame-ancestors 'self'; frame-src 'self' data: blob: 'unsafe-inline' 'unsafe-eval' https://www.apachecon.com/ https://www.communityovercode.org/ https://*.apache.org/ https://apache.org/ https://*.scarf.sh/ ; worker-src 'self' data: blob:;` So I think we could just match that too? That's my best guess based on examining [failing and succeeding checks for ASF projects in the JSON](https://whimsy.apache.org/site/check/csp_check). 
Take a look at, e.g.: `jq .fineract.csp` (OK), `jq .cordova.csp` (OK), `jq .cayenne.csp` (FAIL). https://infra.apache.org/tools/csp.html might help us confirm or deny my guess. ########## scripts/test_run_whimsy_checks.rb: ########## @@ -0,0 +1,373 @@ +#!/usr/bin/env ruby Review Comment: How about we run this for PRs too? E.g., add a `test` line after the `build` (last) line in `.github/workflows/site-pr-check.yml`. ########## .github/workflows/whimsy-daily-check.yml: ########## @@ -0,0 +1,63 @@ +name: Daily Whimsy Check + +on: + schedule: + - cron: '0 8 * * *' # runs every day at 8am UTC + workflow_dispatch: + +jobs: + whimsy-check: + runs-on: ubuntu-latest + steps: + - name: Check whimsy site-scan.json for fineract errors + run: | + echo "Fetching whimsy site-scan.json..." Review Comment: Please move this shell code into a new file under `scripts/` and run it, similar to how we do it in `.github/workflows/verify-commits.yml`. I like to keep these GitHub Actions workflow YAML config files simple and to support easy local runs. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
