#!/bin/bash
#
# File: check-orcids
# Version: 0.1
#
# Checks whether the index.html file has valid ORCIDs
# (C) 2025 by Manfred Jeusfeld. This script is made available under the
# Creative Commons Attribution-ShareAlike CC-BY-SA 4.0 license.
#
# The BASH script is part of the scripts used for CEUR-WS.org. No warranty whatsoever. No support.
#
# May require the installation of certain packages
# (the script calls curl, iconv, sed, tr and a grep that supports -P).
#
# Note that this script is updated on a regular basis, in particular to cover changes with
# the CEURART layout for papers. See ceur-ws.org/Vol-XXX/ for the CEURART specification.
#
# Call this script in the directory that contains the index.html file that you want to check.
#
# Exit status: 1 if index.html is missing, 0 otherwise. Name mismatches are
# reported on stdout only; they do not change the exit status.
#
# Manfred 2025-11-16 (2025-11-17)
#

if [[ ! -f "index.html" ]]; then
  echo "No file index.html found for checking orcids."
  exit 1
fi

echo "(*) Checking whether author names match their profile name on ORCID.org"

ORCIDMISMATCH="no"
# Set to "yes" when at least one ORCID could not be looked up (network error),
# so that the final summary does not claim "ok" for unchecked entries.
ORCIDQUERYFAILED="no"

#######################################
# Normalize a name: remove accents, lowercase, collapse runs of spaces and
# trim leading/trailing whitespace.
# Arguments: $1 - name to normalize
# Outputs:   normalized name on stdout
#######################################
normalize() {
  local raw=$1
  local ascii
  ascii=$(printf '%s\n' "$raw" | iconv -f utf-8 -t ascii//TRANSLIT 2>/dev/null)
  # If iconv produced nothing (not installed, or it rejected the input),
  # continue with the untransliterated text instead of an empty name.
  [[ -n "$ascii" ]] || ascii=$raw
  printf '%s\n' "$ascii" \
    | tr '[:upper:]' '[:lower:]' \
    | tr -s ' ' \
    | sed 's/^[[:space:]]*//;s/[[:space:]]*$//'
}

#######################################
# Decide whether a normalized ORCID name matches a normalized CEUR name.
# The ORCID name must consist of at least two words; it matches when both its
# first word and its last word occur as literal substrings of the CEUR name.
# Arguments: $1 - normalized name from index.html
#            $2 - normalized name (profile name or alias) from ORCID.org
# Returns:   0 on match, 1 otherwise
#######################################
name_matches() {
  local ceur_norm=$1
  local candidate_norm=$2
  local -a words=()
  # read -a splits on whitespace without glob expansion.
  read -r -a words <<< "$candidate_norm"
  (( ${#words[@]} >= 2 )) || return 1
  local given=${words[0]}
  local family=${words[${#words[@]}-1]}
  # Quoted operands make this a literal substring test, so characters such as
  # '.', '?' or '(' in a name are not interpreted as regex/glob operators.
  [[ "$ceur_norm" == *"$given"* && "$ceur_norm" == *"$family"* ]]
}

# Loop over all span elements in index.html that carry an ORCID
while IFS= read -r span; do
  # Extract the author/editor name (HTML entities are replaced by a space)
  name=$(printf '%s\n' "$span" \
    | sed -E 's/.*>([^<]+)<\/span>/\1/' \
    | sed 's/&[a-zA-Z0-9#]\+;/ /g')

  # Extract the ORCID if present
  orcid=$(printf '%s\n' "$span" | grep -oP 'itemid="https://orcid\.org/\K[0-9\-X]+')
  [[ -z "$orcid" ]] && continue

  # Query ORCID Public API. A transport failure is reported separately so that
  # a network outage is not mistaken for a name mismatch.
  if ! json=$(curl -s --connect-timeout 10 --max-time 30 \
      -H "Accept: application/json" \
      "https://pub.orcid.org/v3.0/$orcid/person"); then
    ORCIDQUERYFAILED="yes"
    echo "Warning: could not query ORCID.org for $orcid ($name); entry not checked." >&2
    continue
  fi

  # Extract main given and family names
  api_given=$(printf '%s\n' "$json" | grep -oP '"given-names":\s*\{"value":\s*"\K[^"]+')
  api_family=$(printf '%s\n' "$json" | grep -oP '"family-name":\s*\{"value":\s*"\K[^"]+')
  api_full="$api_given $api_family"

  # Extract other-names / aliases, one array element per alias so that
  # multi-word aliases stay intact.
  # NOTE(review): the pattern assumes "content" directly follows "other-name":{
  # in the API response - confirm against an actual ORCID v3.0 person record.
  alias_list=()
  mapfile -t alias_list < <(printf '%s\n' "$json" \
    | grep -oP '"other-name":\s*\{"content":\s*"\K[^"]+')

  # Human-readable alias list for the mismatch report
  aliases=""
  for alias in "${alias_list[@]}"; do
    aliases+="${aliases:+, }$alias"
  done

  # Build array of all names to check
  name_list=("$api_full" "${alias_list[@]}")

  # Normalize CEUR name
  ceur_norm=$(normalize "$name")

  # Check against all ORCID names
  match_found=0
  for candidate in "${name_list[@]}"; do
    if name_matches "$ceur_norm" "$(normalize "$candidate")"; then
      match_found=1
      break
    fi
  done

  # Only output mismatches
  if [[ $match_found -eq 0 ]]; then
    ORCIDMISMATCH="yes"
    echo "In index.html: $name → $orcid → On ORCID.org: $api_full (aliases: $aliases)"
  fi
  # grep -o only prints non-empty matches, so the pattern must match the whole
  # span element. Only spans whose opening tag carries an orcid.org itemid and
  # whose content is plain text are selected; the opening tag, the name and
  # the closing tag must be on one line of index.html.
done < <(grep -oP '<span\b[^>]*itemid="https://orcid\.org/[^>]*>[^<]+</span>' index.html)

if [[ "$ORCIDMISMATCH" == "yes" ]]; then
  echo " "
  echo " ===> Make sure that the CEURAUTHOR names with ORCIDs match the profile name or its aliases on ORCID.org."
  echo " It may be that the CEURAUTHOR name in index.html has a spelling error, or the ORCID is false, or"
  echo " the name in the ORCID profile has a spelling error."
  echo " The profile should include suitable aliases ('also known as') of the author name. If a mismatch"
  echo " cannot be corrected, then remove the ORCID tags from the CEURAUTHOR element in index.html."
  echo " "
elif [[ "$ORCIDQUERYFAILED" == "yes" ]]; then
  echo " "
  echo " ===> Some ORCIDs could not be checked because ORCID.org could not be queried. Re-run the check later."
  echo " "
else
  echo "ok"
  echo " "
fi