Sync CTS Data Repositories #28
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sync CTS Data Repositories
#
# Manually triggered workflow that clones a fixed list of upstream text
# repositories, copies their XML files into this repository's data/ directory
# (licence-checked, see the "Clone and Merge" step), then commits the result
# and pushes it to the `public` branch.
name: Sync CTS Data Repositories

on:
  # schedule:
  #   - cron: '0 0 * * *' # Runs daily at midnight UTC
  workflow_dispatch: # Allows manual triggering

jobs:
  sync-repos:
    runs-on: ubuntu-latest
    permissions:
      contents: write # The last step pushes with GITHUB_TOKEN
    steps:
      - name: Checkout src_data repository
        uses: actions/checkout@v4
        with:
          persist-credentials: false # Prevents issues with default authentication

      - name: Set up Git
        run: |
          git config --global user.email "193622162+nauarchus@users.noreply.github.com"
          git config --global user.name "github-actions[bot]"

      - name: Clone and Merge Permissively Licensed XML Data Only
        run: |
          set -euo pipefail

          # Define repositories
          REPOS=(
            "https://github.com/nauarchus/Marcion_Evangelion.git"
            "https://github.com/nauarchus/Evangelium_Petri.git"
            "https://github.com/nauarchus/Marcion_Apostolos.git"
            "https://github.com/nauarchus/Ignatius_Epistulae.git"
          )

          # A file is copied only if it carries a <licence target="..."> pointing
          # at CC BY 4.0, CC BY-NC-SA 4.0 or CC BY-SA 4.0.
          LICENCE_RE='<licence[^>]*target="https://creativecommons\.org/licenses/(by|by-nc-sa|by-sa)/4\.0/"'

          # Clone outside the checked-out tree so a clone can never collide with
          # a tracked path, and remove everything on any exit path.
          WORK_DIR=$(mktemp -d)
          trap 'rm -rf -- "$WORK_DIR"' EXIT

          # Ensure data directory exists
          mkdir -p data

          for REPO in "${REPOS[@]}"; do
            REPO_NAME=$(basename "$REPO" .git)
            echo "Processing $REPO_NAME..."

            CLONE_DIR="$WORK_DIR/$REPO_NAME"
            git clone --depth=1 "$REPO" "$CLONE_DIR"

            SRC_DIR="$CLONE_DIR/data"
            if [[ -d "$SRC_DIR" ]]; then
              # NUL-delimited so file names with spaces or newlines survive intact.
              while IFS= read -r -d '' FILE; do
                REL_PATH="${FILE#"$SRC_DIR"/}"
                DEST="data/$REL_PATH"

                if [[ "$REL_PATH" == *"__cts__.xml"* ]]; then
                  # __cts__.xml files skip the licence check but never overwrite
                  # a copy that already exists in data/.
                  if [[ -f "$DEST" ]]; then
                    echo "Skipping existing $REL_PATH."
                    continue
                  fi
                elif ! grep -qE "$LICENCE_RE" -- "$FILE"; then
                  echo "Skipping $REPO_NAME/data/$REL_PATH due to non-permissive license."
                  continue
                fi

                mkdir -p -- "$(dirname -- "$DEST")"
                cp -- "$FILE" "$DEST"
              done < <(find "$SRC_DIR" -type f -name '*.xml' -print0)
            fi

            # Clean up the cloned repository before fetching the next one
            rm -rf -- "$CLONE_DIR"
          done

      - name: Commit and Push Changes
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          set -euo pipefail

          git add data/

          # Exit early when nothing is staged, so that a genuine commit failure
          # below is not mistaken for "nothing to do".
          if git diff --cached --quiet; then
            echo "No changes to commit"
            exit 0
          fi

          git commit -m "sync doc repos diff XML data - $(date)"

          # NOTE(review): this pushes the local branch named `public`, which only
          # exists if the workflow is dispatched on that branch - confirm.
          git push "https://x-access-token:${GITHUB_TOKEN}@github.com/nauarchus/src_data.git" public