Skip to content

Sync CTS Data Repositories #28

Sync CTS Data Repositories

Sync CTS Data Repositories #28

Workflow file for this run

# Workflow: merge XML data from several upstream CTS text repositories into
# this repository's data/ directory, then commit and push the result.
#
# Copy rules applied to every *.xml file under an upstream repo's data/ tree:
#   * __cts__.xml files are copied only if the target does not exist yet;
#   * any other XML file is copied only if it carries a <licence> element
#     whose target is one of the accepted Creative Commons 4.0 URLs;
#   * everything else is skipped with a log message.
name: Sync CTS Data Repositories

on:
  # schedule:
  #   - cron: '0 0 * * *'  # Runs daily at midnight UTC
  workflow_dispatch:  # Allows manual triggering

jobs:
  sync-repos:
    runs-on: ubuntu-latest
    # The final step pushes with GITHUB_TOKEN, which needs write access to
    # repository contents; state it explicitly so the job does not depend on
    # the repository's default token permissions.
    permissions:
      contents: write
    steps:
      - name: Checkout src_data repository
        uses: actions/checkout@v4
        with:
          # Prevents issues with default authentication; the push step below
          # supplies the token explicitly in the remote URL instead.
          persist-credentials: false

      - name: Set up Git
        run: |
          git config --global user.email "193622162+nauarchus@users.noreply.github.com"
          git config --global user.name "github-actions[bot]"

      - name: Clone and Merge Permissively Licensed XML Data Only
        run: |
          set -euo pipefail

          # Define repositories
          REPOS=(
            "https://github.com/nauarchus/Marcion_Evangelion.git"
            "https://github.com/nauarchus/Evangelium_Petri.git"
            "https://github.com/nauarchus/Marcion_Apostolos.git"
            "https://github.com/nauarchus/Ignatius_Epistulae.git"
          )

          # Accepted licences: a file qualifies when its <licence> element
          # targets one of these Creative Commons 4.0 URLs.
          CC='https://creativecommons.org/licenses'
          LICENCE_RE="<licence[^>]*target=\"(${CC}/by/4.0/|${CC}/by-nc-sa/4.0/|${CC}/by-sa/4.0/)\""

          # Ensure data directory exists
          mkdir -p data

          for REPO in "${REPOS[@]}"; do
            REPO_NAME=$(basename "$REPO" .git)
            echo "Processing $REPO_NAME..."

            # Clone into a real temporary directory outside the working tree,
            # so the clone cannot collide with a tracked path of the same name.
            CLONE_DIR=$(mktemp -d "${RUNNER_TEMP:-/tmp}/${REPO_NAME}.XXXXXX")
            git clone --depth=1 "$REPO" "$CLONE_DIR"

            # Check XML files for permissive license before copying,
            # except for __cts__.xml files
            if [ -d "$CLONE_DIR/data" ]; then
              # NUL-delimited so unusual file names survive intact; -type f
              # keeps directories that happen to end in .xml out of the loop.
              find "$CLONE_DIR/data" -type f -name "*.xml" -print0 |
                while IFS= read -r -d '' FILE; do
                  REL_PATH="${FILE#"$CLONE_DIR/data/"}"

                  # If file is __cts__.xml (exact base name), only copy if it
                  # does not already exist
                  if [[ "${FILE##*/}" == "__cts__.xml" ]]; then
                    if [ ! -f "data/$REL_PATH" ]; then
                      mkdir -p "data/$(dirname "$REL_PATH")"
                      cp "$FILE" "data/$REL_PATH"
                    else
                      echo "Skipping existing $REL_PATH."
                    fi
                  elif grep -qE "$LICENCE_RE" "$FILE"; then
                    mkdir -p "data/$(dirname "$REL_PATH")"
                    cp "$FILE" "data/$REL_PATH"
                  else
                    echo "Skipping $REPO_NAME/data/$REL_PATH due to non-permissive license."
                  fi
                done
            fi

            # Clean up the cloned repository
            rm -rf "$CLONE_DIR"
          done

      - name: Commit and Push Changes
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          set -euo pipefail

          git add data/

          # Distinguish "nothing staged" from a genuine commit failure
          # instead of masking every commit error.
          if git diff --cached --quiet; then
            echo "No changes to commit"
            exit 0
          fi

          git commit -m "sync doc repos diff XML data - $(date)"

          # NOTE(review): this pushes the local branch named `public`, which
          # presumably exists only when the workflow is dispatched on that
          # branch - confirm, or pin `ref: public` in the checkout step.
          git push "https://x-access-token:${GITHUB_TOKEN}@github.com/nauarchus/src_data.git" public