Zotero Addons Scraping #5231
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Workflow header: triggers, token permissions, shared environment, and the | |
| # single scrape_data job whose steps follow. | |
| name: Zotero Addons Scraping | |
| on: | |
| workflow_dispatch: # manual | |
| push: | |
| branches: | |
| - master | |
| schedule: | |
| # GitHub evaluates cron expressions in UTC; add 8h for Beijing time. | |
| - cron: "25 4,7,10,14,19,23 * * *" | |
| permissions: | |
| actions: write | |
| contents: write | |
| issues: write | |
| env: | |
| FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: 'true' | |
| TZ: Asia/Shanghai | |
| jobs: | |
| scrape_data: | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Checkout Repository | |
| # fetch-depth 0 asks actions/checkout for the complete history instead of a | |
| # shallow clone; a later step diffs the pushed head against an earlier commit. | |
| uses: 'actions/checkout@v4' | |
| with: | |
| fetch-depth: 0 | |
| - name: Checkout publish branch (for release_cache) | |
| # Second checkout, placed in ./publish_branch so it does not overwrite the | |
| # main working tree. | |
| uses: 'actions/checkout@v4' | |
| with: | |
| path: publish_branch | |
| ref: publish | |
| # The job keeps going when this checkout fails; the restore step handles a | |
| # missing directory itself. | |
| continue-on-error: true | |
| - name: Restore release_cache from publish branch | |
| run: | | |
| # Seed ./release_cache from the publish-branch checkout when it has one. | |
| cache_src="publish_branch/release_cache" | |
| if [ ! -d "$cache_src" ]; then | |
| echo "No release_cache found in publish branch, starting fresh" | |
| else | |
| cp -r "$cache_src" ./release_cache | |
| echo "Restored release_cache from publish branch" | |
| # Show only the first entries to keep the log short. | |
| ls -la release_cache | head -20 | |
| fi | |
| - name: Set up Python | |
| # Quoted so the version spec is always read as a string. | |
| uses: 'actions/setup-python@v5' | |
| with: | |
| python-version: '3.x' | |
| - name: Install Dependencies | |
| # Installs the packages listed in the repository's requirements.txt. | |
| run: pip install -r requirements.txt | |
| - name: Determine changed addon files | |
| # Push events only: collect the added/modified paths under addons/ and expose | |
| # them, one per line, as the step output `changed_addons`. | |
| id: changed_addons | |
| if: github.event_name == 'push' | |
| shell: bash | |
| env: | |
| # Hand the SHAs to the script through the environment instead of expanding | |
| # workflow expressions inside the script text. | |
| BEFORE_SHA: ${{ github.event.before }} | |
| HEAD_SHA: ${{ github.sha }} | |
| run: | | |
| zero_sha="0000000000000000000000000000000000000000" | |
| # No usable base commit: the ref was just created (empty or all-zero SHA), or | |
| # the previous commit is not present in this clone (e.g. after a force push). | |
| # In that case inspect only the pushed head commit. | |
| # core.quotepath=off keeps non-ASCII paths verbatim instead of octal-escaped. | |
| # --diff-filter=AM is applied on both paths so deleted files are never listed. | |
| if [ -z "$BEFORE_SHA" ] || [ "$BEFORE_SHA" = "$zero_sha" ] \ | |
| || ! git cat-file -e "${BEFORE_SHA}^{commit}" 2>/dev/null; then | |
| files=$(git -c core.quotepath=off diff-tree --no-commit-id --name-only --diff-filter=AM -r "$HEAD_SHA" -- addons || true) | |
| else | |
| files=$(git -c core.quotepath=off diff --name-only --diff-filter=AM "$BEFORE_SHA" "$HEAD_SHA" -- addons || true) | |
| fi | |
| # Multi-line output value. Every listed path starts with "addons", so a bare | |
| # EOF line can only be the terminator. | |
| { | |
| echo "changed_addons<<EOF" | |
| echo "$files" | |
| echo "EOF" | |
| } >> "$GITHUB_OUTPUT" | |
| - name: Validate changed addon tags | |
| # Runs the tag review module on the addon files reported by the | |
| # changed_addons step; skipped when that output is empty. | |
| if: github.event_name == 'push' && steps.changed_addons.outputs.changed_addons != '' | |
| shell: bash | |
| env: | |
| PYTHONPATH: src | |
| # File names originate from pushed commits. Passing them as data through the | |
| # environment keeps them from being parsed as part of the script. | |
| CHANGED_ADDONS: ${{ steps.changed_addons.outputs.changed_addons }} | |
| run: | | |
| # One path per line; blank lines are dropped before building the array. | |
| mapfile -t files < <(printf '%s\n' "$CHANGED_ADDONS" | sed '/^$/d') | |
| if [ "${#files[@]}" -eq 0 ]; then | |
| echo "No addon files changed" | |
| exit 0 | |
| fi | |
| # The review summary is written to the job's step summary file. | |
| python3 -m zotero_scraper.tag_review \ | |
| --files "${files[@]}" \ | |
| --summary-file "$GITHUB_STEP_SUMMARY" | |
| - name: Run Scraper | |
| # Step-level environment is declared ahead of the script so it cannot be | |
| # mistaken for script content. | |
| env: | |
| GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| PYTHONPATH: src | |
| run: | | |
| # Invoke the scraper entry point; the meaning of each flag is defined by | |
| # main.py, which is not part of this workflow file. | |
| python main.py \ | |
| -i addons \ | |
| -o published/addon_infos.json \ | |
| --release-cache-dir release_cache \ | |
| --github_repository "$GITHUB_REPOSITORY" \ | |
| --github_token "$GITHUB_TOKEN" \ | |
| --create_release True | |
| - name: Copy release_cache to published folder | |
| run: | | |
| # Put the cache inside the directory that the publish step uploads. | |
| cp -r release_cache published/release_cache | |
| # Log the final directory listing for inspection. | |
| printf '%s\n' "Contents of published folder:" | |
| ls -la published | |
| - name: Publish to publish branch | |
| # Pushes the contents of ./published to the publish branch, committing as the | |
| # github-actions bot identity. | |
| uses: 'peaceiris/actions-gh-pages@v4' | |
| with: | |
| github_token: ${{ secrets.GITHUB_TOKEN }} | |
| publish_dir: './published' | |
| publish_branch: 'publish' | |
| force_orphan: true | |
| user_name: "github-actions[bot]" | |
| user_email: "github-actions[bot]@users.noreply.github.com" | |