#!/bin/bash
#
# Mirrors Reddit threads (posts and their comments) into per-subreddit
# Maildir folders, one message file per post/comment.
#
# Requires: bash, curl, jq.
# Environment:
#   REDDIT_MAILDIR_BASE  optional; overrides the default Maildir base directory.
set -euo pipefail

# ---------------------------------------------
# Configuration
# ---------------------------------------------
# Tracked subreddits. Add more names to mirror them as well,
# e.g. SUBREDDITS=("classicusenet" "polska").
SUBREDDITS=("polska")
# Common parent directory; each subreddit gets its own Maildir below it.
# The default is kept for backward compatibility; set REDDIT_MAILDIR_BASE to
# run the script for another user or location without editing it.
BASE="${REDDIT_MAILDIR_BASE:-/home/ppp/poczta/reddit}"
# Browser-like User-Agent sent with every HTTP request.
UA="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36"

# Fail fast when an external tool is missing instead of aborting mid-run.
for required_tool in curl jq; do
  if ! command -v "$required_tool" >/dev/null 2>&1; then
    echo "[ERROR] Brak wymaganego programu: $required_tool" >&2
    exit 1
  fi
done

mkdir -p "$BASE"
echo "[INFO] Start. Subreddity: ${SUBREDDITS[*]}"
- # -------------------------------------------------
#######################################
# Downloads a URL into a file, then pauses to throttle the request rate.
# Globals:   UA (read) - User-Agent string passed to curl
# Arguments: $1 - URL to download
#            $2 - path of the output file
# Outputs:   one "[INFO]" progress line on stdout
# Returns:   curl's exit status (non-zero when the download failed)
#######################################
fetch() {
  local url="$1" outfile="$2"
  local rc=0
  echo "[INFO] Pobieram: $url"
  # Capture curl's status explicitly. Callers invoke `fetch ... || handler`,
  # which suspends `set -e` inside this function; without the capture the
  # function would return the status of the trailing `sleep` (always 0) and
  # download failures would never reach the caller.
  curl -f -s -A "$UA" "$url" -o "$outfile" || rc=$?
  # Pause 2-3 seconds between requests, also after a failure. The previous
  # `RANDOM % 1` was always 0, so the delay never varied.
  sleep $(( RANDOM % 2 + 2 ))
  return "$rc"
}
- # -------------------------------------------------
#######################################
# Writes one message into a Maildir unless it has been delivered before.
# Arguments: $1 - message id (used for the file name and Message-ID)
#            $2 - parent message id, or empty for a thread starter
#            $3 - subject
#            $4 - author name
#            $5 - date string for the Date header
#            $6 - message body (may span several lines)
#            $7 - Maildir path; its cur/new/tmp subdirectories must exist
# Outputs:   one "[INFO]" progress line on stdout
# Returns:   0 when the message was written or skipped as a duplicate
#######################################
emit() {
  local id="$1" parent="$2" subject="$3" author="$4" date_rfc="$5" body="$6" maildir="$7"
  local fname="${id}.reddit:2,"
  local url_re='https?://[^[:space:]]+'
  local dir existing line

  # Header values must stay on one line; a CR/LF would otherwise start a new
  # header or end the header block early.
  subject=${subject//[$'\r\n']/ }
  author=${author//[$'\r\n']/ }

  # Match by name prefix: a mail client may append flag letters after ":2,"
  # (e.g. ":2,S") when it moves a message to cur/, so an exact-name test would
  # miss it and deliver the message again. tmp/ is deliberately not checked,
  # so a leftover partial file from an interrupted run gets overwritten
  # instead of blocking delivery forever.
  for dir in cur new; do
    for existing in "$maildir/$dir/$fname"*; do
      if [[ -e "$existing" ]]; then
        echo "[INFO] Pomijam duplikat $id"
        return 0
      fi
    done
  done

  local tmpfile="$maildir/tmp/$fname"
  local newfile="$maildir/new/$fname"
  echo "[INFO] Zapisuję msg $id ($subject)"
  {
    printf '%s\n' "From: $author <$author@reddit.com>"
    printf '%s\n' "Subject: $subject"
    printf '%s\n' "Date: $date_rfc"
    printf '%s\n' "Message-ID: <$id@reddit.com>"
    if [[ -n "$parent" ]]; then
      printf '%s\n' "In-Reply-To: <$parent@reddit.com>"
      printf '%s\n' "References: <$parent@reddit.com>"
    fi
    printf '%s\n' "Content-Type: text/plain; charset=UTF-8"
    printf '\n'
    # Wrap body lines at 72 columns, but keep lines containing a URL intact so
    # links are not broken. The body is fed with a here-string rather than
    # `echo`, which would misinterpret bodies such as "-n".
    while IFS= read -r line; do
      if [[ "$line" =~ $url_re ]]; then
        printf '%s\n' "$line"
      else
        fold -s -w72 <<<"$line"
      fi
    done <<<"$body"
  } >"$tmpfile"
  # Write in tmp/ first, then rename, so new/ never holds a partial message.
  mv -- "$tmpfile" "$newfile"
}
- # -------------------------------------------------
#######################################
# Recursively emits every comment of a comment listing as a reply message.
# Only children of kind "t1" are processed; other kinds are skipped.
# Arguments: $1 - JSON of a comment listing (or the JSON values "" / null,
#                 which mean "no replies")
#            $2 - id of the message these comments reply to
#            $3 - subject of the thread starter ("Re: " is prepended)
#            $4 - Maildir path passed through to emit
# Outputs:   whatever emit prints
#######################################
walk_comments() {
  local comments_json="$1" parent_id="$2" subject_top="$3" maildir="$4"
  # Declared local so they cannot clash with same-named variables of the
  # caller; previously they were only isolated by the pipeline subshell.
  local cm cid author body ts date_rfc repl

  # A comment without replies carries an empty JSON string (or null) here.
  if [[ "$comments_json" == '""' || "$comments_json" == "null" ]]; then
    return 0
  fi

  jq -c '.data.children[] | select(.kind=="t1")' <<<"$comments_json" |
    while IFS= read -r cm; do
      cid=$(jq -r '.data.id' <<<"$cm")
      author=$(jq -r '.data.author' <<<"$cm")
      body=$(jq -r '.data.body' <<<"$cm")
      ts=$(jq -r '.data.created_utc' <<<"$cm")
      date_rfc=$(date -u -d @"$ts" -R)
      emit "$cid" "$parent_id" "Re: $subject_top" "$author" "$date_rfc" "$body" "$maildir"
      # Descend into the replies, threading them under this comment.
      repl=$(jq -c '.data.replies' <<<"$cm")
      walk_comments "$repl" "$cid" "$subject_top" "$maildir"
    done
}
# -------------------------------------------------
# Main loop
# -------------------------------------------------
# One scratch directory holds all downloaded JSON and is removed on every
# exit path. The earlier `trap ... RETURN` at top level did not run when the
# script exited, so the temporary files were left behind.
workdir=$(mktemp -d)
trap 'rm -rf -- "${workdir:?}"' EXIT
listing_json="$workdir/listing.json"
thread_json="$workdir/thread.json"

for sub in "${SUBREDDITS[@]}"; do
  echo "[INFO] ---- Subreddit /r/$sub ----"
  MAILDIR="$BASE/$sub"
  mkdir -p "$MAILDIR"/{cur,new,tmp}

  # Start from an empty file so a failed download can never leave the
  # previous subreddit's listing behind to be parsed again.
  : >"$listing_json"
  if ! fetch "https://www.reddit.com/r/$sub.json" "$listing_json"; then
    echo "[WARN] Nie udało się pobrać listingu $sub"
    continue
  fi

  # Only children of kind "t3" are turned into thread starters.
  jq -c '.data.children[] | select(.kind=="t3")' "$listing_json" |
    while IFS= read -r post; do
      subject_top=$(jq -r '.data.title' <<<"$post")
      pid=$(jq -r '.data.id' <<<"$post")
      echo "[INFO] Wątek: \"$subject_top\" (ID: $pid)"
      author=$(jq -r '.data.author' <<<"$post")
      ts=$(jq -r '.data.created_utc' <<<"$post")
      date_rfc=$(date -u -d @"$ts" -R)

      # Body: own text, else the crossposted text, else the linked URL.
      # `// empty` keeps a JSON null from becoming the literal text "null".
      body=$(jq -r '.data.selftext // empty' <<<"$post")
      if [[ -z "$body" ]]; then
        body=$(jq -r '.data.crosspost_parent_list[0].selftext // empty' <<<"$post")
      fi
      if [[ -z "$body" ]]; then
        body=$(jq -r '.data.url_overridden_by_dest // .data.url // ""' <<<"$post")
      fi
      emit "$pid" "" "$subject_top" "$author" "$date_rfc" "$body" "$MAILDIR"

      # Same precaution as for the listing: never re-parse a stale thread.
      : >"$thread_json"
      if ! fetch "https://www.reddit.com/r/$sub/comments/$pid.json" "$thread_json"; then
        echo "[WARN] Nie udało się pobrać wątku $pid"
        continue
      fi
      # Element 1 of the thread response is the comment listing.
      comments_part=$(jq -c '.[1]' "$thread_json")
      walk_comments "$comments_part" "$pid" "$subject_top" "$MAILDIR"
    done
done
echo "[INFO] Zakończono."
redd
z Subtle Cassowary, 19 godzin temu, napisane w Plain Text, wyświetlone 6 razy.
[paste_expire] 11 miesięcy.
URL https://pastebin.k4be.pl/view/95af3b5c
Udostępnij
Pobierz wklejkę lub Pokaż surowy tekst
— Rozwiń pełna szerokość przeglądarki