#!/bin/bash
#
# Mirror Reddit threads into Maildir folders (one Maildir per subreddit).
#
# For every subreddit in SUBREDDITS the listing JSON is downloaded, each post
# (kind "t3") is written as a mail message, and then the post's comment tree
# (kind "t1") is written as replies threaded via In-Reply-To/References.
#
# External tools used: curl, jq, fold, mktemp and a `date` that understands
# `-d @EPOCH -R`.
set -euo pipefail

# ---------------------------------------------
# Configuration
# ---------------------------------------------
# SUBREDDITS=("classicusenet" "polska")   # alternative list of subreddits
SUBREDDITS=("polska")                     # list of followed subreddits
readonly BASE="/home/ppp/poczta/reddit"   # common root of all Maildirs
readonly UA="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36"

# Scratch directory for downloaded JSON; created in main, removed on exit.
WORKDIR=""

#######################################
# Remove the scratch directory (installed as the EXIT trap).
# Globals: WORKDIR (read)
#######################################
cleanup() {
  if [[ -n "$WORKDIR" ]]; then
    rm -rf -- "$WORKDIR"
  fi
}

#######################################
# Download a URL to a file, then pause to stay polite towards the server.
# Globals:   UA (read)
# Arguments: $1 - URL to download
#            $2 - path of the output file
# Outputs:   progress line on stdout
# Returns:   curl's exit status (non-zero on HTTP/network failure)
#######################################
fetch() {
  local url="$1" outfile="$2"
  local rc=0

  echo "[INFO] Pobieram: $url"
  # Remember curl's status: the function used to return the status of the
  # trailing sleep, so callers could never detect a failed download.
  curl -f -s -A "$UA" "$url" -o "$outfile" || rc=$?

  # 2-3 seconds of delay (the earlier `RANDOM % 1` was always 0).
  sleep $(( RANDOM % 2 + 2 ))
  return "$rc"
}

#######################################
# Write one message into a Maildir unless it is already there.
# Arguments: $1 - message id (used for the file name and Message-ID)
#            $2 - parent message id, empty for a thread starter
#            $3 - subject
#            $4 - author name
#            $5 - date string for the Date header
#            $6 - message body
#            $7 - Maildir root (must contain cur/, new/ and tmp/)
# Outputs:   progress line on stdout
# Returns:   0 on success or when the message already exists
#######################################
emit() {
  local id="$1" parent="$2" subject="$3" author="$4" date_rfc="$5" body="$6" maildir="$7"
  local fname="${id}.reddit:2,"
  local url_re='https?://[^[:space:]]+'
  local dir existing line

  # A header value must stay on one line; embedded CR/LF would let the
  # subject inject additional headers.
  subject=${subject//[$'\r\n']/ }

  # Match by prefix: a mail client may append flags after ":2," (for example
  # ":2,S" once read), so an exact-name test would miss the message and
  # deliver it a second time.
  for dir in cur new tmp; do
    for existing in "$maildir/$dir/${id}.reddit"*; do
      if [[ -e "$existing" ]]; then
        echo "[INFO] Pomijam duplikat $id"
        return 0
      fi
    done
  done

  local tmpfile="$maildir/tmp/$fname"
  local newfile="$maildir/new/$fname"

  echo "[INFO] Zapisuję msg $id ($subject)"
  {
    printf 'From: %s <%s@reddit.com>\n' "$author" "$author"
    printf 'Subject: %s\n' "$subject"
    printf 'Date: %s\n' "$date_rfc"
    printf 'Message-ID: <%s@reddit.com>\n' "$id"
    if [[ -n "$parent" ]]; then
      printf 'In-Reply-To: <%s@reddit.com>\n' "$parent"
      printf 'References: <%s@reddit.com>\n' "$parent"
    fi
    printf 'MIME-Version: 1.0\n'
    printf 'Content-Type: text/plain; charset=UTF-8\n'
    printf 'Content-Transfer-Encoding: 8bit\n'
    printf '\n'
    # Wrap body lines at 72 columns, but leave lines containing a URL
    # untouched so that links are not broken.
    while IFS= read -r line; do
      if [[ "$line" =~ $url_re ]]; then
        printf '%s\n' "$line"
      else
        fold -s -w72 <<<"$line"
      fi
    done <<<"$body"
  } >"$tmpfile"

  # Write in tmp/ first, then move into new/ so readers never see a
  # half-written message.
  mv -- "$tmpfile" "$newfile"
}

#######################################
# Recursively write every comment of a listing as a reply message.
# Arguments: $1 - JSON of a comment listing ('""' or 'null' means no replies)
#            $2 - id of the message the comments reply to
#            $3 - subject of the thread starter
#            $4 - Maildir root
#######################################
walk_comments() {
  local comments_json="$1" parent_id="$2" subject_top="$3" maildir="$4"
  local cm cid author body ts date_rfc repl

  if [[ "$comments_json" == '""' || "$comments_json" == "null" ]]; then
    return 0
  fi

  jq -c '.data.children[] | select(.kind=="t1")' <<<"$comments_json" |
    while IFS= read -r cm; do
      cid=$(jq -r '.data.id' <<<"$cm")
      author=$(jq -r '.data.author' <<<"$cm")
      body=$(jq -r '.data.body' <<<"$cm")
      ts=$(jq -r '.data.created_utc' <<<"$cm")
      date_rfc=$(date -u -d @"$ts" -R)

      emit "$cid" "$parent_id" "Re: $subject_top" "$author" "$date_rfc" "$body" "$maildir"

      # Descend into the replies with this comment as the new parent.
      repl=$(jq -c '.data.replies' <<<"$cm")
      walk_comments "$repl" "$cid" "$subject_top" "$maildir"
    done
}

#######################################
# Main loop: process every configured subreddit.
# Globals: SUBREDDITS, BASE (read), WORKDIR (written)
#######################################
main() {
  local sub maildir listing_json thread_json
  local post subject_top pid author ts date_rfc body comments_part

  # One scratch directory removed by an EXIT trap. The previous
  # `trap ... RETURN` at top level never fired, so temp files were leaked.
  WORKDIR=$(mktemp -d)
  trap cleanup EXIT
  listing_json="$WORKDIR/listing.json"
  thread_json="$WORKDIR/thread.json"

  mkdir -p "$BASE"
  echo "[INFO] Start. Subreddity: ${SUBREDDITS[*]}"

  for sub in "${SUBREDDITS[@]}"; do
    echo "[INFO] ---- Subreddit /r/$sub ----"
    maildir="$BASE/$sub"
    mkdir -p "$maildir"/{cur,new,tmp}

    if ! fetch "https://www.reddit.com/r/$sub.json" "$listing_json"; then
      echo "[WARN] Nie udało się pobrać listingu $sub"
      continue
    fi

    jq -c '.data.children[] | select(.kind=="t3")' "$listing_json" |
      while IFS= read -r post; do
        subject_top=$(jq -r '.data.title' <<<"$post")
        pid=$(jq -r '.data.id' <<<"$post")
        echo "[INFO] Wątek: \"$subject_top\" (ID: $pid)"
        author=$(jq -r '.data.author' <<<"$post")
        ts=$(jq -r '.data.created_utc' <<<"$post")
        date_rfc=$(date -u -d @"$ts" -R)

        # Body fallbacks: own text, then the crosspost's text, then the link.
        body=$(jq -r '.data.selftext' <<<"$post")
        if [[ -z "$body" ]]; then
          body=$(jq -r '.data.crosspost_parent_list[0].selftext // empty' <<<"$post")
        fi
        if [[ -z "$body" ]]; then
          body=$(jq -r '.data.url_overridden_by_dest // .data.url // ""' <<<"$post")
        fi

        emit "$pid" "" "$subject_top" "$author" "$date_rfc" "$body" "$maildir"

        if ! fetch "https://www.reddit.com/r/$sub/comments/$pid.json" "$thread_json"; then
          echo "[WARN] Nie udało się pobrać wątku $pid"
          continue
        fi

        # Element [1] of the thread response is the comment listing.
        comments_part=$(jq -c '.[1]' "$thread_json")
        walk_comments "$comments_part" "$pid" "$subject_top" "$maildir"
      done
  done

  echo "[INFO] Zakończono."
}

main "$@"