redd

z Subtle Cassowary, 19 godzin temu, napisane w Plain Text, wyświetlone 6 razy. Wygasa za 11 miesięcy.
URL https://pastebin.k4be.pl/view/95af3b5c Udostępnij
Pobierz wklejkę lub Pokaż surowy tekst
  1. #!/bin/bash
  # Abort on command failure, on use of an unset variable, and when any stage
  # of a pipeline fails.
  set -o errexit -o nounset -o pipefail

  # ---------------------------------------------
  # Configuration
  # ---------------------------------------------
  # Subreddits to follow. List several names to follow more than one, e.g.
  # SUBREDDITS=("classicusenet" "polska").
  SUBREDDITS=(
    "polska"
  )
  # Common base directory; each subreddit gets its own Maildir beneath it.
  BASE="/home/ppp/poczta/reddit"
  # User-Agent string passed to curl for every download.
  UA="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36"

  mkdir -p "$BASE"

  printf '%s\n' "[INFO] Start. Subreddity: ${SUBREDDITS[*]}"
  15.  
  16. # -------------------------------------------------
  # Download a URL into a file, then pause briefly to throttle requests.
  # Globals:   UA (read) - User-Agent string passed to curl
  # Arguments: $1 - URL to download
  #            $2 - path of the file that receives the response body
  # Outputs:   one "[INFO]" progress line on stdout
  # Returns:   curl's exit status; because of `curl -f` this is non-zero for
  #            HTTP errors as well as transport errors, so callers can write
  #            `fetch ... || handle_failure`
  fetch() {
    local url="$1" outfile="$2"
    local status=0
    echo "[INFO] Pobieram: $url"
    # Keep curl's status: without this the function would return the status
    # of the trailing sleep and every failed download would look successful.
    curl -f -s -A "$UA" "$url" -o "$outfile" || status=$?
    # Pause 2-4 seconds (also after a failure) so consecutive requests are
    # spaced out with some jitter.
    sleep $(( RANDOM % 3 + 2 ))
    return "$status"
  }
  23.  
  24. # -------------------------------------------------
  # Write one message into a Maildir unless it was already delivered.
  # Arguments: $1 - id; used for the Message-ID header and the file name
  #            $2 - id of the parent message, or "" for a message with no parent
  #            $3 - subject
  #            $4 - author name
  #            $5 - date string, written verbatim into the Date: header
  #            $6 - body text (may span several lines)
  #            $7 - Maildir root that contains cur/, new/ and tmp/
  # Outputs:   "[INFO]" progress lines on stdout
  # Returns:   0 when the message was written or skipped as a duplicate
  emit() {
    local id="$1" parent="$2" subject="$3" author="$4" date_rfc="$5" body="$6" maildir="$7"

    local fname="${id}.reddit:2,"
    local dir existing line
    local url_re='https?://[^[:space:]]+'

    # Mail clients append flag letters after ":2," when they move a message
    # to cur/ (e.g. ":2,S" once it has been read), so an exact-name check would
    # miss it and deliver the message again. Match any flag suffix instead.
    # An unmatched glob stays literal and simply fails the -e test.
    for dir in cur new tmp; do
      for existing in "$maildir/$dir/$fname"*; do
        if [[ -e "$existing" ]]; then
          echo "[INFO] Pomijam duplikat $id"
          return 0
        fi
      done
    done

    local tmpfile="$maildir/tmp/$fname"
    local newfile="$maildir/new/$fname"

    echo "[INFO] Zapisuję msg $id ($subject)"

    {
      printf '%s\n' "From: $author <$author@reddit.com>"
      printf '%s\n' "Subject: $subject"
      printf '%s\n' "Date: $date_rfc"
      printf '%s\n' "Message-ID: <$id@reddit.com>"
      if [[ -n "$parent" ]]; then
        printf '%s\n' "In-Reply-To: <$parent@reddit.com>"
        printf '%s\n' "References: <$parent@reddit.com>"
      fi
      # MIME-Version is required for the Content-Type header to be honoured.
      printf '%s\n' "MIME-Version: 1.0"
      printf '%s\n' "Content-Type: text/plain; charset=UTF-8"
      printf '%s\n' "Content-Transfer-Encoding: 8bit"
      printf '\n'
      # Wrap body lines at 72 columns, but leave lines containing a URL
      # untouched so that links are not broken apart.
      # printf (not echo) keeps lines such as "-n" or "-e" intact.
      while IFS= read -r line; do
        if [[ "$line" =~ $url_re ]]; then
          printf '%s\n' "$line"
        else
          printf '%s\n' "$line" | fold -s -w72
        fi
      done <<<"$body"
    } >"$tmpfile"

    # Write in tmp/ first and rename into new/ so readers never see a
    # half-written message.
    mv "$tmpfile" "$newfile"
  }
  60.  
  61. # -------------------------------------------------
  # Recursively store a comment listing, and all nested replies, as messages.
  # Arguments: $1 - comment listing as compact JSON; the literal texts `""`
  #                 and `null` mean "nothing to do"
  #            $2 - id of the message these comments reply to
  #            $3 - subject of the thread; "Re: " is prepended for each comment
  #            $4 - Maildir root, handed through to emit
  walk_comments() {
    local comments_json="$1" parent_id="$2" subject_top="$3" maildir="$4"
    local comment comment_id comment_author comment_text created when nested

    # Nothing to walk when the replies field was an empty string or null.
    case "$comments_json" in
      '""' | null) return 0 ;;
    esac

    # Only entries of kind "t1" are processed; every other kind is skipped.
    jq -c '.data.children[] | select(.kind=="t1")' <<<"$comments_json" |
      while IFS= read -r comment; do
        comment_id=$(jq -r '.data.id' <<<"$comment")
        created=$(jq -r '.data.created_utc' <<<"$comment")
        when=$(date -u -d @"$created" -R)
        comment_author=$(jq -r '.data.author' <<<"$comment")
        comment_text=$(jq -r '.data.body' <<<"$comment")

        emit "$comment_id" "$parent_id" "Re: $subject_top" "$comment_author" "$when" "$comment_text" "$maildir"

        # Descend into the replies, which now hang off this comment's id.
        nested=$(jq -c '.data.replies' <<<"$comment")
        walk_comments "$nested" "$comment_id" "$subject_top" "$maildir"
      done
  }
  80.  
  81. # -------------------------------------------------
  82. # Główna pętla
  83. # -------------------------------------------------
  # For every configured subreddit: download its listing, store each post as a
  # message in the subreddit's Maildir, then download the post's thread and
  # store its comments as replies.

  # Remove the current listing temp file on any exit path. (A RETURN trap set
  # at top level never runs here, so it would leave every temp file behind.)
  tmp_json=""
  trap 'if [[ -n "${tmp_json:-}" ]]; then rm -f -- "$tmp_json"; fi' EXIT

  for sub in "${SUBREDDITS[@]}"; do
    echo "[INFO] ---- Subreddit /r/$sub ----"
    MAILDIR="$BASE/$sub"
    mkdir -p "$MAILDIR"/{cur,new,tmp}

    tmp_json=$(mktemp)

    # Treat an empty download like a failed one: there is nothing to parse.
    if ! fetch "https://www.reddit.com/r/$sub.json" "$tmp_json" || [[ ! -s "$tmp_json" ]]; then
      echo "[WARN] Nie udało się pobrać listingu $sub"
      rm -f -- "$tmp_json"
      continue
    fi

    # Only entries of kind "t3" are treated as posts.
    jq -c '.data.children[] | select(.kind=="t3")' "$tmp_json" |
    while IFS= read -r post; do
      subject_top=$(jq -r '.data.title' <<<"$post")
      pid=$(jq -r '.data.id' <<<"$post")
      echo "[INFO] Wątek: \"$subject_top\" (ID: $pid)"

      author=$(jq -r '.data.author' <<<"$post")
      ts=$(jq -r '.data.created_utc' <<<"$post")
      date_rfc=$(date -u -d @"$ts" -R)

      # Body fallbacks: own text, then the crosspost parent's text, then the
      # linked URL. `// ""` keeps a null selftext from becoming the text "null".
      body=$(jq -r '.data.selftext // ""' <<<"$post")
      if [[ -z "$body" ]]; then
        body=$(jq -r '.data.crosspost_parent_list[0].selftext // empty' <<<"$post")
      fi
      if [[ -z "$body" ]]; then
        body=$(jq -r '.data.url_overridden_by_dest // .data.url // ""' <<<"$post")
      fi

      emit "$pid" "" "$subject_top" "$author" "$date_rfc" "$body" "$MAILDIR"

      thread_json=$(mktemp)
      # A thread that cannot be downloaded or parsed is skipped instead of
      # aborting the whole run; its temp file is removed either way.
      if ! fetch "https://www.reddit.com/r/$sub/comments/$pid.json" "$thread_json" ||
         ! comments_part=$(jq -c '.[1]' "$thread_json"); then
        echo "[WARN] Nie udało się pobrać komentarzy wątku $pid"
        rm -f -- "$thread_json"
        continue
      fi
      # The file is no longer needed once the comment listing is in memory.
      rm -f -- "$thread_json"
      walk_comments "$comments_part" "$pid" "$subject_top" "$MAILDIR"
    done

    rm -f -- "$tmp_json"
    tmp_json=""

  done
  118.  
  119. echo "[INFO] Zakończono."
  120.  
  121.  

odpowiedź "redd"

Tutaj możesz odpowiedzieć na wklejkę z góry

captcha