# Convenience aliases for daily runs (zsh)
# Source this file in your shell: source scripts/aliases.zsh
#
# Every function below invokes ./run_scraper.sh or ./.venv/bin/python by
# relative path, so call them from the directory that contains those files.

# --- Project defaults (edit as needed) ---
# Channel and files
export CH="https://t.me/Premier_League_Update"
export POSTS_CSV="data/premier_league_update.csv"
export REPLIES_CSV="data/premier_league_replies.csv"
export FORWARDS_CSV="data/premier_league_forwards.csv"
export TAGS_CFG="config/tags.yaml"
export FIXTURES_CSV="data/premier_league_schedule_2025-08-15_to_2025-10-15.csv"

# Default fixtures date range (used by run_all)
export FIXTURES_START_DATE="2025-08-15"
export FIXTURES_END_DATE="2025-10-15"

# Sessions directory outside iCloud (avoid sqlite locks)
export SESSION_DIR="$HOME/.local/share/telethon_sessions"
mkdir -p "$SESSION_DIR"

# --- Internal helpers ---

# Report a failed pipeline step on stderr without aborting the caller.
# Arguments: $1 - calling function, $2 - step name, $3 - exit status
_aliases_warn_step() {
  printf '%s: %s step exited with status %s; continuing\n' "$1" "$2" "$3" >&2
}

# --- Aliases (zsh functions) ---

# Fast replies: resume+append, prioritizes parents with replies, tuned concurrency
# Arguments (all optional):
#   $1 channel (default $CH)            $2 posts CSV (default $POSTS_CSV)
#   $3 output CSV (default $REPLIES_CSV) $4 concurrency (default 15)
#   $5 session name (default $SESSION_DIR/telegram_replies)
fast_replies() {
  local ch="${1:-$CH}"
  local posts="${2:-$POSTS_CSV}"
  local out="${3:-$REPLIES_CSV}"
  local conc="${4:-15}"
  local sess="${5:-$SESSION_DIR/telegram_replies}"

  ./run_scraper.sh replies \
    -c "$ch" \
    --from-csv "$posts" \
    -o "$out" \
    --min-replies 1 \
    --concurrency "$conc" \
    --resume \
    --append \
    --session-name "$sess"
}

# Chunked forwards: concurrent chunk scan with progress logs
# Arguments (all optional):
#   $1 channel (default $CH)             $2 posts CSV (default $POSTS_CSV)
#   $3 output CSV (default $FORWARDS_CSV) $4 scan limit (default 20000)
#   $5 concurrency (default 10)          $6 chunk size (default 1500)
#   $7 session name (default $SESSION_DIR/telegram_forwards)
chunked_forwards() {
  local ch="${1:-$CH}"
  local posts="${2:-$POSTS_CSV}"
  local out="${3:-$FORWARDS_CSV}"
  local scan="${4:-20000}"
  local conc="${5:-10}"
  local chunk="${6:-1500}"
  local sess="${7:-$SESSION_DIR/telegram_forwards}"

  ./run_scraper.sh forwards \
    -c "$ch" \
    --from-csv "$posts" \
    -o "$out" \
    --scan-limit "$scan" \
    --concurrency "$conc" \
    --chunk-size "$chunk" \
    --append \
    --session-name "$sess"
}

# Combined analyze: posts + replies + fixtures with tags; writes augmented CSVs
# Arguments (all optional):
#   $1 posts CSV   $2 replies CSV   $3 tags config   $4 fixtures CSV   $5 channel
# Tip: add plot sizing/labels, e.g.:
#   --plot-width-scale 0.8 --plot-max-width 120 --plot-height 8 --activity-top-n 8 --labels-stagger-rows 3
analyze_combined() {
  local posts="${1:-$POSTS_CSV}"
  local replies="${2:-$REPLIES_CSV}"
  local tags="${3:-$TAGS_CFG}"
  local fixtures="${4:-$FIXTURES_CSV}"
  local ch="${5:-$CH}"

  ./run_scraper.sh analyze \
    -i "$posts" \
    --channel "$ch" \
    --tags-config "$tags" \
    --replies-csv "$replies" \
    --fixtures-csv "$fixtures" \
    --write-augmented-csv \
    --write-combined-csv \
    --save-plots
}

# Emoji-aware analyze with sensible defaults (keep + boost)
# Arguments (all optional):
#   $1 posts CSV   $2 replies CSV   $3 tags config   $4 fixtures CSV   $5 channel
#   $6 emoji mode: keep | demojize | strip (default keep)
analyze_emoji() {
  local posts="${1:-$POSTS_CSV}"
  local replies="${2:-$REPLIES_CSV}"
  local tags="${3:-$TAGS_CFG}"
  local fixtures="${4:-$FIXTURES_CSV}"
  local ch="${5:-$CH}"
  local mode="${6:-keep}" # keep | demojize | strip

  ./run_scraper.sh analyze \
    -i "$posts" \
    --channel "$ch" \
    --tags-config "$tags" \
    --replies-csv "$replies" \
    --fixtures-csv "$fixtures" \
    --write-augmented-csv \
    --write-combined-csv \
    --save-plots \
    --emoji-mode "$mode" \
    --emoji-boost
}

# Analyze with transformers (and export labels/probs)
# Arguments (all optional):
#   $1 posts CSV   $2 replies CSV   $3 tags config   $4 fixtures CSV   $5 channel
#   $6 transformers model (default distilbert-base-uncased)
analyze_transformers() {
  local posts="${1:-$POSTS_CSV}"
  local replies="${2:-$REPLIES_CSV}"
  local tags="${3:-$TAGS_CFG}"
  local fixtures="${4:-$FIXTURES_CSV}"
  local ch="${5:-$CH}"
  local model="${6:-distilbert-base-uncased}"

  ./run_scraper.sh analyze \
    -i "$posts" \
    --channel "$ch" \
    --tags-config "$tags" \
    --replies-csv "$replies" \
    --fixtures-csv "$fixtures" \
    --sentiment-backend transformers \
    --transformers-model "$model" \
    --export-transformers-details \
    --write-augmented-csv \
    --write-combined-csv \
    --save-plots
}

# Plot graphs from labeled sentiment CSV
# Arguments (all optional):
#   $1 labeled CSV (default data/labeled_sentiment.csv)   $2 output dir (default data)
plot_labeled() {
  local labeled_csv="${1:-data/labeled_sentiment.csv}"
  local out_dir="${2:-data}"

  ./.venv/bin/python -m src.plot_labeled \
    --input "$labeled_csv" \
    --out-dir "$out_dir"
}

# Merge labeled CSV back into posts/replies to reuse analyzer plots
# Arguments (all optional):
#   $1 labeled CSV (default data/labeled_sentiment.csv)
#   $2 posts CSV in (default $POSTS_CSV)   $3 replies CSV in (default $REPLIES_CSV)
#   $4 posts CSV out (default data/premier_league_update_with_labels.csv)
#   $5 replies CSV out (default data/premier_league_replies_with_labels.csv)
# The analyzer is run even if the merge step fails (no status check between them).
apply_labels_and_analyze() {
  local labeled_csv="${1:-data/labeled_sentiment.csv}"
  local posts_in="${2:-$POSTS_CSV}"
  local replies_in="${3:-$REPLIES_CSV}"
  local posts_out="${4:-data/premier_league_update_with_labels.csv}"
  local replies_out="${5:-data/premier_league_replies_with_labels.csv}"

  ./.venv/bin/python -m src.apply_labels \
    --labeled-csv "$labeled_csv" \
    --posts-csv "$posts_in" \
    --replies-csv "$replies_in" \
    --posts-out "$posts_out" \
    --replies-out "$replies_out"

  # Reuse analyzer with the merged CSVs; it will pick up sentiment_label if present
  # NOTE(review): unlike the other analyze wrappers, no --channel is passed
  # here -- confirm that is intended.
  ./run_scraper.sh analyze \
    -i "$posts_out" \
    --replies-csv "$replies_out" \
    --fixtures-csv "$FIXTURES_CSV" \
    --tags-config "$TAGS_CFG" \
    --write-augmented-csv \
    --write-combined-csv \
    --save-plots
}

# Auto-label sentiment without manual annotation (VADER backend)
# Arguments (all optional):
#   $1 posts CSV   $2 replies CSV   $3 output CSV (default data/labeled_sentiment.csv)
auto_label_vader() {
  local posts="${1:-$POSTS_CSV}"
  local replies="${2:-$REPLIES_CSV}"
  local out="${3:-data/labeled_sentiment.csv}"

  ./.venv/bin/python -m src.auto_label_sentiment \
    --posts-csv "$posts" \
    --replies-csv "$replies" \
    --backend vader \
    --vader-pos 0.05 \
    --vader-neg -0.05 \
    --vader-margin 0.20 \
    --only-confident \
    -o "$out"
}

# Auto-label sentiment using a pretrained transformers model
# Arguments (all optional):
#   $1 posts CSV   $2 replies CSV
#   $3 model (default cardiffnlp/twitter-roberta-base-sentiment-latest)
#   $4 output CSV (default data/labeled_sentiment.csv)
auto_label_transformers() {
  local posts="${1:-$POSTS_CSV}"
  local replies="${2:-$REPLIES_CSV}"
  local model="${3:-cardiffnlp/twitter-roberta-base-sentiment-latest}"
  local out="${4:-data/labeled_sentiment.csv}"

  ./.venv/bin/python -m src.auto_label_sentiment \
    --posts-csv "$posts" \
    --replies-csv "$replies" \
    --backend transformers \
    --transformers-model "$model" \
    --min-prob 0.6 \
    --min-margin 0.2 \
    --only-confident \
    -o "$out"
}

# Train a transformers model with the project venv
# Arguments (all optional):
#   $1 training CSV (default data/labeled_sentiment.csv)
#   $2 text column (default message)   $3 label column (default label)
#   $4 base model (default distilbert-base-uncased)
#   $5 output dir (default models/sentiment-distilbert)
train_transformers() {
  local train_csv="${1:-data/labeled_sentiment.csv}"
  local text_col="${2:-message}"
  local label_col="${3:-label}"
  local base_model="${4:-distilbert-base-uncased}"
  local out_dir="${5:-models/sentiment-distilbert}"

  ./.venv/bin/python -m src.train_sentiment \
    --train-csv "$train_csv" \
    --text-col "$text_col" \
    --label-col "$label_col" \
    --model-name "$base_model" \
    --output-dir "$out_dir" \
    --epochs 3 \
    --batch-size 16
}

# Evaluate a fine-tuned transformers model
# Arguments (all optional):
#   $1 CSV (default data/labeled_holdout.csv)
#   $2 text column (default message)   $3 label column (default label)
#   $4 model dir (default models/sentiment-distilbert)
eval_transformers() {
  local csv="${1:-data/labeled_holdout.csv}"
  local text_col="${2:-message}"
  local label_col="${3:-label}"
  local model_dir="${4:-models/sentiment-distilbert}"

  ./.venv/bin/python -m src.eval_sentiment \
    --csv "$csv" \
    --text-col "$text_col" \
    --label-col "$label_col" \
    --model "$model_dir"
}

# Build a labeling CSV from existing posts+replies
# Arguments (all optional):
#   $1 posts CSV   $2 replies CSV
#   $3 output CSV (default data/labeled_sentiment.csv)   $4 sample size (default 1000)
make_label_set() {
  local posts="${1:-$POSTS_CSV}"
  local replies="${2:-$REPLIES_CSV}"
  local out="${3:-data/labeled_sentiment.csv}"
  local n="${4:-1000}"

  ./.venv/bin/python -m src.make_labeling_set \
    --posts-csv "$posts" \
    --replies-csv "$replies" \
    --sample-size "$n" \
    -o "$out"
}

# One-shot daily pipeline: fast replies then combined analyze
# Arguments (all optional):
#   $1 channel   $2 posts CSV   $3 replies CSV   $4 replies concurrency (default 15)
# Returns: the analyze status if it is non-zero, otherwise the replies status.
# A failed replies fetch is reported on stderr but the analysis still runs.
run_daily() {
  local ch="${1:-$CH}"
  local posts="${2:-$POSTS_CSV}"
  local replies="${3:-$REPLIES_CSV}"
  local conc="${4:-15}"
  local rc_replies=0 rc_analyze=0

  fast_replies "$ch" "$posts" "$replies" "$conc" "$SESSION_DIR/telegram_replies" ||
    rc_replies=$?
  if (( rc_replies != 0 )); then
    _aliases_warn_step run_daily replies "$rc_replies"
  fi

  analyze_emoji "$posts" "$replies" "$TAGS_CFG" "$FIXTURES_CSV" "$ch" keep ||
    rc_analyze=$?

  if (( rc_analyze != 0 )); then
    return "$rc_analyze"
  fi
  return "$rc_replies"
}

# One-shot daily pipeline with forwards in parallel
# Arguments (all optional):
#   $1 channel   $2 posts CSV   $3 replies CSV   $4 forwards CSV
#   $5 replies concurrency (default 15)   $6 forwards scan limit (default 20000)
#   $7 forwards concurrency (default 10)  $8 forwards chunk size (default 1500)
#   $9 replies session name               $10 forwards session name
# Returns: the analyze status if it is non-zero, otherwise the first non-zero
# status among the replies and forwards jobs (0 when everything succeeded).
run_daily_with_forwards() {
  local ch="${1:-$CH}"
  local posts="${2:-$POSTS_CSV}"
  local replies="${3:-$REPLIES_CSV}"
  local forwards="${4:-$FORWARDS_CSV}"
  local rep_conc="${5:-15}"
  local f_scan="${6:-20000}"
  local f_conc="${7:-10}"
  local f_chunk="${8:-1500}"
  local sess_r="${9:-$SESSION_DIR/telegram_replies}"
  local sess_f="${10:-$SESSION_DIR/telegram_forwards}"
  local rc_replies=0 rc_forwards=0 rc_analyze=0

  # Launch replies and forwards in parallel with separate sessions
  local pid_r pid_f
  fast_replies "$ch" "$posts" "$replies" "$rep_conc" "$sess_r" &
  pid_r=$!
  chunked_forwards "$ch" "$posts" "$forwards" "$f_scan" "$f_conc" "$f_chunk" "$sess_f" &
  pid_f=$!

  # Wait for both jobs; `wait PID` yields that job's exit status, which is
  # the only place a background failure becomes visible.
  wait "$pid_r" || rc_replies=$?
  wait "$pid_f" || rc_forwards=$?
  if (( rc_replies != 0 )); then
    _aliases_warn_step run_daily_with_forwards replies "$rc_replies"
  fi
  if (( rc_forwards != 0 )); then
    _aliases_warn_step run_daily_with_forwards forwards "$rc_forwards"
  fi

  # Analyze with emoji handling
  analyze_emoji "$posts" "$replies" "$TAGS_CFG" "$FIXTURES_CSV" "$ch" keep ||
    rc_analyze=$?

  if (( rc_analyze != 0 )); then
    return "$rc_analyze"
  fi
  if (( rc_replies != 0 )); then
    return "$rc_replies"
  fi
  return "$rc_forwards"
}

# End-to-end, non-interactive pipeline (from scratch): scrape -> replies -> fixtures -> analyze
# Requirements:
# - .env has TELEGRAM_API_ID and TELEGRAM_API_HASH (and TELEGRAM_2FA_PASSWORD if 2FA is enabled)
# - CH/POSTS_CSV/REPLIES_CSV/FIXTURES_CSV/TAGS_CFG are set (defaults are defined above)
# - Provide optional start/end dates; defaults use FIXTURES_START_DATE/FIXTURES_END_DATE
# - Choose sentiment backend via arg 11: vader | transformers | gpt (default: transformers)
# Arguments (all optional):
#   $1 channel        $2 start date      $3 end date       $4 posts CSV
#   $5 replies CSV    $6 fixtures CSV    $7 tags config
#   $8 scrape session name               $9 replies session name
#   $10 replies concurrency (default 15) $11 sentiment backend
#   $12 transformers model (default models/sentiment-distilbert)
#   $13 gpt model (default llama3)       $14 gpt base URL (default http://localhost:11434)
# Returns: the analyze status if it is non-zero, otherwise the status of the
# first earlier step that failed (0 when everything succeeded). Every step is
# still attempted; failures are reported on stderr.
run_all() {
  local ch="${1:-$CH}"
  local start="${2:-$FIXTURES_START_DATE}"
  local end="${3:-$FIXTURES_END_DATE}"
  local posts="${4:-$POSTS_CSV}"
  local replies="${5:-$REPLIES_CSV}"
  local fixtures="${6:-$FIXTURES_CSV}"
  local tags="${7:-$TAGS_CFG}"
  local sess_scrape="${8:-$SESSION_DIR/telegram_scrape}"
  local sess_replies="${9:-$SESSION_DIR/telegram_replies}"
  local rep_conc="${10:-15}"
  local backend="${11:-transformers}" # vader | transformers | gpt
  local model="${12:-models/sentiment-distilbert}"
  local gpt_model="${13:-llama3}"
  local gpt_url="${14:-http://localhost:11434}"
  local rc=0 rc_first=0 rc_analyze=0

  # 1) Scrape posts (overwrite)
  rc=0
  ./run_scraper.sh scrape \
    -c "$ch" \
    -o "$posts" \
    --start-date "$start" \
    --end-date "$end" \
    --session-name "$sess_scrape" || rc=$?
  if (( rc != 0 )); then
    _aliases_warn_step run_all scrape "$rc"
    rc_first=$rc
  fi

  # 2) Fetch replies (resume+append safe)
  rc=0
  ./run_scraper.sh replies \
    -c "$ch" \
    --from-csv "$posts" \
    -o "$replies" \
    --min-replies 1 \
    --concurrency "$rep_conc" \
    --resume \
    --append \
    --session-name "$sess_replies" || rc=$?
  if (( rc != 0 )); then
    _aliases_warn_step run_all replies "$rc"
    if (( rc_first == 0 )); then
      rc_first=$rc
    fi
  fi

  # 3) Fetch fixtures for the same period
  rc=0
  ./run_scraper.sh fixtures \
    --start-date "$start" \
    --end-date "$end" \
    -o "$fixtures" || rc=$?
  if (( rc != 0 )); then
    _aliases_warn_step run_all fixtures "$rc"
    if (( rc_first == 0 )); then
      rc_first=$rc
    fi
  fi

  # 4) Analyze with plots (non-interactive)
  # NOTE(review): unlike the standalone analyze wrappers, no --channel is
  # passed here -- confirm that is intended.
  local -a args
  args=(
    -i "$posts"
    --tags-config "$tags"
    --replies-csv "$replies"
    --fixtures-csv "$fixtures"
    --write-augmented-csv
    --write-combined-csv
    --emoji-mode keep
    --emoji-boost
    --save-plots
    --plot-width-scale 0.8
    --plot-max-width 120
    --plot-height 8
    --activity-top-n 8
    --labels-stagger-rows 3
  )

  case "$backend" in
    transformers)
      args+=(
        --sentiment-backend transformers
        --transformers-model "$model"
        --export-transformers-details
      )
      ;;
    gpt)
      args+=(
        --sentiment-backend gpt
        --gpt-model "$gpt_model"
        --gpt-base-url "$gpt_url"
      )
      ;;
    vader)
      args+=(--sentiment-backend vader)
      ;;
    *)
      # Any other value still selects vader, but say so: a typo such as
      # "transformer" would otherwise switch backends without notice.
      printf 'run_all: unknown sentiment backend "%s"; falling back to vader\n' \
        "$backend" >&2
      args+=(--sentiment-backend vader)
      ;;
  esac

  ./run_scraper.sh analyze "${args[@]}" || rc_analyze=$?

  if (( rc_analyze != 0 )); then
    return "$rc_analyze"
  fi
  return "$rc_first"
}