Compare commits
No commits in common. "main" and "master" have entirely different histories.
9 changed files with 528 additions and 5 deletions
14
.env.example
Normal file
14
.env.example
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
# GTS Server Configuration
|
||||||
|
GTS_SERVER_URL=https://your-gts-instance.tld
|
||||||
|
GTS_ACCESS_TOKEN=your_gts_access_token_here
|
||||||
|
|
||||||
|
# Processing Configuration
|
||||||
|
MAX_POSTS_PER_RUN=75
|
||||||
|
DELAY_BETWEEN_REQUESTS=1
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
|
||||||
|
# RSS Configuration
|
||||||
|
RSS_URLS_FILE=/app/rss_feeds.txt
|
||||||
|
|
||||||
|
# Optional: Monitoring
|
||||||
|
# HEALTHCHECK_URL=https://hc-ping.com/your-uuid-here
|
29
.gitignore
vendored
Normal file
29
.gitignore
vendored
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
# Secrets and local config
|
||||||
|
.env
|
||||||
|
rss_feeds.txt
|
||||||
|
|
||||||
|
# Data directory
|
||||||
|
data/
|
||||||
|
*.json
|
||||||
|
|
||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
*.pyo
|
||||||
|
|
||||||
|
# Docker
|
||||||
|
.dockerignore
|
||||||
|
|
||||||
|
# Logs
|
||||||
|
*.log
|
||||||
|
|
||||||
|
# Editor files
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
*~
|
||||||
|
|
||||||
|
# OS files
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
24
Dockerfile
Normal file
24
Dockerfile
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
# Dockerfile
FROM python:3.11-slim

# Set working directory (WORKDIR creates it if it does not exist)
WORKDIR /app

# Copy and install requirements first so this layer stays cached
# until requirements.txt itself changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Create the non-root user and the data directory, then hand /app over to
# that user. Done in ONE layer: a separate `chown -R` layer would store a
# second copy of every file it touches.
RUN useradd -r -u 1000 holmirdas \
    && mkdir -p /app/data \
    && chown -R holmirdas:holmirdas /app

# Ship the script inside the image so the default CMD below works even when
# the container is started without the bind mount from compose.yml.
# (compose.yml may still mount a newer copy over this file.)
COPY --chown=holmirdas:holmirdas gts_holmirdas.py .

# Switch to non-root user
USER holmirdas

# Default command (will be overridden by docker-compose)
CMD ["python", "gts_holmirdas.py"]
|
20
LICENSE
20
LICENSE
|
@ -1,9 +1,21 @@
|
||||||
MIT License
|
MIT License
|
||||||
|
|
||||||
Copyright (c) 2024 oliverpifferi
|
Copyright (c) 2025 Matthias
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
|
|
113
README.md
113
README.md
|
@ -1 +1,112 @@
|
||||||
https://keyoxide.org/AF953733C09F6368797CA49E2F4968B5F7988AA5
|
# GTS-HolMirDas 🚀
|
||||||
|
|
||||||
|
RSS-based content discovery for [GoToSocial](https://codeberg.org/superseriousbusiness/gotosocial) instances.
|
||||||
|
|
||||||
|
Automatically discovers and federates content from RSS feeds across the Fediverse, helping small GoToSocial instances populate their federated timeline without relying on traditional relays.
|
||||||
|
|
||||||
|
Inspired by the original [HolMirDas](https://github.com/aliceif/HolMirDas) by [@aliceif](https://mkultra.x27.one/@aliceif), adapted for GoToSocial with enhanced Docker deployment and multi-instance processing.
|
||||||
|
|
||||||
|
## ✨ Key Features
|
||||||
|
|
||||||
|
- **📡 Multi-Instance Discovery** - Fetches content from configurable RSS feeds across Fediverse instances
|
||||||
|
- **⚡ Performance Scaling** - 20-100 posts per feed with URL parameters (`?limit=100`)
|
||||||
|
- **🐳 Production Ready** - Docker deployment, environment-based config, health monitoring
|
||||||
|
- **📊 Comprehensive Stats** - Runtime metrics, federation growth, performance tracking
|
||||||
|
- **🔧 Zero Maintenance** - Runs automatically every hour with duplicate detection
|
||||||
|
|
||||||
|
## 🚀 Quick Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Clone the repository
|
||||||
|
git clone https://git.klein.ruhr/matthias/gts-holmirdas
|
||||||
|
cd gts-holmirdas
|
||||||
|
|
||||||
|
# Copy configuration templates
|
||||||
|
cp .env.example .env
|
||||||
|
cp rss_feeds.example.txt rss_feeds.txt
|
||||||
|
|
||||||
|
# Edit configuration
|
||||||
|
nano .env # Add your GTS credentials
|
||||||
|
nano rss_feeds.txt # Customize RSS feeds
|
||||||
|
|
||||||
|
# Deploy
|
||||||
|
docker compose up -d
|
||||||
|
|
||||||
|
# Monitor
|
||||||
|
docker compose logs -f
|
||||||
|
```
|
||||||
|
|
||||||
|
## 📈 Performance at Scale
|
||||||
|
|
||||||
|
**Real Production Data:**
|
||||||
|
```
|
||||||
|
📊 Runtime: 8:42 | 487 posts processed | 3,150+ instances discovered
|
||||||
|
⚡ 56 posts/minute | 102 RSS feeds | +45 new instances per run
|
||||||
|
💾 Resource usage: ~450MB RAM total (GoToSocial + tools)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Scaling Options:**
|
||||||
|
- **Conservative:** 20 posts/feed (~100 posts/run)
|
||||||
|
- **Balanced:** 50 posts/feed (~300 posts/run)
|
||||||
|
- **Aggressive:** 100 posts/feed (~600 posts/run)
|
||||||
|
|
||||||
|
## 🛠️ Configuration Essentials
|
||||||
|
|
||||||
|
### Environment Variables (.env)
|
||||||
|
```bash
|
||||||
|
# Required
|
||||||
|
GTS_SERVER_URL=https://your-gts-instance.tld
|
||||||
|
GTS_ACCESS_TOKEN=your_gts_access_token
|
||||||
|
|
||||||
|
# Performance Tuning
|
||||||
|
MAX_POSTS_PER_RUN=25 # Posts per feed per run
|
||||||
|
DELAY_BETWEEN_REQUESTS=1 # Seconds between API calls
|
||||||
|
LOG_LEVEL=INFO # DEBUG for troubleshooting
|
||||||
|
```
|
||||||
|
|
||||||
|
### RSS Feeds (rss_feeds.txt)
|
||||||
|
```bash
|
||||||
|
# Use URL parameters to scale performance
|
||||||
|
https://mastodon.social/tags/homelab.rss?limit=50
|
||||||
|
https://fosstodon.org/tags/selfhosting.rss?limit=100
|
||||||
|
https://infosec.exchange/tags/security.rss?limit=75
|
||||||
|
```
|
||||||
|
|
||||||
|
### GoToSocial Access Token
|
||||||
|
1. Login to your GoToSocial instance
|
||||||
|
2. Settings → Applications → Create new application
|
||||||
|
3. Required scopes: `read`, `read:search`, `read:statuses`
|
||||||
|
4. Copy access token to `.env` file
|
||||||
|
|
||||||
|
## 📖 Complete Documentation
|
||||||
|
|
||||||
|
For detailed information, visit our **[Wiki](https://git.klein.ruhr/matthias/gts-holmirdas/wiki)**:
|
||||||
|
|
||||||
|
- **[📋 Installation Guide](https://git.klein.ruhr/matthias/gts-holmirdas/wiki/Installation-Guide.-)** - Detailed setup, Docker configuration, deployment options
|
||||||
|
- **[📈 Performance & Scaling](https://git.klein.ruhr/matthias/gts-holmirdas/wiki/Performance-%26-Scaling)** - Optimization tables, scaling strategies, resource planning
|
||||||
|
- **[🛠️ Troubleshooting](https://git.klein.ruhr/matthias/gts-holmirdas/wiki/Troubleshooting)** - Common issues, Docker problems, debugging guide
|
||||||
|
- **[⚙️ Advanced Configuration](https://git.klein.ruhr/matthias/gts-holmirdas/wiki/Advanced-Configuration)** - Environment variables, RSS strategies, production tips
|
||||||
|
- **[📊 Monitoring & Stats](https://git.klein.ruhr/matthias/gts-holmirdas/wiki/Monitoring-%26-Stats)** - Understanding output, health monitoring, metrics
|
||||||
|
- **[❓ FAQ](https://git.klein.ruhr/matthias/gts-holmirdas/wiki/FAQ+-+Frequently+Asked+Questions.-)** - Common questions and answers
|
||||||
|
|
||||||
|
## 🤝 Community & Support
|
||||||
|
|
||||||
|
- **[Contributing Guide](Contributing)** - Development setup and contribution guidelines *(coming soon)*
|
||||||
|
- **Issues**: [Report bugs or request features](https://git.klein.ruhr/matthias/gts-holmirdas/issues)
|
||||||
|
- **Contact**: [@matthias@me.klein.ruhr](https://me.klein.ruhr/@matthias) on the Fediverse
|
||||||
|
|
||||||
|
## 🔗 Related Projects
|
||||||
|
|
||||||
|
- **[FediFetcher](https://github.com/nanos/fedifetcher)** - Fetches missing replies and posts
|
||||||
|
- **[GoToSocial](https://github.com/superseriousbusiness/gotosocial)** - Lightweight ActivityPub server
|
||||||
|
- **[slurp](https://github.com/VyrCossont/slurp)** - Import posts from other instances
|
||||||
|
|
||||||
|
## 📄 License
|
||||||
|
|
||||||
|
MIT License - see [LICENSE](LICENSE) file for details.
|
||||||
|
|
||||||
|
## 🙏 Acknowledgments
|
||||||
|
|
||||||
|
- Inspired by [HolMirDas](https://github.com/aliceif/HolMirDas) by [@aliceif](https://mkultra.x27.one/@aliceif)
|
||||||
|
- Built for the GoToSocial community
|
||||||
|
- RSS-to-ActivityPub federation approach
|
32
compose.yml
Normal file
32
compose.yml
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
services:
  gts-holmirdas:
    build: .
    container_name: gts-holmirdas
    restart: unless-stopped

    env_file:
      - .env

    volumes:
      - ./data:/app/data
      - ./gts_holmirdas.py:/app/gts_holmirdas.py:ro
      - ./rss_feeds.txt:/app/rss_feeds.txt:ro

    # Run once per hour: the loop sleeps 3600 seconds between runs.
    # List-form entrypoint with a literal block scalar (|) keeps every newline
    # of the shell script intact; a folded scalar (>) would join equally
    # indented lines into one and break the loop.
    entrypoint:
      - sh
      - -c
      - |
        while true; do
          echo 'Starting GTS-HolMirDas run...'
          python gts_holmirdas.py
          echo 'GTS-HolMirDas run completed. Sleeping for 1 hour...'
          sleep 3600
        done

    # Resource limits
    deploy:
      resources:
        limits:
          memory: 512M
        reservations:
          memory: 256M
|
281
gts_holmirdas.py
Normal file
281
gts_holmirdas.py
Normal file
|
@ -0,0 +1,281 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
GTS-HolMirDas: RSS-based content discovery for GoToSocial
|
||||||
|
|
||||||
|
Inspired by HolMirDas by @aliceif:
|
||||||
|
- GitHub: https://github.com/aliceif/HolMirDas
|
||||||
|
- Fediverse: @aliceif@mkultra.x27.one
|
||||||
|
|
||||||
|
This GoToSocial adaptation extends the original RSS-to-ActivityPub concept
|
||||||
|
with Docker deployment, multi-instance processing, and comprehensive monitoring.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import requests
|
||||||
|
import feedparser
|
||||||
|
from datetime import timedelta
|
||||||
|
from urllib.parse import quote_plus
|
||||||
|
|
||||||
|
class GTSHolMirDas:
    """Discover posts through RSS feeds and resolve them on a GoToSocial server.

    Each run reads the configured RSS feeds, skips post URLs that were already
    handled in earlier runs, and asks the GoToSocial search API to resolve the
    remaining ones. Handled URLs and the last known instance count are kept in
    a JSON state file between runs.
    """

    def __init__(self):
        """Initialize the RSS fetcher with configuration from the environment.

        Reads GTS_SERVER_URL, GTS_ACCESS_TOKEN, MAX_POSTS_PER_RUN,
        DELAY_BETWEEN_REQUESTS, HEALTHCHECK_URL, LOG_LEVEL, RSS_URLS_FILE and
        RSS_URLS, configures logging, and loads the persisted state.
        """
        self.config = {
            # Trailing slashes are dropped so "<server_url>/api/..." never contains "//"
            "server_url": os.getenv("GTS_SERVER_URL", "https://your-gts-instance").rstrip("/"),
            "access_token": os.getenv("GTS_ACCESS_TOKEN", ""),
            "max_posts_per_run": int(os.getenv("MAX_POSTS_PER_RUN", "25")),
            # float() accepts everything int() did and also fractional seconds
            "delay_between_requests": float(os.getenv("DELAY_BETWEEN_REQUESTS", "2")),
            "healthcheck_url": os.getenv("HEALTHCHECK_URL", ""),
            "log_level": os.getenv("LOG_LEVEL", "INFO")
        }

        # Setup logging FIRST
        # Accept any letter case and fall back to INFO instead of crashing
        # with AttributeError on an unknown level name.
        level_name = str(self.config["log_level"]).strip().upper()
        level = getattr(logging, level_name, None)
        level_is_valid = isinstance(level, int)
        logging.basicConfig(
            level=level if level_is_valid else logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )
        self.logger = logging.getLogger(__name__)
        if not level_is_valid:
            self.logger.warning(
                f"Unknown LOG_LEVEL {self.config['log_level']!r}, falling back to INFO"
            )

        # Load RSS URLs from file or environment
        rss_urls_file = os.getenv("RSS_URLS_FILE")
        if rss_urls_file and os.path.exists(rss_urls_file):
            # Load from file: one URL per line, '#' starts a comment
            try:
                with open(rss_urls_file, 'r') as f:
                    self.config["rss_urls"] = [
                        line.split('#', 1)[0].strip() for line in f
                        if line.strip() and not line.strip().startswith('#')
                    ]
                self.logger.info(f"Loaded {len(self.config['rss_urls'])} RSS URLs from file: {rss_urls_file}")
            except Exception as e:
                self.logger.error(f"Could not load RSS URLs from file {rss_urls_file}: {e}")
                self.config["rss_urls"] = []
        else:
            # Fallback to environment variable (comma-separated list)
            self.config["rss_urls"] = [
                url.strip() for url in os.getenv("RSS_URLS", "").split(",")
                if url.strip()
            ]
            if self.config["rss_urls"]:
                self.logger.info(f"Loaded {len(self.config['rss_urls'])} RSS URLs from environment")

        # Statistics tracking; load_processed_urls() overwrites this when the
        # state file contains a stored instance count.
        self.previous_instances = 0

        # Load processed URLs from persistent storage
        self.processed_urls_file = "/app/data/processed_urls.json"
        self.processed_urls = self.load_processed_urls()

    def load_processed_urls(self):
        """Load previously processed URLs and instance count from file.

        Returns:
            set: the stored post URLs, or an empty set when the state file is
            missing or cannot be read. As a side effect sets
            ``self.previous_instances`` from the file when present.
        """
        try:
            if os.path.exists(self.processed_urls_file):
                with open(self.processed_urls_file, 'r') as f:
                    data = json.load(f)
                # Load previous instance count for statistics; anything that
                # is not an integer cannot be used for the later subtraction.
                stored = data.get('previous_instances', 0)
                self.previous_instances = stored if isinstance(stored, int) else 0
                return set(data.get('processed_urls', []))
        except Exception as e:
            self.logger.warning(f"Could not load processed URLs: {e}")

        return set()

    def save_processed_urls(self, current_instances=None):
        """Save processed URLs and current instance count to file.

        Args:
            current_instances: instance count to store as the baseline for the
                next run. When it is None or 'unknown', the baseline loaded at
                startup is carried forward instead of being dropped.
        """
        try:
            os.makedirs(os.path.dirname(self.processed_urls_file), exist_ok=True)
            data = {
                'processed_urls': sorted(self.processed_urls),
                'last_updated': time.time()
            }
            # Save current instance count for next run
            if current_instances is not None and current_instances != 'unknown':
                data['previous_instances'] = current_instances
            else:
                carried = getattr(self, 'previous_instances', 0)
                if carried:
                    data['previous_instances'] = carried

            # Write to a sibling temp file and swap it in, so an interrupted
            # write can never leave a truncated state file behind.
            tmp_path = self.processed_urls_file + ".tmp"
            with open(tmp_path, 'w') as f:
                json.dump(data, f, indent=2)
            os.replace(tmp_path, self.processed_urls_file)
        except Exception as e:
            self.logger.error(f"Could not save processed URLs: {e}")

    def fetch_rss_urls(self, rss_url):
        """Fetch URLs from RSS feed.

        Args:
            rss_url: address of the feed to parse.

        Returns:
            list: the ``link`` of every feed entry that has one; an empty list
            when parsing raises.
        """
        try:
            self.logger.info(f"Fetching RSS feed: {rss_url}")

            # Parse RSS feed
            feed = feedparser.parse(rss_url)

            if feed.bozo:
                self.logger.warning(f"RSS feed may have issues: {rss_url}")

            # Extract URLs from entries
            urls = [entry.link for entry in feed.entries if hasattr(entry, 'link')]

            self.logger.info(f"Found {len(urls)} URLs in RSS feed")
            return urls

        except Exception as e:
            self.logger.error(f"Error fetching RSS feed {rss_url}: {e}")
            return []

    def lookup_post(self, post_url):
        """Look up a post URL using GTS search API.

        Args:
            post_url: URL of the remote post to resolve.

        Returns:
            bool: True when the search returned at least one status or
            account, False on empty results, non-200 responses, or request
            errors.
        """
        try:
            # Prepare search API call
            search_url = f"{self.config['server_url']}/api/v2/search"
            params = {
                'q': post_url,
                'type': 'statuses',
                'resolve': 'true',
                'limit': 1
            }
            headers = {
                'Authorization': f'Bearer {self.config["access_token"]}',
                'Content-Type': 'application/json'
            }

            # Make API call
            response = requests.get(
                search_url,
                params=params,
                headers=headers,
                timeout=30
            )

            if response.status_code != 200:
                self.logger.error(f"API error {response.status_code} for {post_url}: {response.text}")
                return False

            results = response.json()
            if results.get('statuses') or results.get('accounts'):
                self.logger.info(f"Successfully looked up: {post_url}")
                return True

            self.logger.warning(f"No results for: {post_url}")
            return False

        except requests.exceptions.RequestException as e:
            self.logger.error(f"Error looking up {post_url}: {e}")
            return False

    def _fetch_instance_count(self):
        """Return the server's known-instance count, or 'unknown' if unavailable."""
        try:
            instance_info = requests.get(f"{self.config['server_url']}/api/v1/instance",
                                         headers={'Authorization': f'Bearer {self.config["access_token"]}'},
                                         timeout=10)
            if instance_info.status_code == 200:
                count = instance_info.json().get('stats', {}).get('domain_count', 'unknown')
                # Only an integer can be subtracted from the stored baseline
                if isinstance(count, int):
                    return count
        except Exception as e:
            self.logger.error(f"Failed to get instance count: {e}")
        return 'unknown'

    def _print_statistics(self, runtime_seconds, total_processed, current_instances, new_instances):
        """Print the end-of-run summary to stdout."""
        runtime_formatted = str(timedelta(seconds=int(runtime_seconds)))

        print("\n📊 GTS-HolMirDas Run Statistics:")
        print(f" ⏱️ Runtime: {runtime_formatted}")
        print(f" 📄 Total posts processed: {total_processed}")
        print(f" 🌐 Current known instances: {current_instances}")
        # A negative difference is deliberately not reported
        if new_instances != 'unknown' and new_instances >= 0:
            print(f" ➕ New instances discovered: +{new_instances}")
        print(f" 📡 RSS feeds processed: {len(self.config['rss_urls'])}")
        # Rate is only meaningful for runs longer than a minute
        if runtime_seconds > 60:
            print(f" ⚡ Posts per minute: {total_processed / (runtime_seconds / 60):.1f}")

    def process_feeds(self):
        """Process all configured RSS feeds.

        For every feed, resolves up to ``max_posts_per_run`` not-yet-processed
        post URLs, sleeping ``delay_between_requests`` seconds after each
        lookup. Prints run statistics, saves state, and pings the optional
        healthcheck URL (start / success / fail).

        Raises:
            Exception: any unexpected error is logged, reported to the
            healthcheck as a failure, and re-raised. State gathered so far is
            saved first.
        """
        total_processed = 0

        # Record start time for statistics
        self.start_time = time.time()

        # Ping healthcheck start
        self.ping_healthcheck("/start")

        try:
            for rss_url in self.config["rss_urls"]:
                if not rss_url.strip():
                    continue

                self.logger.info(f"Processing feed: {rss_url}")

                # Get URLs from RSS
                urls = self.fetch_rss_urls(rss_url)

                # Filter out already processed URLs
                new_urls = [url for url in urls if url not in self.processed_urls]

                if not new_urls:
                    self.logger.info("No new URLs to process")
                    continue

                # Rate limiting: max posts per run (applied per feed)
                urls_to_process = new_urls[:self.config["max_posts_per_run"]]

                self.logger.info(f"Processing {len(urls_to_process)} new URLs")

                for url in urls_to_process:
                    # Only successful lookups are remembered, so failed ones
                    # are retried on a later run.
                    if self.lookup_post(url):
                        self.processed_urls.add(url)
                        total_processed += 1

                    # Rate limiting: delay between requests
                    time.sleep(self.config["delay_between_requests"])

            # Calculate runtime
            runtime_seconds = time.time() - self.start_time

            # Get current instance count
            current_instances = self._fetch_instance_count()

            # Calculate new instances (if we have previous data)
            new_instances = 'unknown'
            if self.previous_instances > 0 and current_instances != 'unknown':
                new_instances = current_instances - self.previous_instances

            # Print comprehensive statistics
            self._print_statistics(runtime_seconds, total_processed, current_instances, new_instances)

            self.save_processed_urls(current_instances)

            # Ping healthcheck success
            self.ping_healthcheck("")

        except Exception as e:
            self.logger.error(f"Error during processing: {e}")
            # Keep the URLs resolved before the failure so they are not
            # looked up again on the next run.
            self.save_processed_urls()
            # Ping healthcheck failure
            self.ping_healthcheck("/fail")
            raise

    def ping_healthcheck(self, endpoint=""):
        """Ping healthchecks.io for monitoring.

        Args:
            endpoint: suffix appended to the configured healthcheck URL
                ("/start", "/fail", or "" for success). Does nothing when no
                healthcheck URL is configured; request errors are only logged.
        """
        if not self.config.get("healthcheck_url"):
            return

        try:
            url = self.config["healthcheck_url"] + endpoint
            requests.get(url, timeout=10)
        except Exception as e:
            self.logger.warning(f"Failed to ping healthcheck: {e}")
|
||||||
|
|
||||||
|
def main():
    """Run one fetch cycle; log and re-raise any fatal error."""
    try:
        app = GTSHolMirDas()

        # The access token is mandatory: refuse to start a run without it
        token = app.config["access_token"]
        if token:
            app.process_feeds()
        else:
            raise ValueError("GTS_ACCESS_TOKEN environment variable is required")

    except Exception as exc:
        logging.error(f"Fatal error: {exc}")
        raise


if __name__ == "__main__":
    main()
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
requests==2.31.0
|
||||||
|
feedparser==6.0.10
|
||||||
|
urllib3==2.0.7
|
17
rss_feeds.example.txt
Normal file
17
rss_feeds.example.txt
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
# Example RSS feeds - customize for your interests
|
||||||
|
|
||||||
|
# Add ?limit=X parameter to increase posts per feed (default: 20, max: 100)
|
||||||
|
# Higher limits = more content discovery, but longer processing time
|
||||||
|
# Performance tip: Start with limit=50, then increase to 100 if needed
|
||||||
|
|
||||||
|
# homelab (up to 100 posts per feed)
|
||||||
|
https://mastodon.social/tags/homelab.rss # 20 posts/feed (default)
|
||||||
|
https://fosstodon.org/tags/homelab.rss?limit=50 # 50 posts/feed
|
||||||
|
|
||||||
|
# selfhosting (up to 100 posts per feed)
|
||||||
|
https://mastodon.social/tags/selfhosting.rss?limit=100 # 100 posts/feed
|
||||||
|
https://infosec.exchange/tags/selfhosting.rss?limit=100 # 100 posts/feed
|
||||||
|
|
||||||
|
# docker (up to 100 posts per feed)
|
||||||
|
https://social.tchncs.de/tags/docker.rss?limit=100 # 100 posts/feed
|
||||||
|
https://fosstodon.org/tags/docker.rss?limit=100 # 100 posts/feed
|
Loading…
Add table
Add a link
Reference in a new issue