#!/usr/bin/python3 -su

## Copyright (C) 2017 - 2025 ENCRYPTED SUPPORT LLC <adrelanos@whonix.org>
## See the file COPYING for copying conditions.

## AI-Assisted

## Per-URL connectivity probe for the time sources configured in
## /etc/sdwdate.d/*.conf. Uses the standard sdwdate stack
## (config.read_pools + remote_times.get_time_from_servers) so the same
## logic that drives production runs through every URL exactly once.
##
## Usage:
##   onion-tester              probe every URL in the conf (default).
##   onion-tester URL [URL...] subset mode: probe ONLY the given URLs (a subset of
##                             the conf) -- e.g. to re-test just the URLs that failed
##                             a prior run, without re-probing the whole set.
##
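## Output per URL (exactly one of the three "pool N ..." lines):
##   pool N url URL: ONLINE  unixtime=...  diff=...s
##   pool N url URL: REACHABLE-but-INVALID  status=done  (...)
##   pool N url URL: OFFLINE  status=...  (Curl --head is OK | Curl --head also Not OK)
##   FAILED_URL URL            stable, machine-readable marker for each failed URL
##                             (parse these to drive a targeted retry). Printed for
##                             both the OFFLINE and the REACHABLE-but-INVALID case.
##
## Exit codes:
##   0 = every probed URL returned status "ok" (it responded AND passed the
##       time sanity checks)
##   1 = at least one URL failed (OFFLINE or REACHABLE-but-INVALID)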
##
## URLs are read from /etc/sdwdate.d/*.conf which is root-owned and
## trusted. subprocess.run() with a list of arguments (no shell) is
## used in exec_curl() to pass URLs as a single argument to curl,
## avoiding shell interpretation.

import sys
sys.dont_write_bytecode = True

import os
import subprocess
import time

from sdwdate.config import read_pools
from sdwdate.proxy_settings import proxy_settings
from sdwdate.remote_times import get_time_from_servers


def _env_int(name, default):
    """Read environment variable *name* as a positive integer.

    Returns *default* when the variable is unset or empty (in which case
    *default* itself is what gets converted), when the text is not a valid
    integer, or when the parsed value is smaller than 1.
    """
    try:
        raw = os.environ.get(name, "")
        ## An unset/empty variable falls through to the default, which is
        ## still run through int() and the >= 1 check below.
        parsed = int(raw if raw else default)
    except ValueError:
        return default
    if parsed < 1:
        return default
    return parsed


## How many URLs to probe concurrently per chunk. get_time_from_servers probes a
## chunk in parallel (ThreadPoolExecutor), so a chunk containing an unreachable
## onion costs one ~120s primary timeout regardless of the other URLs in it. A
## larger chunk lets those timeouts OVERLAP instead of serializing across chunks
## -- the difference between a ~13-min and a ~2-min full sweep. Default 3 matches
## production sdwdate's per-pool sampling; the CI wrapper raises it (ONION_TESTER_CHUNK).
## An unset, empty, non-integer or < 1 value of ONION_TESTER_CHUNK is ignored and
## the default is used (see _env_int).
CHUNK_SIZE = _env_int("ONION_TESTER_CHUNK", 3)
## curl --head reachability timeout for the secondary (diagnostic) check on a URL
## already judged OFFLINE. Overridable so CI can shorten this per-failure overhead.
## Same fallback rule as above for ONION_TESTER_HEAD_MAXTIME. Kept as a str
## because exec_curl() passes it verbatim as the value of curl's --max-time
## argument.
HEAD_MAX_TIME = str(_env_int("ONION_TESTER_HEAD_MAXTIME", 60))


def chunks(my_list, n):
    """Lazily split my_list into consecutive slices of at most n items.

    Every slice holds n items except possibly the last one, which holds
    whatever remains. An empty my_list yields nothing.
    """
    total = len(my_list)
    yield from (my_list[start:start + n] for start in range(0, total, n))


def exec_curl(c_url, proxy_ip, proxy_port):
    """Run a secondary `curl --head` check of c_url through the SOCKS proxy.

    curl is started without a shell (argument list), with its output
    discarded and HEAD_MAX_TIME as the --max-time value.

    Returns a message suffix (with a leading space) to append to a status
    line: ' (Curl --head is OK)' when curl exits with code 0, otherwise
    ' (Curl --head also Not OK)'. Any exception raised while preparing or
    running curl is also reported as "Not OK"; this function never raises.
    """
    curl_ok = ' (Curl --head is OK)'
    curl_not_ok = ' (Curl --head also Not OK)'
    try:
        socks_endpoint = '{}:{}'.format(proxy_ip, proxy_port)
        completed = subprocess.run(
            [
                'curl',
                '--socks5-hostname', socks_endpoint,
                '--max-time', HEAD_MAX_TIME,
                '--head', c_url,
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except Exception:
        ## Best-effort diagnostic: a failure to run curl at all is reported
        ## the same way as a failed request.
        return curl_not_ok
    return curl_ok if completed.returncode == 0 else curl_not_ok


class Pool:
    ## Holds what read_pools() returns for one pool index.
    ## The attribute names are deliberately singular (.url / .comment) because
    ## that is what sdwdate.config.get_comment() expects.
    def __init__(self, pool):
        ## read_pools(pool, 'test') returns a pair; the first item is stored
        ## as .url (iterated as a collection of URLs by CheckRemotes), the
        ## second as .comment.
        pool_urls, pool_comments = read_pools(pool, 'test')
        self.url = pool_urls
        self.comment = pool_comments


class CheckRemotes:
    """Probe the configured time-source URLs pool by pool and tally results.

    Attributes:
        proxy_ip, proxy_port: SOCKS proxy handed to get_time_from_servers()
            and to the secondary curl check.
        only_urls: set of URLs to restrict probing to; empty means "probe
            every URL of every pool".
        number_of_pools: number of pools read from the conf (3).
        pools: one Pool object per pool index.
        online: running total of URLs whose status was "ok".
        offline: running total of every other URL (OFFLINE as well as
            REACHABLE-but-INVALID). main() derives the exit code from it.
    """

    def __init__(self, proxy_ip, proxy_port, only_urls=None):
        self.proxy_ip = proxy_ip
        self.proxy_port = proxy_port
        ## Optional allowlist: when non-empty, probe ONLY these URLs (a subset of
        ## the conf), so a caller can re-test just the URLs that failed a prior run
        ## instead of the whole set. Empty/None => probe every conf URL (default).
        self.only_urls = set(only_urls or ())
        self.number_of_pools = 3
        self.pools = [Pool(p) for p in range(self.number_of_pools)]
        self.online = 0
        self.offline = 0

    def _select_urls(self, pool):
        """Return the URLs of pool to probe, honoring the subset allowlist."""
        if not self.only_urls:
            return pool.url
        return [u for u in pool.url if u in self.only_urls]

    def _unmatched_urls(self):
        """Return the allowlisted URLs that appear in no pool, sorted."""
        if not self.only_urls:
            return []
        known = {u for pool in self.pools for u in pool.url}
        return sorted(self.only_urls - known)

    def _describe_result(self, pool_index, url, status, remote_unixtime):
        """Build the status line for one probed URL.

        Returns a (message, abs_diff) pair. abs_diff is the absolute
        difference in whole seconds between the local clock and
        remote_unixtime, or None when the result contributes no time sample
        (status is not "ok", or it is "ok" but remote_unixtime is empty).
        For a status other than "ok"/"done" the secondary curl check is run
        and its verdict appended to the message.
        """
        pool_number = pool_index + 1
        ## Match production sdwdate.sdwdate (line ~721): only
        ## status == "ok" is a usable remote (URL responded AND
        ## time sanity checks passed). "done" means the URL
        ## responded but a sanity check failed and production
        ## would put it in failed_urls. "timeout"/"error" mean
        ## the URL didn't respond.
        if status == 'ok':
            if remote_unixtime:
                diff = int(time.time()) - int(remote_unixtime)
                msg = ('pool %d url %s: ONLINE  unixtime=%s  '
                       'diff=%ds' % (pool_number, url,
                                     remote_unixtime, diff))
                return msg, abs(diff)
            msg = ('pool %d url %s: ONLINE  (no unixtime)' %
                   (pool_number, url))
            return msg, None
        if status == 'done':
            msg = ('pool %d url %s: REACHABLE-but-INVALID  '
                   'status=done  (time sanity check did not '
                   'pass; production sdwdate would reject)' %
                   (pool_number, url))
            return msg, None
        msg = ('pool %d url %s: OFFLINE  status=%s' %
               (pool_number, url, status))
        msg += exec_curl(url, self.proxy_ip, self.proxy_port)
        return msg, None

    def probe_pool(self, pool_index, pool):
        """Probe the URLs of one pool in chunks and print per-URL results.

        pool_index is the zero-based pool position (printed one-based);
        pool is an object whose .url holds the pool's URLs. In subset mode
        only the URLs also present in self.only_urls are probed. Prints a
        per-pool summary and adds the pool's counts to self.online and
        self.offline. Returns None.
        """
        urls = self._select_urls(pool)
        print('### Pool {} ({} URL(s))'.format(pool_index + 1, len(urls)))
        if not urls:
            print('  (empty)')
            return

        online = 0
        offline = 0
        tot_diff = 0
        ## Number of URLs that actually contributed to tot_diff. An "ok"
        ## result without a unixtime counts as ONLINE but has no diff, so it
        ## must not dilute the average.
        diff_samples = 0

        for url_chunk in chunks(urls, CHUNK_SIZE):
            print('Testing the URL Chunk:')
            for u in url_chunk:
                print('  ' + u)

            ## get_time_from_servers returns seven parallel lists; only the
            ## first three are needed here.
            urls_list, status_list, remote_unixtime_list, \
                _took_time_list, _half_took_time_list, \
                _time_diff_raw_int_list, \
                _time_diff_lag_cleaned_float_list = get_time_from_servers(
                    self.pools, url_chunk, self.proxy_ip, self.proxy_port)

            ## Indexed lookups (rather than zip) so that result lists of
            ## unequal length raise instead of silently dropping a URL.
            for i, url in enumerate(urls_list):
                status = status_list[i]
                remote_unixtime = remote_unixtime_list[i]
                msg, abs_diff = self._describe_result(
                    pool_index, url, status, remote_unixtime)
                if status == 'ok':
                    online += 1
                else:
                    offline += 1
                if abs_diff is not None:
                    tot_diff += abs_diff
                    diff_samples += 1
                print(msg)
                ## Stable, machine-readable marker for every failed URL (both the
                ## OFFLINE and REACHABLE-but-INVALID cases count as a failure and set
                ## the non-zero exit). A caller can collect these to retry only the
                ## URLs that failed. One URL per line; onion/clearnet URLs have no
                ## spaces, so a space separator parses unambiguously.
                if status != 'ok':
                    print('FAILED_URL ' + url)
            print('')

        avg = (tot_diff / diff_samples) if diff_samples else 0
        print('##############################')
        print('Pool {} summary: {} ONLINE, {} OFFLINE.  '
              'Avg |time diff| over ONLINE: {:.2f}s'.format(
                  pool_index + 1, online, offline, avg))
        print('##############################')
        print('')
        self.online += online
        self.offline += offline

    def loop(self):
        """Probe every pool in order and print the grand total.

        In subset mode, each requested URL that is found in no pool is
        reported on stderr first: such a URL is never probed, so without
        the warning a mistyped URL would look like a successful run.
        """
        print('Current Time: {}'.format(int(time.time())))
        print('Using SOCKS proxy {}:{}'.format(
            self.proxy_ip, self.proxy_port))
        print('Starting remotes check...')
        print('')
        ## Warnings go to stderr so the stdout format (status lines and
        ## FAILED_URL markers) stays unchanged for parsers.
        for url in self._unmatched_urls():
            print('WARNING: requested URL is not in any configured pool, '
                  'not probed: ' + url, file=sys.stderr)
        for pool_index, pool in enumerate(self.pools):
            self.probe_pool(pool_index, pool)

        print('==============================')
        print('TOTAL: {} ONLINE, {} OFFLINE'.format(
            self.online, self.offline))
        print('==============================')


def main():
    """Script entry point: probe the URLs and exit with the result code.

    Positional command-line arguments, if any, form a URL allowlist (subset
    mode): only those URLs are probed. Without arguments every URL in the
    conf is probed. Exits with 0 when no URL was counted as offline,
    otherwise with 1.
    """
    requested_urls = sys.argv[1:]
    socks_ip, socks_port = proxy_settings()
    checker = CheckRemotes(socks_ip, socks_port, requested_urls)
    checker.loop()
    exit_code = 1 if checker.offline != 0 else 0
    sys.exit(exit_code)


if __name__ == '__main__':
    main()
