<?php
/**
 * External Integrations Class
 *
 * Handles external API calls for reputation checks:
 * - Google Safe Browsing API v5
 * - PhishTank API
 *
 * Features: opt-in, cached responses, rate limiting, error handling.
 *
 * @package ContentGuardPro
 * @since   1.0.0
 */

// If this file is called directly, abort.
if ( ! defined( 'ABSPATH' ) ) {
	exit;
}

/**
 * Class CGP_Integrations
 *
 * Per PRD Section 3.10: Integration Capabilities
 * - Google Safe Browsing v5
 * - PhishTank
 * - Opt-in, cached, rate-limited
 *
 * @since 1.0.0
 */
class CGP_Integrations {

	/**
	 * Cache duration for reputation checks (24 hours).
	 *
	 * @since 1.0.0
	 * @var int
	 */
	const CACHE_DURATION = 86400;

	/**
	 * Rate limit: Max API calls per hour.
	 *
	 * @since 1.0.0
	 * @var int
	 */
	const RATE_LIMIT_PER_HOUR = 100;

	/**
	 * Google Safe Browsing API endpoint.
	 *
	 * @since 1.0.0
	 * @var string
	 */
	const GOOGLE_SB_ENDPOINT = 'https://safebrowsing.googleapis.com/v5/threatMatches:find';

	/**
	 * PhishTank API endpoint.
	 *
	 * @since 1.0.0
	 * @var string
	 */
	const PHISHTANK_ENDPOINT = 'https://checkurl.phishtank.com/checkurl/';

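	/**
	 * Public key used to verify rule pack signatures (PEM format).
	 *
	 * NOTE(review): the value below ends in "..." and looks like a truncated
	 * placeholder - confirm the complete vendor key is substituted before
	 * release, since verification cannot succeed against an incomplete key.
	 *
	 * @since 1.0.0
	 * @var string
	 */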
	const PUBLIC_KEY = "-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA...\n-----END PUBLIC KEY-----";

	/**
	 * Check a URL against the Google Safe Browsing API.
	 *
	 * Returns null without contacting the API when the integration is disabled,
	 * the URL is empty, no API key is configured, or the hourly rate limit has
	 * been reached. Verdicts are cached per URL in a transient: threats and
	 * clean results for CACHE_DURATION, failed lookups as 'clean' for one hour
	 * so a failing API is retried later instead of on every call.
	 *
	 * The API key is validated before the rate limit counter is incremented so
	 * that a missing key does not consume the hourly quota.
	 *
	 * NOTE(review): the request body uses the client/threatInfo shape of the
	 * Lookup API "threatMatches:find" method, which is documented for v4 -
	 * confirm that the v5 endpoint in GOOGLE_SB_ENDPOINT accepts it.
	 *
	 * @since 1.0.0
	 * @param string $url The URL to check.
	 * @return array|null Array with 'source', 'threat', 'threat_type', 'platform'
	 *                    and 'details' keys, or null if clean/unavailable.
	 */
	public static function check_google_safe_browsing( $url ) {
		// Check if enabled in settings.
		$settings = get_option( 'content_guard_pro_settings', array() );
		if ( empty( $settings['google_safe_browsing_enabled'] ) ) {
			return null;
		}

		// Nothing to look up; avoid spending an API call on an empty value.
		if ( ! is_string( $url ) || '' === $url ) {
			return null;
		}

		// Check cache first (before rate limiting).
		$cache_key = 'content_guard_pro_gsb_' . md5( $url );
		$cached    = get_transient( $cache_key );
		if ( false !== $cached ) {
			self::log_debug( "Google Safe Browsing cache hit for: {$url}" );
			return ( 'clean' === $cached ) ? null : $cached;
		}

		// Validate the API key BEFORE touching the rate limit counter, otherwise
		// every call made while the key is missing would eat into the quota.
		$api_key = isset( $settings['google_safe_browsing_api_key'] ) ? $settings['google_safe_browsing_api_key'] : '';
		if ( empty( $api_key ) ) {
			self::log_debug( 'Google Safe Browsing API key not configured' );
			return null;
		}

		// Check rate limit before making API call.
		if ( ! self::check_rate_limit( 'google_sb' ) ) {
			self::log_debug( 'Google Safe Browsing rate limit exceeded' );
			return null;
		}

		// Count the call before the request is sent so concurrent requests see it sooner.
		self::increment_rate_limit( 'google_sb' );

		// Prepare API request. The key is URL-encoded so unexpected characters
		// cannot alter the query string.
		$endpoint = self::GOOGLE_SB_ENDPOINT . '?key=' . rawurlencode( $api_key );
		$body     = array(
			'client'     => array(
				'clientId'      => get_site_url(),
				'clientVersion' => CONTENT_GUARD_PRO_VERSION,
			),
			'threatInfo' => array(
				'threatTypes'      => array(
					'MALWARE',
					'SOCIAL_ENGINEERING',
					'UNWANTED_SOFTWARE',
					'POTENTIALLY_HARMFUL_APPLICATION',
				),
				'platformTypes'    => array( 'ANY_PLATFORM' ),
				'threatEntryTypes' => array( 'URL' ),
				'threatEntries'    => array(
					array( 'url' => $url ),
				),
			),
		);

		// Make API request.
		$response = wp_remote_post(
			$endpoint,
			array(
				'headers' => array(
					'Content-Type' => 'application/json',
				),
				'body'    => wp_json_encode( $body ),
				'timeout' => 10,
			)
		);

		// Handle transport errors.
		if ( is_wp_error( $response ) ) {
			self::log_error( 'Google Safe Browsing API error: ' . $response->get_error_message() );
			// Cache negative result for shorter duration (1 hour) to retry later.
			set_transient( $cache_key, 'clean', 3600 );
			return null;
		}

		$response_code = wp_remote_retrieve_response_code( $response );
		if ( 200 !== $response_code ) {
			self::log_error( "Google Safe Browsing API returned code: {$response_code}" );
			set_transient( $cache_key, 'clean', 3600 );
			return null;
		}

		// Parse response.
		$response_body = wp_remote_retrieve_body( $response );
		$data          = json_decode( $response_body, true );

		// A literal JSON null decodes to null without an error, so check both.
		if ( null === $data && json_last_error() !== JSON_ERROR_NONE ) {
			self::log_error( 'Google Safe Browsing JSON decode error: ' . json_last_error_msg() );
			set_transient( $cache_key, 'clean', 3600 );
			return null;
		}

		// Only trust 'matches' when it really is an array; anything else is treated as clean.
		$matches = ( is_array( $data ) && isset( $data['matches'] ) && is_array( $data['matches'] ) ) ? $data['matches'] : array();

		if ( ! empty( $matches ) ) {
			// Only the first match is reported; reset() also copes with keys that do not start at 0.
			$match  = reset( $matches );
			$result = array(
				'source'      => 'google_safe_browsing',
				'threat'      => true,
				'threat_type' => isset( $match['threatType'] ) ? $match['threatType'] : 'UNKNOWN',
				'platform'    => isset( $match['platformType'] ) ? $match['platformType'] : 'ANY_PLATFORM',
				'details'     => $match,
			);

			// Cache the threat result (24 hours).
			set_transient( $cache_key, $result, self::CACHE_DURATION );

			self::log_debug( "Google Safe Browsing threat detected: {$url} - {$result['threat_type']}" );

			return $result;
		}

		// No threats found - cache as clean.
		set_transient( $cache_key, 'clean', self::CACHE_DURATION );
		self::log_debug( "Google Safe Browsing: URL clean: {$url}" );

		return null;
	}

	/**
	 * Look up a URL in PhishTank.
	 *
	 * The lookup is skipped when the integration is disabled or the hourly rate
	 * limit has been reached. Verdicts are cached per URL in a transient:
	 * threats and clean results for CACHE_DURATION, failed lookups (transport
	 * error, non-200 status, undecodable JSON) as 'clean' for one hour.
	 *
	 * @since 1.0.0
	 * @param string $url The URL to check.
	 * @return array|null Array with 'source', 'threat', 'verified', 'phish_id'
	 *                    and 'details' keys, or null if clean/unavailable.
	 */
	public static function check_phishtank( $url ) {
		$settings = get_option( 'content_guard_pro_settings', array() );

		// Opt-in only: do nothing unless the site owner enabled the service.
		if ( empty( $settings['phishtank_enabled'] ) ) {
			return null;
		}

		// Serve a cached verdict when one exists; cache hits do not count against the rate limit.
		$cache_key      = 'content_guard_pro_pt_' . md5( $url );
		$cached_verdict = get_transient( $cache_key );
		if ( false !== $cached_verdict ) {
			self::log_debug( "PhishTank cache hit for: {$url}" );

			if ( 'clean' === $cached_verdict ) {
				return null;
			}

			return $cached_verdict;
		}

		// Enforce the hourly quota, counting this call before it is sent.
		if ( ! self::check_rate_limit( 'phishtank' ) ) {
			self::log_debug( 'PhishTank rate limit exceeded' );
			return null;
		}
		self::increment_rate_limit( 'phishtank' );

		// Build the form fields; the application key is optional for PhishTank.
		$form_fields = array(
			'url'    => $url,
			'format' => 'json',
		);
		$app_key     = isset( $settings['phishtank_api_key'] ) ? $settings['phishtank_api_key'] : '';
		if ( ! empty( $app_key ) ) {
			$form_fields['app_key'] = $app_key;
		}

		$request_args = array(
			'headers' => array(
				'User-Agent' => 'Content-Guard-Pro/' . CONTENT_GUARD_PRO_VERSION,
			),
			'body'    => $form_fields,
			'timeout' => 10,
		);
		$response     = wp_remote_post( self::PHISHTANK_ENDPOINT, $request_args );

		// Work out whether the lookup failed, and why, in a single pass.
		$failure = null;
		$payload = null;
		if ( is_wp_error( $response ) ) {
			$failure = 'PhishTank API error: ' . $response->get_error_message();
		} else {
			$status = wp_remote_retrieve_response_code( $response );
			if ( 200 !== $status ) {
				$failure = "PhishTank API returned code: {$status}";
			} else {
				$payload = json_decode( wp_remote_retrieve_body( $response ), true );
				if ( null === $payload && JSON_ERROR_NONE !== json_last_error() ) {
					$failure = 'PhishTank JSON decode error: ' . json_last_error_msg();
				}
			}
		}

		if ( null !== $failure ) {
			self::log_error( $failure );
			// Short-lived 'clean' marker so the lookup is retried after an hour.
			set_transient( $cache_key, 'clean', 3600 );
			return null;
		}

		// A URL counts as a threat only when it is in the database AND flagged valid.
		$details  = empty( $payload['results'] ) ? array() : $payload['results'];
		$is_phish = ! empty( $details['in_database'] ) && ! empty( $details['valid'] );

		if ( ! $is_phish ) {
			set_transient( $cache_key, 'clean', self::CACHE_DURATION );
			self::log_debug( "PhishTank: URL clean: {$url}" );

			return null;
		}

		$verdict = array(
			'source'   => 'phishtank',
			'threat'   => true,
			'verified' => ! empty( $details['verified'] ),
			'phish_id' => isset( $details['phish_id'] ) ? $details['phish_id'] : '',
			'details'  => $details,
		);

		// Remember the threat for the full cache duration.
		set_transient( $cache_key, $verdict, self::CACHE_DURATION );
		self::log_debug( "PhishTank threat detected: {$url} - Phish ID: {$verdict['phish_id']}" );

		return $verdict;
	}

	/**
	 * Run a URL through every reputation service and merge the verdicts.
	 *
	 * Google Safe Browsing is queried first, then PhishTank. Each service that
	 * reports a threat contributes its result to 'sources' and adds 50 points
	 * to the score (per PRD Appendix F), capped at 100.
	 *
	 * @since 1.0.0
	 * @param string $url The URL to check.
	 * @return array Array with 'threat' boolean, 'sources' array and 'score' integer.
	 */
	public static function check_url_reputation( $url ) {
		// Array elements are evaluated left to right, so the call order is preserved.
		$verdicts = array(
			self::check_google_safe_browsing( $url ),
			self::check_phishtank( $url ),
		);

		$hits = array();
		foreach ( $verdicts as $verdict ) {
			if ( $verdict ) {
				$hits[] = $verdict;
			}
		}

		return array(
			'threat'  => ! empty( $hits ),
			'sources' => $hits,
			// +50 per reputation hit, never more than 100.
			'score'   => min( 100, 50 * count( $hits ) ),
		);
	}

	/**
	 * Tell whether another API call is allowed for a service.
	 *
	 * Reads the per-service call counter transient and compares it with
	 * RATE_LIMIT_PER_HOUR. This method only reads the counter; it does not
	 * change it.
	 *
	 * @since 1.0.0
	 * @param string $service Service name ('google_sb', 'phishtank').
	 * @return bool True if within limit, false if exceeded.
	 */
	private static function check_rate_limit( $service ) {
		$calls_made = get_transient( "content_guard_pro_rate_limit_{$service}" );

		// A missing counter means nothing has been recorded yet, so the call is allowed.
		return false === $calls_made || absint( $calls_made ) < self::RATE_LIMIT_PER_HOUR;
	}

	/**
	 * Increment rate limit counter for API service.
	 *
	 * The first call of a window creates the counter with a one hour lifetime.
	 * Later calls bump the counter while keeping the window's original expiry,
	 * so the limit really is "per hour" rather than "per hour since the most
	 * recent call". The remaining lifetime is read from the
	 * '_transient_timeout_' option row, which only exists when transients are
	 * stored in the options table; with an external object cache the lifetime
	 * cannot be read here and the counter falls back to a fresh one hour TTL.
	 *
	 * @since 1.0.0
	 * @param string $service Service name ('google_sb', 'phishtank').
	 */
	private static function increment_rate_limit( $service ) {
		$transient_key = "content_guard_pro_rate_limit_{$service}";
		$count         = get_transient( $transient_key );

		if ( false === $count ) {
			// First call this hour: open a new one hour window.
			set_transient( $transient_key, 1, 3600 );
			return;
		}

		// Default to a full hour when the current expiry cannot be determined.
		$ttl = 3600;

		if ( ! wp_using_ext_object_cache() ) {
			$expires_at = (int) get_option( '_transient_timeout_' . $transient_key, 0 );
			if ( $expires_at > 0 ) {
				// Never pass 0: set_transient() treats 0 as "no expiration".
				$ttl = max( 1, min( 3600, $expires_at - time() ) );
			}
		}

		// Increment counter without extending the window.
		set_transient( $transient_key, absint( $count ) + 1, $ttl );
	}

	/**
	 * Report how much of the hourly quota each service has consumed.
	 *
	 * A service with no counter transient is reported as 0 calls used.
	 *
	 * @since 1.0.0
	 * @return array Map of service name => array( 'used' => int, 'limit' => int ).
	 */
	public static function get_rate_limit_stats() {
		$stats = array();

		foreach ( array( 'google_sb', 'phishtank' ) as $service ) {
			$calls_made = get_transient( "content_guard_pro_rate_limit_{$service}" );

			$stats[ $service ] = array(
				'used'  => absint( $calls_made ),
				'limit' => self::RATE_LIMIT_PER_HOUR,
			);
		}

		return $stats;
	}

	/**
	 * Clear all reputation caches.
	 *
	 * Deletes the Google Safe Browsing and PhishTank verdict transients (both
	 * the value rows and their timeout rows) directly from the options table.
	 * Useful for testing or forcing fresh checks.
	 *
	 * Only rows in the options table are touched, so transients held by an
	 * external object cache are not affected by this method.
	 *
	 * @since 1.0.0
	 * @return int Number of option rows deleted (value and timeout rows are
	 *             counted separately), or 0 if the query failed.
	 */
	public static function clear_reputation_cache() {
		global $wpdb;

		// esc_like() neutralises the "_" characters in the prefixes, which are
		// single-character wildcards in a LIKE pattern and could otherwise
		// match unrelated option names.
		$deleted = $wpdb->query(
			$wpdb->prepare(
				"DELETE FROM `{$wpdb->options}`
				WHERE option_name LIKE %s
				   OR option_name LIKE %s
				   OR option_name LIKE %s
				   OR option_name LIKE %s",
				$wpdb->esc_like( '_transient_content_guard_pro_gsb_' ) . '%',
				$wpdb->esc_like( '_transient_content_guard_pro_pt_' ) . '%',
				$wpdb->esc_like( '_transient_timeout_content_guard_pro_gsb_' ) . '%',
				$wpdb->esc_like( '_transient_timeout_content_guard_pro_pt_' ) . '%'
			)
		);

		// $wpdb->query() returns false on a database error; report it instead
		// of logging an empty count.
		if ( false === $deleted ) {
			self::log_error( 'Failed to clear reputation cache: ' . $wpdb->last_error );
			return 0;
		}

		$count = absint( $deleted );

		self::log_debug( "Cleared {$count} reputation cache entries" );

		return $count;
	}

	/**
	 * Test API connectivity and credentials.
	 *
	 * Probes each service with a fixed URL and reports, per service, whether it
	 * is enabled, configured and responding. Cached verdicts for the probe URLs
	 * are deleted first so that the test reaches the API instead of replaying a
	 * cached answer. Each probe goes through the normal check methods and is
	 * therefore counted against the hourly rate limit.
	 *
	 * Google Safe Browsing is considered working when the probe URL is reported
	 * as a threat. PhishTank is considered working when it is enabled and the
	 * probe URL comes back clean.
	 *
	 * NOTE(review): check_phishtank() returns null both for a clean URL and for
	 * a failed or rate-limited lookup, so an enabled PhishTank integration whose
	 * request failed is still reported as working here.
	 *
	 * @since 1.0.0
	 * @return array Status array for each service with 'enabled', 'configured',
	 *               'working' and 'message' keys.
	 */
	public static function test_api_connectivity() {
		$results = array();

		// Get settings once to avoid multiple database queries.
		$settings = get_option( 'content_guard_pro_settings', array() );

		$gsb_enabled = ! empty( $settings['google_safe_browsing_enabled'] );
		$pt_enabled  = ! empty( $settings['phishtank_enabled'] );

		$gsb_test_url = 'http://malware.testing.google.test/testing/malware/';
		$pt_test_url  = 'https://www.example.com/';

		// Forget earlier verdicts for the probe URLs so the checks below cannot
		// be answered from cache (keys mirror the ones used by the check methods).
		delete_transient( 'content_guard_pro_gsb_' . md5( $gsb_test_url ) );
		delete_transient( 'content_guard_pro_pt_' . md5( $pt_test_url ) );

		// Test Google Safe Browsing with a known malicious URL.
		$gsb_result  = self::check_google_safe_browsing( $gsb_test_url );
		$gsb_working = null !== $gsb_result;

		$results['google_safe_browsing'] = array(
			'enabled'    => $gsb_enabled,
			'configured' => ! empty( $settings['google_safe_browsing_api_key'] ),
			'working'    => $gsb_working,
			'message'    => $gsb_working ? __( 'API responding correctly', 'content-guard-pro' ) : __( 'No response or not configured', 'content-guard-pro' ),
		);

		// Test PhishTank (use example.com as it's safe).
		$pt_result = self::check_phishtank( $pt_test_url );

		// A null result only means "clean" when the service was actually enabled;
		// a disabled service also returns null and must not be reported as working.
		$pt_working = $pt_enabled && null === $pt_result;

		if ( $pt_working ) {
			$pt_message = __( 'API responding correctly', 'content-guard-pro' );
		} elseif ( ! $pt_enabled ) {
			$pt_message = __( 'No response or not configured', 'content-guard-pro' );
		} else {
			$pt_message = __( 'Unexpected response', 'content-guard-pro' );
		}

		$results['phishtank'] = array(
			'enabled'    => $pt_enabled,
			'configured' => true, // PhishTank doesn't require API key.
			'working'    => $pt_working,
			'message'    => $pt_message,
		);

		return $results;
	}

	/**
	 * Collect the distinct absolute URLs referenced by markup.
	 *
	 * Looks for http/https URLs that appear as the value of an href, src or
	 * data-src attribute (quoted or unquoted) and returns them in order of
	 * first appearance, re-indexed from 0.
	 *
	 * @since 1.0.0
	 * @param string $content Content to scan.
	 * @return array Array of unique URLs; empty when none are found or the match fails.
	 */
	public static function extract_urls( $content ) {
		$pattern  = '#(?:href|src|data-src)=["\']?(https?://[^"\'\s>]+)["\']?#i';
		$captured = array();

		// preg_match_all() returns false when the regex engine reports an error.
		if ( false === preg_match_all( $pattern, $content, $captured ) ) {
			self::log_error( 'preg_match_all failed in extract_urls()' );
			return array();
		}

		if ( empty( $captured[1] ) ) {
			return array();
		}

		// Drop duplicates, then re-index so callers get a plain list.
		return array_values( array_unique( $captured[1] ) );
	}

	/**
	 * Write a debug line through cgp_log(), but only while WP_DEBUG is on.
	 *
	 * @since 1.0.0
	 * @param string $message Debug message.
	 */
	private static function log_debug( $message ) {
		$debugging = defined( 'WP_DEBUG' ) && WP_DEBUG;

		// Stay silent on sites that are not in debug mode.
		if ( ! $debugging ) {
			return;
		}

		$prefix = '[Content Guard Pro - Integrations] ';
		cgp_log( $prefix . $message );
	}

	/**
	 * Write an error line through cgp_log().
	 *
	 * Unlike log_debug(), errors are logged regardless of WP_DEBUG.
	 *
	 * @since 1.0.0
	 * @param string $message Error message.
	 */
	private static function log_error( $message ) {
		$prefix = '[Content Guard Pro - Integrations ERROR] ';

		cgp_log( $prefix . $message );
	}

	/**
	 * Verify rule pack signature.
	 *
	 * Checks that the given rule pack JSON was signed with the private key
	 * matching PUBLIC_KEY, using OpenSSL with SHA256. Every failure path
	 * (missing input, OpenSSL unavailable, malformed base64, unusable public
	 * key, signature mismatch, OpenSSL error) is logged and returns false.
	 *
	 * @since 1.0.0
	 * @param string $json_data JSON content of the rule pack, exactly as signed.
	 * @param string $signature Base64 encoded signature.
	 * @return bool True if signature is valid, false otherwise.
	 */
	public static function verify_rule_pack_signature( $json_data, $signature ) {
		if ( empty( $json_data ) || empty( $signature ) || ! is_string( $json_data ) || ! is_string( $signature ) ) {
			self::log_error( 'Signature verification failed: Missing data or signature.' );
			return false;
		}

		if ( ! function_exists( 'openssl_verify' ) ) {
			self::log_error( 'Signature verification failed: OpenSSL extension not available.' );
			return false;
		}

		// Strict mode rejects input containing characters outside the base64
		// alphabet instead of silently discarding them.
		$binary_signature = base64_decode( $signature, true );
		if ( false === $binary_signature || '' === $binary_signature ) {
			self::log_error( 'Signature verification failed: Invalid base64 signature.' );
			return false;
		}

		// Drain errors left in the OpenSSL queue by earlier calls so anything
		// logged below belongs to this verification.
		while ( false !== openssl_error_string() ) {
			continue;
		}

		// Load the key explicitly so an unusable key is reported clearly rather
		// than surfacing as a warning from openssl_verify().
		$public_key = openssl_pkey_get_public( self::PUBLIC_KEY );
		if ( false === $public_key ) {
			self::log_error( 'Signature verification failed: Public key could not be loaded.' );
			return false;
		}

		// Verify using SHA256.
		$result = openssl_verify(
			$json_data,
			$binary_signature,
			$public_key,
			OPENSSL_ALGO_SHA256
		);

		if ( 1 === $result ) {
			return true;
		}

		if ( 0 === $result ) {
			self::log_error( 'Signature verification failed: Invalid signature.' );
			return false;
		}

		// Any other return value signals an OpenSSL-level error.
		$openssl_error = openssl_error_string();
		self::log_error( 'Signature verification error: ' . ( false !== $openssl_error ? $openssl_error : 'unknown OpenSSL error' ) );

		return false;
	}

	/**
	 * Report a finding as false positive to the vendor.
	 *
	 * Sends a reduced copy of the finding: only the rule ID, the matched
	 * excerpt, the fingerprint, the plugin version and a timestamp are
	 * forwarded. The matched excerpt is sent as provided by the caller; this
	 * method does not redact it.
	 *
	 * The request is non-blocking, so a true return value means the report was
	 * handed to the HTTP layer without a local error - not that the vendor
	 * received or accepted it.
	 *
	 * @since 1.0.0
	 * @param array $finding_data Finding data (rule_id, matched_excerpt, fingerprint).
	 * @return bool True if the report was dispatched, false on invalid input,
	 *              encoding failure or a local HTTP error.
	 */
	public static function report_false_positive( $finding_data ) {
		// Array access on an unexpected type (e.g. an object) would be a fatal error.
		if ( ! is_array( $finding_data ) ) {
			self::log_error( 'Failed to report false positive: invalid finding data.' );
			return false;
		}

		// TODO: Replace with actual vendor API endpoint.
		$endpoint = 'https://api.contentguardpro.com/v1/report-false-positive';

		// Forward only the fields the vendor needs; everything else in the finding stays local.
		$report = array(
			'rule_id'      => isset( $finding_data['rule_id'] ) ? $finding_data['rule_id'] : 'unknown',
			'matched_text' => isset( $finding_data['matched_excerpt'] ) ? $finding_data['matched_excerpt'] : '',
			'fingerprint'  => isset( $finding_data['fingerprint'] ) ? $finding_data['fingerprint'] : '',
			'version'      => CONTENT_GUARD_PRO_VERSION,
			'timestamp'    => time(),
		);

		// Encoding can fail (for example on malformed byte sequences in the
		// excerpt); do not send an empty body and report success.
		$payload = wp_json_encode( $report );
		if ( false === $payload ) {
			self::log_error( 'Failed to report false positive: could not encode report.' );
			return false;
		}

		// Send report.
		$response = wp_remote_post(
			$endpoint,
			array(
				'headers'  => array(
					'Content-Type' => 'application/json',
				),
				'body'     => $payload,
				'timeout'  => 5,
				'blocking' => false, // Don't wait for response.
			)
		);

		if ( is_wp_error( $response ) ) {
			self::log_error( 'Failed to report false positive: ' . $response->get_error_message() );
			return false;
		}

		self::log_debug( 'False positive reported for rule: ' . $report['rule_id'] );
		return true;
	}
}

