<?php
/**
 * Detector Class - Content Analysis and Pattern Matching
 *
 * Responsible for analyzing content and detecting security threats using
 * regex patterns and heuristics from PRD Appendix B.
 *
 * @package ContentGuardPro
 * @since   1.0.0
 */

// If this file is called directly, abort.
// Stop immediately when the ABSPATH constant has not been defined.
defined( 'ABSPATH' ) || exit;

/**
 * Class CGP_Detector
 *
 * Single Responsibility: Content analysis and threat detection.
 * Applies all detection patterns from PRD Appendix B:
 * - URL shorteners
 * - Hidden elements with external links
 * - External scripts/iframes
 * - Obfuscation techniques
 * - SEO spam keywords
 * - Anomalous link profiles
 * - Inline event handlers (onclick, onerror, etc.)
 * - document.write() calls
 * - javascript: URIs
 * - Object/Embed/Applet tags
 * - Meta refresh redirects
 * - PHP function patterns in serialized data
 * - Extended CSS cloaking
 * - SVG with embedded scripts
 * - Cryptocurrency miners
 * - Suspicious window.location redirects
 *
 * @since 1.0.0
 */
class CGP_Detector {

	/**
	 * Scan content for security threats.
	 *
	 * Main public method that applies all detection patterns to the provided content.
	 * Recursively scans Gutenberg blocks if present.
	 *
	 * @since 1.0.0
	 * @param string $content     Content to scan (HTML, blocks, etc.).
	 * @param int    $object_id   Object ID (post ID, meta ID, etc.).
	 * @param string $object_type Object type (post, postmeta, option).
	 * @param string $field       Field name (e.g., post_content, meta_value).
	 * @return array Array of findings.
	 */
	public function scan_content( $content, $object_id = 0, $object_type = 'post', $field = 'post_content' ) {
		// Nothing to inspect: hand back an empty result set.
		if ( empty( $content ) ) {
			return array();
		}

		// Entity-decoded copy; it is only scanned further down if it differs from the input.
		$decoded = $this->decode_html_entities( $content );

		// Pass 1: every parsed block, including its nested blocks.
		$collected = array();
		foreach ( $this->parse_blocks( $content ) as $parsed_block ) {
			$collected = array_merge(
				$collected,
				$this->scan_block_recursive( $parsed_block, $object_id, $object_type, $field )
			);
		}

		// Pass 2: the raw content as a whole (covers anything outside block markup).
		$collected = array_merge(
			$collected,
			$this->apply_detection_patterns( $content, $object_id, $object_type, $field )
		);

		// Pass 3: the decoded copy, skipped when decoding changed nothing.
		if ( $decoded !== $content ) {
			$collected = array_merge(
				$collected,
				$this->apply_detection_patterns( $decoded, $object_id, $object_type, $field )
			);
		}

		// The passes overlap, so collapse repeated findings before returning.
		return $this->deduplicate_findings( $collected );
	}

	/**
	 * Recursively scan a Gutenberg block and its children.
	 *
	 * @since 1.0.0
	 * @param array  $block       Block array from parse_blocks().
	 * @param int    $object_id   Object ID.
	 * @param string $object_type Object type.
	 * @param string $field       Field name.
	 * @return array Array of findings.
	 */
	private function scan_block_recursive( $block, $object_id, $object_type, $field ) {
		$findings = array();

		// Scan the block's own markup.
		if ( ! empty( $block['innerHTML'] ) ) {
			$findings = array_merge(
				$findings,
				$this->apply_detection_patterns( $block['innerHTML'], $object_id, $object_type, $field )
			);
		}

		// Scan block attributes (may contain URLs, code, etc.).
		// Attributes can be nested arrays, so every string leaf is visited,
		// not only the top-level values; non-string leaves are ignored.
		if ( ! empty( $block['attrs'] ) && is_array( $block['attrs'] ) ) {
			// array_walk_recursive() takes its array by reference, hence the local copy.
			$attrs = $block['attrs'];

			array_walk_recursive(
				$attrs,
				function ( $attr_value ) use ( &$findings, $object_id, $object_type, $field ) {
					if ( is_string( $attr_value ) ) {
						$findings = array_merge(
							$findings,
							$this->apply_detection_patterns( $attr_value, $object_id, $object_type, $field )
						);
					}
				}
			);
		}

		// Descend into child blocks and collect their findings as well.
		if ( ! empty( $block['innerBlocks'] ) && is_array( $block['innerBlocks'] ) ) {
			foreach ( $block['innerBlocks'] as $inner_block ) {
				$findings = array_merge(
					$findings,
					$this->scan_block_recursive( $inner_block, $object_id, $object_type, $field )
				);
			}
		}

		return $findings;
	}

	/**
	 * Apply all detection patterns to content.
	 *
	 * Implements patterns from PRD Appendix B plus enhanced detection.
	 *
	 * @since 1.0.0
	 * @param string $content     Content to scan.
	 * @param int    $object_id   Object ID.
	 * @param string $object_type Object type.
	 * @param string $field       Field name.
	 * @return array Array of findings.
	 */
	private function apply_detection_patterns( $content, $object_id, $object_type, $field ) {
		// Arguments are evaluated left to right, so the detectors still run in
		// pattern order 1-15 and the merged list keeps that order.
		return array_merge(
			// 1: URL shorteners.
			$this->detect_url_shorteners( $content, $object_id, $object_type, $field ),
			// 2: Hidden elements with external links (extended CSS cloaking).
			$this->detect_hidden_external_content( $content, $object_id, $object_type, $field ),
			// 3: External scripts/iframes.
			$this->detect_external_resources( $content, $object_id, $object_type, $field ),
			// 4: Obfuscation techniques.
			$this->detect_obfuscation( $content, $object_id, $object_type, $field ),
			// 5: SEO spam keywords.
			$this->detect_seo_spam( $content, $object_id, $object_type, $field ),
			// 6: Anomalous link profile.
			$this->detect_anomalous_links( $content, $object_id, $object_type, $field ),
			// 7: Inline event handlers.
			$this->detect_inline_event_handlers( $content, $object_id, $object_type, $field ),
			// 8: document.write() calls.
			$this->detect_document_write( $content, $object_id, $object_type, $field ),
			// 9: javascript: URIs.
			$this->detect_javascript_uris( $content, $object_id, $object_type, $field ),
			// 10: Object/Embed/Applet tags.
			$this->detect_embed_objects( $content, $object_id, $object_type, $field ),
			// 11: Meta refresh redirects.
			$this->detect_meta_refresh( $content, $object_id, $object_type, $field ),
			// 12: PHP function patterns in content/serialized data.
			$this->detect_php_patterns( $content, $object_id, $object_type, $field ),
			// 13: SVG with embedded scripts.
			$this->detect_svg_scripts( $content, $object_id, $object_type, $field ),
			// 14: Cryptocurrency miners.
			$this->detect_crypto_miners( $content, $object_id, $object_type, $field ),
			// 15: Suspicious window.location redirects.
			$this->detect_js_redirects( $content, $object_id, $object_type, $field )
		);
	}

	/**
	 * Detect URL shorteners (PRD Appendix B).
	 *
	 * Pattern: bit.ly, t.co, tinyurl.com, etc.
	 *
	 * @since 1.0.0
	 * @param string $content     Content to scan.
	 * @param int    $object_id   Object ID.
	 * @param string $object_type Object type.
	 * @param string $field       Field name.
	 * @return array Findings.
	 */
	private function detect_url_shorteners( $content, $object_id, $object_type, $field ) {
		$findings = array();

		// Hosts that must match exactly. The lookahead after them rejects longer
		// hostnames that merely start with a shortener name (e.g. "t.company.com"),
		// while still accepting a trailing sentence dot ("see bit.ly.").
		$exact_hosts = 'bit\.ly|t\.co|tinyurl\.com|goo\.gl|ow\.ly|buff\.ly|is\.gd|cutt\.ly|lnkd\.in|vk\.cc';

		// A match starts at the beginning of the content or after whitespace, a
		// quote or "(". An optional scheme ("http://", "https://", "//") and an
		// optional "www." are accepted so that full links such as
		// href="https://bit.ly/abc" are caught, not only bare host names.
		// "grabify." and "iplogger." stay prefix matches (any TLD).
		$pattern = '/(?:^|[\s"\'(])(?:(?:https?:)?\/\/)?(?:www\.)?(?:(?:' . $exact_hosts . ')(?![a-z0-9-]|\.[a-z0-9])|grabify\.|iplogger\.)/i';

		$result = preg_match_all( $pattern, $content, $matches, PREG_OFFSET_CAPTURE );

		// preg_match_all() returns false on a regex error and 0 when nothing matched.
		if ( false === $result || 0 === $result ) {
			return $findings;
		}

		foreach ( $matches[0] as $match ) {
			$findings[] = array(
				'rule_id'      => 'url_shortener',
				'category'     => 'suspicious_urls',
				'severity'     => 'suspicious',
				'confidence'   => 60,
				// The match may begin with the whitespace delimiter; trim() drops it.
				'matched_text' => trim( $match[0] ),
				// Byte offset of the match (including its leading delimiter).
				'position'     => $match[1],
				'object_id'    => $object_id,
				'object_type'  => $object_type,
				'field'        => $field,
			);
		}

		return $findings;
	}

	/**
	 * Detect hidden elements with external links (PRD Appendix B + Extended CSS Cloaking).
	 *
	 * Enhanced detection for:
	 * - display:none, visibility:hidden
	 * - opacity:0, font-size:0
	 * - position:absolute with negative offsets
	 * - text-indent:-9999px
	 * - clip:rect(0,0,0,0)
	 * - z-index:-1
	 * - max-height:0 with overflow:hidden
	 *
	 * @since 1.0.0
	 * @param string $content     Content to scan.
	 * @param int    $object_id   Object ID.
	 * @param string $object_type Object type.
	 * @param string $field       Field name.
	 * @return array Findings.
	 */
	private function detect_hidden_external_content( $content, $object_id, $object_type, $field ) {
		$findings = array();

		// CSS/attribute fragments that visually hide an element.
		// Fragments combining two declarations are joined with [^>]* instead of
		// .* : the final regex runs with the "s" flag, so .* could run past the
		// end of the opening tag, pairing declarations from unrelated elements
		// and backtracking heavily on large content.
		$hiding_patterns = array(
			'display\s*:\s*none',
			'visibility\s*:\s*hidden',
			'opacity\s*:\s*0(?:[;\s"\']|$)',
			'font-size\s*:\s*0(?:px|em|rem|%)?(?:[;\s"\']|$)',
			'left\s*:\s*-\d{4,}px',
			'top\s*:\s*-\d{4,}px',
			'text-indent\s*:\s*-\d{4,}(?:px|em)',
			'clip\s*:\s*rect\s*\(\s*0',
			'z-index\s*:\s*-\d+',
			'max-height\s*:\s*0[^>]*overflow\s*:\s*hidden',
			'height\s*:\s*0[^>]*overflow\s*:\s*hidden',
			'width\s*=\s*["\']?1\b',
			'height\s*=\s*["\']?1\b',
			'color\s*:\s*transparent',
			'position\s*:\s*(?:absolute|fixed)[^>]*(?:left|top)\s*:\s*-\d{3,}',
		);

		// An opening tag carrying one of the hiding fragments, followed (lazily)
		// by the next <a> whose href is an absolute http(s) URL. Group 1 is that URL.
		$hiding_regex = implode( '|', $hiding_patterns );
		$pattern      = '/<(?:div|span|a|p|section|article)[^>]*(?:' . $hiding_regex . ')[^>]*>.*?<a[^>]+href=["\'](https?:\/\/[^"\']+)/is';

		$result = preg_match_all( $pattern, $content, $matches, PREG_SET_ORDER );

		// false signals a regex error, 0 means nothing matched.
		if ( false === $result || 0 === $result ) {
			return $findings;
		}

		$allowlist_pattern = $this->get_allowlist_pattern();

		foreach ( $matches as $match ) {
			$url    = isset( $match[1] ) ? $match[1] : '';
			$domain = $this->extract_domain( $url );

			// Links to allowlisted domains are not reported.
			if ( preg_match( '/' . $allowlist_pattern . '/i', $domain ) ) {
				continue;
			}

			// Baseline score; the reputation helper returns the final values.
			$confidence = 75;
			$severity   = 'suspicious';
			$extra_data = array();

			$reputation_result = $this->apply_reputation_checks( $url, $domain, $confidence, $severity, $extra_data );
			$confidence        = $reputation_result['confidence'];
			$severity          = $reputation_result['severity'];

			$findings[] = array(
				'rule_id'      => 'hidden_external_link',
				'category'     => 'hidden_content',
				'severity'     => $severity,
				'confidence'   => $confidence,
				// First 200 bytes of the match, with markup stripped for display.
				'matched_text' => wp_strip_all_tags( substr( $match[0], 0, 200 ) ),
				'domain'       => $domain,
				'url'          => $url,
				'object_id'    => $object_id,
				'object_type'  => $object_type,
				'field'        => $field,
				'extra'        => $extra_data,
			);
		}

		return $findings;
	}

	/**
	 * Detect external scripts and iframes (PRD Appendix B).
	 *
	 * Pattern: <script> or <iframe> with external src not on allowlist.
	 *
	 * @since 1.0.0
	 * @param string $content     Content to scan.
	 * @param int    $object_id   Object ID.
	 * @param string $object_type Object type.
	 * @param string $field       Field name.
	 * @return array Findings.
	 */
	private function detect_external_resources( $content, $object_id, $object_type, $field ) {
		$findings = array();

		$allowlist_pattern = $this->get_allowlist_pattern();

		// <script>/<iframe> whose src or data-src points at a non-allowlisted host.
		// Like the object/embed detector, the attribute may have whitespace around
		// "=" and may be unquoted. Protocol-relative URLs ("//host/path") are
		// accepted as well, since they load external content just like http(s) ones.
		// Group 1 is the tag name, group 2 the URL.
		$pattern = '/<(script|iframe)\b[^>]+(?:src|data-src)\s*=\s*["\']?((?:https?:)?\/\/(?!' . $allowlist_pattern . ')[^"\'\s>]+)/i';

		$result = preg_match_all( $pattern, $content, $matches, PREG_SET_ORDER );

		// false signals a regex error, 0 means nothing matched.
		if ( false === $result || 0 === $result ) {
			return $findings;
		}

		foreach ( $matches as $match ) {
			$tag = isset( $match[1] ) ? $match[1] : '';
			$url = isset( $match[2] ) ? $match[2] : '';

			// Give protocol-relative URLs an explicit scheme so the domain and
			// reputation helpers always receive an absolute URL.
			if ( 0 === strpos( $url, '//' ) ) {
				$url = 'https:' . $url;
			}

			$domain = $this->extract_domain( $url );

			// Scripts start slightly higher than iframes.
			$confidence = ( 'script' === strtolower( $tag ) ) ? 90 : 85;
			$severity   = 'critical';
			$extra_data = array( 'tag' => $tag );

			$reputation_result = $this->apply_reputation_checks( $url, $domain, $confidence, $severity, $extra_data );
			$confidence        = $reputation_result['confidence'];
			$severity          = $reputation_result['severity'];

			$findings[] = array(
				'rule_id'      => 'ext_' . strtolower( $tag ) . '_non_allowlist',
				'category'     => 'external_resources',
				'severity'     => $severity,
				'confidence'   => $confidence,
				'matched_text' => substr( $match[0], 0, 200 ),
				'tag'          => $tag,
				'domain'       => $domain,
				'url'          => $url,
				'object_id'    => $object_id,
				'object_type'  => $object_type,
				'field'        => $field,
				'extra'        => $extra_data,
			);
		}

		return $findings;
	}

	/**
	 * Detect obfuscation techniques (PRD Appendix B).
	 *
	 * Pattern: fromCharCode, atob/btoa, eval/Function wrapping atob,
	 * large base64 text/html data URLs, and split-string concatenation.
	 *
	 * @since 1.0.0
	 * @param string $content     Content to scan.
	 * @param int    $object_id   Object ID.
	 * @param string $object_type Object type.
	 * @param string $field       Field name.
	 * @return array Findings.
	 */
	private function detect_obfuscation( $content, $object_id, $object_type, $field ) {
		$findings = array();

		// Pattern 1: fromCharCode( / atob( / btoa( and eval|Function wrapped around atob(.
		$pattern1 = '/fromCharCode\s*\(|atob\s*\(|btoa\s*\(|(?:eval|Function)\s*\(\s*atob\(/i';

		$result1 = preg_match_all( $pattern1, $content, $matches, PREG_OFFSET_CAPTURE );

		if ( false !== $result1 && $result1 > 0 ) {
			foreach ( $matches[0] as $match ) {
				$findings[] = array(
					'rule_id'      => 'obfuscation_js',
					'category'     => 'obfuscation',
					'severity'     => 'suspicious',
					'confidence'   => 80,
					'matched_text' => trim( $match[0] ),
					'position'     => $match[1],
					'object_id'    => $object_id,
					'object_type'  => $object_type,
					'field'        => $field,
				);
			}
		}

		// Pattern 2: text/html data URLs carrying at least 80 base64 characters.
		$pattern2 = '/data:\s*text\/html;\s*base64,\s*[A-Za-z0-9+\/]{80,}={0,2}/i';

		$result2 = preg_match_all( $pattern2, $content, $matches, PREG_OFFSET_CAPTURE );

		if ( false !== $result2 && $result2 > 0 ) {
			foreach ( $matches[0] as $match ) {
				$findings[] = array(
					'rule_id'      => 'obfuscation_base64_data_url',
					'category'     => 'obfuscation',
					'severity'     => 'suspicious',
					'confidence'   => 75,
					// Only the first 100 bytes are kept for display.
					'matched_text' => substr( $match[0], 0, 100 ) . '...',
					'position'     => $match[1],
					'object_id'    => $object_id,
					'object_type'  => $object_type,
					'field'        => $field,
				);
			}
		}

		// Pattern 3: String concatenation obfuscation (e.g., 'scr'+'ipt').
		// The whole chain of short quoted fragments is matched, so a word split
		// into three or more pieces ('sc'+'ri'+'pt') or into single letters is
		// reassembled too. Each fragment is capped at 5 letters; the suspicious
		// words other than "eval" are longer than that, so they only show up
		// when they were actually split across fragments.
		$pattern3 = '/["\'][a-z]{1,5}["\'](?:\s*\+\s*["\'][a-z]{1,5}["\'])+/i';

		$suspicious_words = array( 'script', 'iframe', 'eval', 'document', 'cookie', 'location' );

		$result3 = preg_match_all( $pattern3, $content, $matches, PREG_OFFSET_CAPTURE );

		if ( false !== $result3 && $result3 > 0 ) {
			foreach ( $matches[0] as $match ) {
				// Drop quotes, whitespace and "+" to obtain the concatenated string.
				$combined = preg_replace( '/["\'\s+]/', '', $match[0] );

				foreach ( $suspicious_words as $word ) {
					if ( stripos( $combined, $word ) !== false ) {
						$findings[] = array(
							'rule_id'      => 'obfuscation_string_concat',
							'category'     => 'obfuscation',
							'severity'     => 'suspicious',
							'confidence'   => 65,
							'matched_text' => $match[0],
							'position'     => $match[1],
							'object_id'    => $object_id,
							'object_type'  => $object_type,
							'field'        => $field,
						);
						// One finding per concatenation is enough.
						break;
					}
				}
			}
		}

		return $findings;
	}

	/**
	 * Detect SEO spam keywords (PRD Appendix B) - Expanded lexicon.
	 *
	 * Uses word boundaries (\b) to prevent false positives on legitimate words
	 * like "oporny" (Polish) matching "porn", or "sextant" matching "sex".
	 *
	 * Pattern: viagra, cialis, casino, betting, essay writing, etc.
	 *
	 * @since 1.0.0
	 * @param string $content     Content to scan.
	 * @param int    $object_id   Object ID.
	 * @param string $object_type Object type.
	 * @param string $field       Field name.
	 * @return array Findings.
	 */
	private function detect_seo_spam( $content, $object_id, $object_type, $field ) {
		$findings = array();

		// SEO spam lexicon grouped by category. Each entry carries its own
		// regex, severity and confidence. Every regex is wrapped in \b...\b so
		// that keywords only match as whole words/phrases, and group 1 holds
		// the matched keyword.
		$spam_patterns = array(
			// Pharma - drug names and purchase phrases.
			'pharma_high' => array(
				'pattern'    => '/\b(viagra|cialis|levitra|tramadol|xanax|ambien|modafinil|phentermine|online\s+pharmacy|buy\s+(?:viagra|cialis|pills|medications?))\b/i',
				'severity'   => 'suspicious',
				'confidence' => 70,
			),
			// Gambling/Casino - single words plus a few brand names.
			'gambling'    => array(
				'pattern'    => '/\b(casino|betting|poker|slot\s*machine|blackjack|roulette|gambling|online\s+casino|sports?\s+betting|bet365|betway|bwin)\b/i',
				'severity'   => 'suspicious',
				'confidence' => 65,
			),
			// Financial scams - phrase-based to reduce false positives.
			'financial'   => array(
				'pattern'    => '/\b(payday\s+loans?|forex\s+trading|binary\s+options?|make\s+money\s+(?:fast|online|now)|work\s+from\s+home\s+(?:scam|opportunity)|investment\s+opportunity|get\s+rich\s+quick)\b/i',
				'severity'   => 'suspicious',
				'confidence' => 60,
			),
			// Adult content - word boundaries prevent "oporny"/"sporny" false positives.
			// "sex" only matches inside explicit phrases (sex video, sex chat, etc.).
			'adult'       => array(
				'pattern'    => '/\b(porn(?:o|ography|hub|star|site|tube)?|p[o0]rn|xxx+|adult\s+(?:dating|content|site|video|film)|escort\s+(?:service|girl|agency)|webcam\s+(?:girl|model|show)s?|hookups?|sex\s+(?:video|chat|dating|cam|movie|tape|site)|live\s+(?:sex|cam|nude))\b/i',
				'severity'   => 'suspicious',
				'confidence' => 70,
			),
			// Academic fraud - phrase-based for accuracy.
			// The optional "service" suffix carries its own leading whitespace, so
			// "essay writing" is also matched before punctuation or at the end of
			// the text and never captures a trailing space.
			'academic'    => array(
				'pattern'    => '/\b(essay\s+writing(?:\s+service)?|term\s+paper\s+(?:help|service|writing)|dissertation\s+(?:help|service|writing)|buy\s+(?:essay|papers?|thesis)|homework\s+(?:help|service)|write\s+my\s+(?:essay|paper))\b/i',
				'severity'   => 'review',
				'confidence' => 50,
			),
			// Counterfeit goods - brand + context to reduce false positives.
			'counterfeit' => array(
				'pattern'    => '/\b(replica\s+(?:watch|bag|rolex|handbag)|fake\s+(?:designer|louis\s+vuitton|gucci|rolex)|knockoff|cheap\s+(?:gucci|prada|chanel|louis\s+vuitton|designer))\b/i',
				'severity'   => 'suspicious',
				'confidence' => 65,
			),
			// Crypto scams - specific scam phrases.
			// "wallet recovery" uses the same optional-suffix form as "essay writing".
			'crypto_scam' => array(
				'pattern'    => '/\b(bitcoin\s+(?:giveaway|doubler|generator)|free\s+(?:crypto|bitcoin|ethereum|btc)|wallet\s+recovery(?:\s+(?:scam|service))?|crypto\s+(?:investment|scam)|double\s+your\s+(?:bitcoin|crypto|btc))\b/i',
				'severity'   => 'suspicious',
				'confidence' => 70,
			),
		);

		foreach ( $spam_patterns as $category => $config ) {
			$result = preg_match_all( $config['pattern'], $content, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE );

			// Skip this category on a regex error (false) or when nothing matched (0).
			if ( false === $result || 0 === $result ) {
				continue;
			}

			foreach ( $matches as $match ) {
				// With PREG_OFFSET_CAPTURE every group is a [ text, offset ] pair.
				$keyword = isset( $match[1][0] ) ? $match[1][0] : '';

				$findings[] = array(
					'rule_id'      => 'seo_spam_' . $category,
					'category'     => 'seo_spam',
					'severity'     => $config['severity'],
					'confidence'   => $config['confidence'],
					'matched_text' => $keyword,
					'position'     => isset( $match[0][1] ) ? $match[0][1] : 0,
					'object_id'    => $object_id,
					'object_type'  => $object_type,
					'field'        => $field,
					'extra'        => array( 'spam_category' => $category ),
				);
			}
		}

		return $findings;
	}

	/**
	 * Detect anomalous link profiles (PRD Appendix B).
	 *
	 * Checks if content has an unusual number of external links compared to internal.
	 *
	 * @since 1.0.0
	 * @param string $content     Content to scan.
	 * @param int    $object_id   Object ID.
	 * @param string $object_type Object type.
	 * @param string $field       Field name.
	 * @return array Findings.
	 */
	private function detect_anomalous_links( $content, $object_id, $object_type, $field ) {
		$findings = array();

		// Capture every quoted href on an <a> tag; classification happens below.
		$pattern = '/<a[^>]+href=["\']([^"\']+)/i';

		$result = preg_match_all( $pattern, $content, $matches );

		// false signals a regex error, 0 means the content has no links.
		if ( false === $result || 0 === $result ) {
			return $findings;
		}

		$hrefs = isset( $matches[1] ) ? $matches[1] : array();

		if ( empty( $hrefs ) ) {
			return $findings;
		}

		$site_domain = $this->extract_domain( home_url() );

		$external_count   = 0;
		$internal_count   = 0;
		$external_domains = array();

		foreach ( $hrefs as $href ) {
			if ( preg_match( '~^https?://.~is', $href ) ) {
				// Absolute link: internal only when its domain equals the site's.
				$domain = $this->extract_domain( $href );

				if ( $domain === $site_domain ) {
					$internal_count++;
				} else {
					$external_count++;
					$external_domains[] = $domain;
				}
			} elseif ( preg_match( '~^(?:/(?!/)|\.{1,2}/|[?#])~', $href ) ) {
				// Relative link ("/path", "./x", "../x", "?query", "#anchor"):
				// it stays on this site, so it counts as internal. Without this
				// a page full of relative internal links looked like it had none.
				$internal_count++;
			}
			// Anything else (mailto:, tel:, protocol-relative, ...) is left out of both counts.
		}

		// Flag if external_count >= 10 AND ratio >= 3 (from PRD Appendix B).
		if ( $external_count < 10 ) {
			return $findings;
		}

		// With no internal links the external count itself serves as the ratio.
		$ratio = $internal_count > 0 ? ( $external_count / $internal_count ) : $external_count;

		if ( $ratio >= 3 ) {
			// array_unique() keeps the original keys; re-index so the list stays
			// a plain sequential array (and is serialized as one).
			$unique_domains = array_values( array_unique( $external_domains ) );

			$findings[] = array(
				'rule_id'      => 'anomalous_link_profile',
				'category'     => 'link_analysis',
				'severity'     => 'review',
				'confidence'   => 60,
				'matched_text' => sprintf( '%d external links, %d internal links (ratio: %.1f)', $external_count, $internal_count, $ratio ),
				'object_id'    => $object_id,
				'object_type'  => $object_type,
				'field'        => $field,
				'extra'        => array(
					'external_count'   => $external_count,
					'internal_count'   => $internal_count,
					'ratio'            => $ratio,
					'external_domains' => $unique_domains,
				),
			);
		}

		return $findings;
	}

	/**
	 * Detect inline event handlers (onclick, onerror, onload, etc.).
	 *
	 * These are common attack vectors for XSS and malicious code injection.
	 *
	 * @since 1.0.0
	 * @param string $content     Content to scan.
	 * @param int    $object_id   Object ID.
	 * @param string $object_type Object type.
	 * @param string $field       Field name.
	 * @return array Findings.
	 */
	private function detect_inline_event_handlers( $content, $object_id, $object_type, $field ) {
		$findings = array();

		// Event handler attribute names that are checked.
		$event_handlers = array(
			'onclick', 'ondblclick', 'onmousedown', 'onmouseup', 'onmouseover', 'onmouseout', 'onmousemove',
			'onkeydown', 'onkeypress', 'onkeyup',
			'onload', 'onerror', 'onabort', 'onunload', 'onbeforeunload',
			'onfocus', 'onblur', 'onchange', 'onsubmit', 'onreset', 'onselect',
			'ondragstart', 'ondrag', 'ondragend', 'ondragenter', 'ondragleave', 'ondragover', 'ondrop',
			'onscroll', 'onresize', 'oninput', 'oninvalid',
			'onanimationstart', 'onanimationend', 'onanimationiteration',
			'ontransitionend', 'ontouchstart', 'ontouchmove', 'ontouchend',
			'oncontextmenu', 'oncopy', 'oncut', 'onpaste',
			'onafterprint', 'onbeforeprint', 'onhashchange', 'onmessage', 'onoffline', 'ononline',
			'onpopstate', 'onstorage', 'onpagehide', 'onpageshow',
		);

		$handlers_pattern = implode( '|', $event_handlers );

		// Group 1: handler name. Group 2: value of a double-quoted attribute,
		// group 3: value of a single-quoted attribute. The value only ends at the
		// quote type that opened it, so the other quote type inside the value
		// (onerror="a='x';document...") no longer truncates what gets inspected.
		// At most 500 bytes of the value are captured.
		$pattern = '/\b(' . $handlers_pattern . ')\s*=\s*(?:"([^"]{1,500})|\'([^\']{1,500}))/i';

		$result = preg_match_all( $pattern, $content, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE );

		if ( false === $result || 0 === $result ) {
			return $findings;
		}

		// Substrings that mark a handler value as dangerous.
		$dangerous_patterns = array( 'eval', 'document', 'window', 'location', 'cookie', 'fetch', 'XMLHttpRequest', 'innerHTML', 'outerHTML' );

		foreach ( $matches as $match ) {
			$handler  = isset( $match[1][0] ) ? $match[1][0] : '';
			$position = isset( $match[0][1] ) ? $match[0][1] : 0;

			// Exactly one of the two value groups took part in the match; the
			// other one is reported as an empty string (or is absent).
			$value = '';
			if ( isset( $match[2][0] ) && '' !== $match[2][0] ) {
				$value = $match[2][0];
			} elseif ( isset( $match[3][0] ) ) {
				$value = $match[3][0];
			}

			// Baseline rating, raised when the value touches a dangerous API.
			$confidence = 70;
			$severity   = 'suspicious';

			foreach ( $dangerous_patterns as $dangerous ) {
				if ( stripos( $value, $dangerous ) !== false ) {
					$confidence = 85;
					$severity   = 'critical';
					break;
				}
			}

			// onerror and onload are commonly abused: +10 confidence, capped at 95.
			if ( in_array( strtolower( $handler ), array( 'onerror', 'onload' ), true ) ) {
				$confidence = min( 95, $confidence + 10 );
			}

			$findings[] = array(
				'rule_id'      => 'inline_event_handler',
				'category'     => 'javascript_injection',
				'severity'     => $severity,
				'confidence'   => $confidence,
				// Display form: handler="value", value shortened to 100 bytes.
				'matched_text' => $handler . '="' . substr( $value, 0, 100 ) . ( strlen( $value ) > 100 ? '...' : '' ) . '"',
				'position'     => $position,
				'object_id'    => $object_id,
				'object_type'  => $object_type,
				'field'        => $field,
				'extra'        => array(
					'handler'      => $handler,
					'value_length' => strlen( $value ),
				),
			);
		}

		return $findings;
	}

	/**
	 * Detect document.write() calls.
	 *
	 * document.write() is often used to inject malicious content dynamically.
	 *
	 * @since 1.0.0
	 * @param string $content     Content to scan.
	 * @param int    $object_id   Object ID.
	 * @param string $object_type Object type.
	 * @param string $field       Field name.
	 * @return array Findings.
	 */
	private function detect_document_write( $content, $object_id, $object_type, $field ) {
		$hits  = array();
		$count = preg_match_all( '/document\s*\.\s*write(?:ln)?\s*\(/i', $content, $hits, PREG_OFFSET_CAPTURE );

		// Both a regex failure (false) and "no match" (0) yield an empty result.
		if ( ! $count ) {
			return array();
		}

		// Turn each [ text, offset ] pair into a finding record.
		return array_map(
			function ( $hit ) use ( $object_id, $object_type, $field ) {
				return array(
					'rule_id'      => 'document_write',
					'category'     => 'javascript_injection',
					'severity'     => 'suspicious',
					'confidence'   => 75,
					'matched_text' => $hit[0],
					'position'     => $hit[1],
					'object_id'    => $object_id,
					'object_type'  => $object_type,
					'field'        => $field,
				);
			},
			$hits[0]
		);
	}

	/**
	 * Detect javascript: URIs.
	 *
	 * javascript: URIs can execute arbitrary JavaScript when clicked.
	 *
	 * @since 1.0.0
	 * @param string $content     Content to scan.
	 * @param int    $object_id   Object ID.
	 * @param string $object_type Object type.
	 * @param string $field       Field name.
	 * @return array Findings.
	 */
	private function detect_javascript_uris( $content, $object_id, $object_type, $field ) {
		$findings = array();

		// "javascript" with optional tab/CR/LF between its letters. A scheme
		// broken up that way (e.g. "jav<TAB>ascript:") would otherwise slip
		// past a literal match.
		$scheme = implode( '[\t\r\n]*', str_split( 'javascript' ) );

		// javascript: URIs in href, src, action, formaction or data attributes.
		// Leading control characters and spaces before the scheme are skipped
		// (a superset of the plain whitespace that was skipped before).
		$pattern = '/(?:href|src|action|formaction|data)\s*=\s*["\']?[\x00-\x20]*' . $scheme . '\s*:/i';

		$result = preg_match_all( $pattern, $content, $matches, PREG_OFFSET_CAPTURE );

		// false signals a regex error, 0 means nothing matched.
		if ( false === $result || 0 === $result ) {
			return $findings;
		}

		foreach ( $matches[0] as $match ) {
			$findings[] = array(
				'rule_id'      => 'javascript_uri',
				'category'     => 'javascript_injection',
				'severity'     => 'critical',
				'confidence'   => 90,
				'matched_text' => $match[0],
				'position'     => $match[1],
				'object_id'    => $object_id,
				'object_type'  => $object_type,
				'field'        => $field,
			);
		}

		return $findings;
	}

	/**
	 * Detect <object>, <embed>, and <applet> tags.
	 *
	 * These tags can embed external content similar to iframes.
	 *
	 * @since 1.0.0
	 * @param string $content     Content to scan.
	 * @param int    $object_id   Object ID.
	 * @param string $object_type Object type.
	 * @param string $field       Field name.
	 * @return array Findings.
	 */
	private function detect_embed_objects( $content, $object_id, $object_type, $field ) {
		// Allowlisted hosts are excluded inside the regex through a negative lookahead.
		$allowlisted = $this->get_allowlist_pattern();

		// Group 1: tag name. Group 2: external URL taken from data/src/code/codebase.
		$regex = '/<(object|embed|applet)\b[^>]+(?:data|src|code|codebase)\s*=\s*["\']?(https?:\/\/(?!' . $allowlisted . ')[^"\'\s>]+)/i';

		$hits = array();

		// A regex failure (false) and "no match" (0) both end the scan here.
		if ( ! preg_match_all( $regex, $content, $hits, PREG_SET_ORDER ) ) {
			return array();
		}

		$results = array();

		foreach ( $hits as $hit ) {
			$tag_name = isset( $hit[1] ) ? $hit[1] : '';
			$link     = isset( $hit[2] ) ? $hit[2] : '';
			$host     = $this->extract_domain( $link );

			// Starting rating; the reputation helper returns the final values.
			$score = 85;
			$level = 'critical';
			$extra = array( 'tag' => $tag_name );

			$reputation = $this->apply_reputation_checks( $link, $host, $score, $level, $extra );
			$score      = $reputation['confidence'];
			$level      = $reputation['severity'];

			$results[] = array(
				'rule_id'      => 'ext_' . strtolower( $tag_name ) . '_non_allowlist',
				'category'     => 'external_resources',
				'severity'     => $level,
				'confidence'   => $score,
				'matched_text' => substr( $hit[0], 0, 200 ),
				'tag'          => $tag_name,
				'domain'       => $host,
				'url'          => $link,
				'object_id'    => $object_id,
				'object_type'  => $object_type,
				'field'        => $field,
				'extra'        => $extra,
			);
		}

		return $results;
	}

	/**
	 * Detect <meta http-equiv="refresh"> redirects.
	 *
	 * Meta refresh can be used for redirect spam.
	 *
	 * @since 1.0.0
	 * @param string $content     Content to scan.
	 * @param int    $object_id   Object ID.
	 * @param string $object_type Object type.
	 * @param string $field       Field name.
	 * @return array Findings.
	 */
	private function detect_meta_refresh( $content, $object_id, $object_type, $field ) {
		$findings = array();

		// <meta> tag that is a refresh directive with a redirect target.
		// - The lookahead checks for http-equiv=refresh anywhere in the tag, so the
		//   attribute order (http-equiv before or after content) does not matter.
		// - content is "<seconds>" + ";" or "," + target; the "url=" prefix and a
		//   quote around the target are both optional.
		// Group 1 is the redirect target. A refresh without a target does not match.
		$pattern = '/<meta\b(?=[^>]*\bhttp-equiv\s*=\s*["\']?refresh\b)[^>]*?\bcontent\s*=\s*["\']?\s*\d+\s*[;,]\s*(?:url\s*=\s*)?["\']?([^"\'\s>]+)/i';

		$result = preg_match_all( $pattern, $content, $matches, PREG_SET_ORDER );

		// false signals a regex error, 0 means nothing matched.
		if ( false === $result || 0 === $result ) {
			return $findings;
		}

		foreach ( $matches as $match ) {
			$url    = isset( $match[1] ) ? $match[1] : '';
			$domain = $this->extract_domain( $url );

			// Baseline rating; the reputation helper returns the final values.
			$confidence = 80;
			$severity   = 'critical';
			$extra_data = array();

			$reputation_result = $this->apply_reputation_checks( $url, $domain, $confidence, $severity, $extra_data );
			$confidence        = $reputation_result['confidence'];
			$severity          = $reputation_result['severity'];

			$findings[] = array(
				'rule_id'      => 'meta_refresh_redirect',
				'category'     => 'redirects',
				'severity'     => $severity,
				'confidence'   => $confidence,
				'matched_text' => substr( $match[0], 0, 200 ),
				'domain'       => $domain,
				'url'          => $url,
				'object_id'    => $object_id,
				'object_type'  => $object_type,
				'field'        => $field,
				'extra'        => $extra_data,
			);
		}

		return $findings;
	}

	/**
	 * Detect PHP function patterns in content/serialized data.
	 *
	 * These patterns may indicate code injection attempts.
	 * Uses context-aware detection to avoid false positives on common English
	 * words like "system" (e.g., "braking system (ABS)" should not match).
	 *
	 * @since 1.0.0
	 * @param string $content     Content to scan.
	 * @param int    $object_id   Object ID.
	 * @param string $object_type Object type.
	 * @param string $field       Field name.
	 * @return array Findings.
	 */
	private function detect_php_patterns( $content, $object_id, $object_type, $field ) {
		$findings = array();

		// Dangerous PHP functions that are unlikely to appear in normal text.
		// These can be detected with simple pattern matching.
		$unambiguous_functions = array(
			// Code execution - these are clearly code-related.
			'eval', 'assert', 'create_function', 'call_user_func', 'call_user_func_array',
			// System execution - specific enough to not appear in normal text.
			'exec', 'shell_exec', 'passthru', 'popen', 'proc_open', 'pcntl_exec',
			// File operations - unlikely in normal prose.
			'file_get_contents', 'file_put_contents', 'fwrite', 'fputs',
			// Encoding/obfuscation - clearly code-related.
			'base64_decode', 'gzinflate', 'gzuncompress', 'str_rot13', 'convert_uudecode',
		);

		// Ambiguous functions that are common English words.
		// "system" appears in "braking system", "operating system", etc.
		// These require additional context to detect (must look like actual PHP code).
		$ambiguous_functions = array( 'system' );

		// Functions reported with the higher severity/confidence pair.
		// NOTE(review): create_function, call_user_func(_array) and pcntl_exec are
		// grouped with the execution functions above but are absent here, so they
		// are reported as "suspicious" - confirm that this is intended.
		$code_exec = array( 'eval', 'assert', 'exec', 'shell_exec', 'passthru', 'popen', 'proc_open' );

		// Pattern 1: Unambiguous functions - name followed by an opening parenthesis.
		$unambiguous_pattern = implode( '|', array_map( 'preg_quote', $unambiguous_functions ) );
		$pattern1            = '/\b(' . $unambiguous_pattern . ')\s*\(/i';

		// Pattern 2: Ambiguous functions - the first argument must start with
		// $, ' or ". This prevents matching "braking system (ABS)" or
		// "operating system (Windows)".
		$ambiguous_pattern = implode( '|', array_map( 'preg_quote', $ambiguous_functions ) );
		$pattern2          = '/\b(' . $ambiguous_pattern . ')\s*\(\s*[\$\'\"]/i';

		// Check unambiguous functions (simple pattern).
		$result1 = preg_match_all( $pattern1, $content, $matches1, PREG_SET_ORDER | PREG_OFFSET_CAPTURE );

		if ( false !== $result1 && $result1 > 0 ) {
			foreach ( $matches1 as $match ) {
				$function     = isset( $match[1][0] ) ? $match[1][0] : '';
				$position     = isset( $match[0][1] ) ? $match[0][1] : 0;
				$matched_text = $match[0][0];

				// For post titles, require PHP-like context to avoid false positives.
				// Titles often contain descriptive text such as "Eval (description)",
				// which is prose rather than code.
				if ( 'post_title' === $field ) {
					// Look at what follows the opening parenthesis.
					$after_paren = substr( $content, $position + strlen( $matched_text ), 50 );

					// Accept only a variable ($), a string literal (' or ") or a
					// nested call (identifier followed by "("). A bare word is
					// not enough, otherwise "Eval (description)" would pass.
					if ( ! preg_match( '/^\s*(?:[\$\'"]|[a-z_]\w*\s*\()/i', $after_paren ) ) {
						continue;
					}
				}

				// Higher severity for code execution functions.
				$is_code_exec = in_array( strtolower( $function ), $code_exec, true );

				$findings[] = array(
					'rule_id'      => 'php_dangerous_function',
					'category'     => 'code_injection',
					'severity'     => $is_code_exec ? 'critical' : 'suspicious',
					'confidence'   => $is_code_exec ? 90 : 75,
					'matched_text' => $matched_text,
					'position'     => $position,
					'object_id'    => $object_id,
					'object_type'  => $object_type,
					'field'        => $field,
					'extra'        => array(
						'function' => $function,
						'is_code_exec' => $is_code_exec,
					),
				);
			}
		}

		// Check ambiguous functions (require PHP-like context).
		// Only matches "system" when followed by $, ', or " (actual PHP code).
		$result2 = preg_match_all( $pattern2, $content, $matches2, PREG_SET_ORDER | PREG_OFFSET_CAPTURE );

		if ( false !== $result2 && $result2 > 0 ) {
			foreach ( $matches2 as $match ) {
				$function = isset( $match[1][0] ) ? $match[1][0] : '';
				$position = isset( $match[0][1] ) ? $match[0][1] : 0;

				$findings[] = array(
					'rule_id'      => 'php_dangerous_function',
					'category'     => 'code_injection',
					'severity'     => 'critical',
					'confidence'   => 85, // Slightly lower since "system" is common.
					'matched_text' => $match[0][0],
					'position'     => $position,
					'object_id'    => $object_id,
					'object_type'  => $object_type,
					'field'        => $field,
					'extra'        => array(
						'function' => $function,
						'is_code_exec' => true,
						'context_aware' => true,
					),
				);
			}
		}

		// Also check for serialized PHP object markers: O:<length>:"<class name>".
		$serialized_pattern = '/O:\d+:"[^"]+"/';
		$serialized_result  = preg_match_all( $serialized_pattern, $content, $serialized_matches, PREG_OFFSET_CAPTURE );

		if ( false !== $serialized_result && $serialized_result > 0 ) {
			// Without PREG_SET_ORDER, index 0 holds every full match as [text, offset].
			foreach ( $serialized_matches[0] as $match ) {
				$findings[] = array(
					'rule_id'      => 'php_serialized_object',
					'category'     => 'code_injection',
					'severity'     => 'review',
					'confidence'   => 50,
					'matched_text' => substr( $match[0], 0, 100 ),
					'position'     => $match[1],
					'object_id'    => $object_id,
					'object_type'  => $object_type,
					'field'        => $field,
				);
			}
		}

		return $findings;
	}

	/**
	 * Detect SVG with embedded scripts.
	 *
	 * SVG files can contain JavaScript that executes in the browser.
	 *
	 * @since 1.0.0
	 * @param string $content     Content to scan.
	 * @param int    $object_id   Object ID.
	 * @param string $object_type Object type.
	 * @param string $field       Field name.
	 * @return array Findings.
	 */
	private function detect_svg_scripts( $content, $object_id, $object_type, $field ) {
		// One row per check, evaluated in order:
		// regex, rule id, severity, confidence, suffix appended to the excerpt.
		$checks = array(
			// Opening <svg> tag carrying an inline event handler.
			array( '/<svg[^>]*\bon(?:load|error|click|mouseover)[^>]*>/is', 'svg_event_handler', 'critical', 90, '' ),
			// <svg> ... <script> ... </script> ... </svg>.
			array( '/<svg[^>]*>.*?<script[^>]*>.*?<\/script>.*?<\/svg>/is', 'svg_embedded_script', 'critical', 95, '...' ),
			// <svg> ... <foreignObject> ... </foreignObject> ... </svg>.
			array( '/<svg[^>]*>.*?<foreignObject[^>]*>.*?<\/foreignObject>.*?<\/svg>/is', 'svg_foreign_object', 'suspicious', 70, '...' ),
		);

		$findings = array();

		foreach ( $checks as $check ) {
			list( $regex, $rule_id, $severity, $confidence, $suffix ) = $check;

			$hits  = array();
			$count = preg_match_all( $regex, $content, $hits, PREG_OFFSET_CAPTURE );

			// Skip on PCRE failure (false) as well as on zero matches.
			if ( ! $count ) {
				continue;
			}

			// Every entry of $hits[0] is a [matched text, byte offset] pair.
			foreach ( $hits[0] as $hit ) {
				$findings[] = array(
					'rule_id'      => $rule_id,
					'category'     => 'javascript_injection',
					'severity'     => $severity,
					'confidence'   => $confidence,
					'matched_text' => substr( $hit[0], 0, 200 ) . $suffix,
					'position'     => $hit[1],
					'object_id'    => $object_id,
					'object_type'  => $object_type,
					'field'        => $field,
				);
			}
		}

		return $findings;
	}

	/**
	 * Detect cryptocurrency miners.
	 *
	 * Cryptojacking scripts that mine cryptocurrency in visitors' browsers.
	 *
	 * @since 1.0.0
	 * @param string $content     Content to scan.
	 * @param int    $object_id   Object ID.
	 * @param string $object_type Object type.
	 * @param string $field       Field name.
	 * @return array Findings.
	 */
	private function detect_crypto_miners( $content, $object_id, $object_type, $field ) {
		// Regex fragments for known miner hosts, script names and API calls.
		// Order matters for overlapping entries: the first alternative that
		// matches at a given offset is the one reported.
		$signatures = array(
			// Domain patterns.
			'coinhive\.com',
			'coin-hive\.com',
			'cryptoloot\.pro',
			'crypto-loot\.com',
			'jsecoin\.com',
			'authedmine\.com',
			'minero\.cc',
			'webmine\.pro',
			'ppoi\.org',
			'monerominer\.rocks',
			// Script/library patterns.
			'coinhive\.min\.js',
			'cryptonight\.wasm',
			'deepMiner\.js',
			'miner\.start\s*\(',
			'CoinHive\.Anonymous',
			'CoinHive\.User',
			'CryptoLoot\.Anonymous',
			// WebAssembly crypto patterns.
			'cryptonight',
			'stratum\+tcp://',
		);

		// "#" is the delimiter, so the slashes in the stratum entry need no escaping.
		$regex = '#(' . implode( '|', $signatures ) . ')#i';

		$hits  = array();
		$count = preg_match_all( $regex, $content, $hits, PREG_SET_ORDER | PREG_OFFSET_CAPTURE );

		// Nothing to report on PCRE failure (false) or zero matches.
		if ( ! $count ) {
			return array();
		}

		$findings = array();

		foreach ( $hits as $hit ) {
			// With PREG_OFFSET_CAPTURE, group 0 is a [text, byte offset] pair.
			$whole = $hit[0];

			$findings[] = array(
				'rule_id'      => 'crypto_miner',
				'category'     => 'cryptojacking',
				'severity'     => 'critical',
				'confidence'   => 95,
				'matched_text' => $whole[0],
				'position'     => $whole[1],
				'object_id'    => $object_id,
				'object_type'  => $object_type,
				'field'        => $field,
			);
		}

		return $findings;
	}

	/**
	 * Detect JavaScript redirects (window.location, etc.).
	 *
	 * @since 1.0.0
	 * @param string $content     Content to scan.
	 * @param int    $object_id   Object ID.
	 * @param string $object_type Object type.
	 * @param string $field       Field name.
	 * @return array Findings.
	 */
	private function detect_js_redirects( $content, $object_id, $object_type, $field ) {
		$findings = array();

		// Assignments and calls that navigate the current, parent or top window.
		$redirect_patterns = array(
			'window\.location\s*=',
			'window\.location\.href\s*=',
			'window\.location\.replace\s*\(',
			'window\.location\.assign\s*\(',
			'document\.location\s*=',
			'document\.location\.href\s*=',
			'top\.location\s*=',
			'parent\.location\s*=',
			'self\.location\s*=',
		);

		// Group 1: the redirect construct. Group 2: an absolute http(s) target,
		// optionally preceded by an opening quote.
		$pattern = '/(' . implode( '|', $redirect_patterns ) . ')\s*["\']?(https?:\/\/[^"\'\s;]+)/i';

		$result = preg_match_all( $pattern, $content, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE );

		// false signals a PCRE error, 0 means no redirect was found.
		if ( false === $result || 0 === $result ) {
			return $findings;
		}

		// Anchor the allowlist so the WHOLE host has to match. Unanchored, a host
		// that merely contains an allowlisted name (for example
		// "youtube.com.attacker.example" or "notyoutube.com") would be skipped.
		// The extra (?:...) keeps the anchors bound to every alternative.
		$allowlist_regex = '/^(?:' . $this->get_allowlist_pattern() . ')\z/i';

		foreach ( $matches as $match ) {
			$redirect_type = isset( $match[1][0] ) ? $match[1][0] : '';
			$url           = isset( $match[2][0] ) ? $match[2][0] : '';
			$position      = isset( $match[0][1] ) ? $match[0][1] : 0;
			$domain        = $this->extract_domain( $url );

			// Skip redirects whose host is an allowlisted domain or a subdomain of one.
			if ( preg_match( $allowlist_regex, $domain ) ) {
				continue;
			}

			// Baseline score; the reputation helper may raise it and adds its
			// own keys to $extra_data (passed by reference).
			$confidence = 75;
			$severity   = 'suspicious';
			$extra_data = array( 'redirect_type' => $redirect_type );

			$reputation_result = $this->apply_reputation_checks( $url, $domain, $confidence, $severity, $extra_data );
			$confidence        = $reputation_result['confidence'];
			$severity          = $reputation_result['severity'];

			$findings[] = array(
				'rule_id'      => 'js_redirect',
				'category'     => 'redirects',
				'severity'     => $severity,
				'confidence'   => $confidence,
				'matched_text' => substr( $match[0][0], 0, 200 ),
				'position'     => $position,
				'domain'       => $domain,
				'url'          => $url,
				'object_id'    => $object_id,
				'object_type'  => $object_type,
				'field'        => $field,
				'extra'        => $extra_data,
			);
		}

		return $findings;
	}

	/**
	 * Decode HTML entities to catch encoded attacks.
	 *
	 * @since 1.0.0
	 * @param string $content Content to decode.
	 * @return string Decoded content.
	 */
	private function decode_html_entities( $content ) {
		// First pass: named entities and the numeric entities PHP accepts.
		$decoded = html_entity_decode( $content, ENT_QUOTES | ENT_HTML5, 'UTF-8' );

		// Turns the digits of a numeric entity that is still present after the
		// first pass into UTF-8. The entity text is returned unchanged when the
		// number is not an encodable code point (above U+10FFFF or a surrogate).
		// This keeps chr() supplied with in-range integers only and stops
		// out-of-range values from wrapping around into ASCII such as "<".
		$to_utf8 = function ( $digits, $base, $entity ) {
			$digits = ltrim( $digits, '0' );

			// U+10FFFF needs at most 6 hex or 7 decimal digits; anything longer
			// is rejected before it is converted to a number.
			if ( strlen( $digits ) > ( 16 === $base ? 6 : 7 ) ) {
				return $entity;
			}

			$code_point = ( '' === $digits ) ? 0 : intval( $digits, $base );

			if ( $code_point > 0x10FFFF || ( $code_point >= 0xD800 && $code_point <= 0xDFFF ) ) {
				return $entity;
			}

			if ( $code_point < 0x80 ) {
				return chr( $code_point );
			}

			if ( $code_point < 0x800 ) {
				return chr( 0xC0 | ( $code_point >> 6 ) )
					. chr( 0x80 | ( $code_point & 0x3F ) );
			}

			if ( $code_point < 0x10000 ) {
				return chr( 0xE0 | ( $code_point >> 12 ) )
					. chr( 0x80 | ( ( $code_point >> 6 ) & 0x3F ) )
					. chr( 0x80 | ( $code_point & 0x3F ) );
			}

			return chr( 0xF0 | ( $code_point >> 18 ) )
				. chr( 0x80 | ( ( $code_point >> 12 ) & 0x3F ) )
				. chr( 0x80 | ( ( $code_point >> 6 ) & 0x3F ) )
				. chr( 0x80 | ( $code_point & 0x3F ) );
		};

		// Hexadecimal entities are handled before decimal ones, as two
		// separate passes over the text.
		$passes = array(
			array( '/&#x([0-9a-fA-F]+);/', 16 ),
			array( '/&#(\d+);/', 10 ),
		);

		foreach ( $passes as $pass ) {
			list( $regex, $base ) = $pass;

			$result = preg_replace_callback(
				$regex,
				function ( $matches ) use ( $to_utf8, $base ) {
					return $to_utf8( $matches[1], $base, $matches[0] );
				},
				$decoded
			);

			// preg_replace_callback() returns null on a PCRE error; keep the
			// text from the previous step rather than losing the content.
			if ( null !== $result ) {
				$decoded = $result;
			}
		}

		return $decoded;
	}

	/**
	 * Get allowlist pattern for domain matching.
	 *
	 * Reads from content_guard_pro_settings option and caches the compiled pattern.
	 * Patterns automatically match subdomains (www., embed., m., etc.).
	 *
	 * @since 1.0.0
	 * @return string Regex pattern for allowlisted domains.
	 */
	private function get_allowlist_pattern() {
		$transient_key = 'content_guard_pro_allowlist_pattern';

		// Serve the compiled pattern from the transient cache when present.
		$stored = get_transient( $transient_key );
		if ( false !== $stored ) {
			return $stored;
		}

		// Joins regex fragments into one non-capturing alternation. Each fragment
		// gets a prefix that accepts any chain of subdomain labels
		// (www., m., embed., cdn.assets., ...).
		$compile = function ( $fragments ) {
			$alternatives = array();
			foreach ( $fragments as $fragment ) {
				$alternatives[] = '(?:[a-z0-9-]+\.)*' . $fragment;
			}
			return '(?:' . implode( '|', $alternatives ) . ')';
		};

		// Built-in trusted base domains, already written as regex fragments.
		$builtin = array(
			'youtube\.com',
			'youtu\.be',
			'vimeo\.com',
			'twitter\.com',
			'x\.com',
			'facebook\.com',
			'instagram\.com',
			'google\.com',
			'googleapis\.com',
			'gstatic\.com',
			'cloudflare\.com',
			'unpkg\.com',
			'jsdelivr\.net',
			'wordpress\.org',
			'wordpress\.com',
			'gravatar\.com',
			'w\.org',
			'spotify\.com',
			'soundcloud\.com',
			'linkedin\.com',
			'pinterest\.com',
			'tiktok\.com',
			'reddit\.com',
			'github\.com',
			'githubusercontent\.com',
			'codepen\.io',
			'jsfiddle\.net',
		);

		// Site-specific entries from the plugin settings: one domain per line,
		// lines starting with "#" are comments.
		$options      = get_option( 'content_guard_pro_settings', array() );
		$raw_list     = isset( $options['allowlist_domains'] ) ? $options['allowlist_domains'] : '';
		$user_entries = array();

		if ( ! empty( $raw_list ) ) {
			foreach ( explode( "\n", $raw_list ) as $raw_line ) {
				$entry = trim( $raw_line );

				if ( empty( $entry ) || preg_match( '/^#/', $entry ) ) {
					continue;
				}

				// Quote the entry for use inside a "/"-delimited regex, then turn
				// the quoted "*" back into a wildcard.
				$user_entries[] = str_replace( '\*', '.*', preg_quote( $entry, '/' ) );
			}
		}

		$compiled = $compile( array_merge( $builtin, $user_entries ) );

		// If the combined pattern does not compile, use the built-in list alone.
		if ( false === @preg_match( '/' . $compiled . '/i', '' ) ) {
			$compiled = $compile( $builtin );
		}

		// Cache for 1 hour.
		set_transient( $transient_key, $compiled, 3600 );

		return $compiled;
	}

	/**
	 * Apply URL reputation checks and adjust confidence/severity.
	 *
	 * @since 1.0.0
	 * @param string $url         URL to check.
	 * @param string $domain      Domain extracted from URL.
	 * @param int    $confidence  Base confidence score.
	 * @param string $severity    Base severity level.
	 * @param array  $extra_data  Additional data array (passed by reference).
	 * @return array Modified confidence and severity: ['confidence' => int, 'severity' => string].
	 */
	private function apply_reputation_checks( $url, $domain, $confidence, $severity, &$extra_data ) {
		// Check denylist first (PRD Section 3.9).
		$on_denylist = $this->check_denylist( $url );

		// Record what was checked on the caller's array (passed by reference).
		$extra_data['domain']      = $domain;
		$extra_data['url']         = $url;
		$extra_data['on_denylist'] = $on_denylist;

		// A denylist hit overrides the caller's baseline.
		if ( $on_denylist ) {
			$confidence = 95;
			$severity   = 'critical';
		}

		// Check reputation (PRD Section 3.10). The integration class is optional.
		if ( class_exists( 'CGP_Integrations' ) ) {
			$reputation = CGP_Integrations::check_url_reputation( $url );

			if ( ! empty( $reputation['threat'] ) ) {
				// NOTE(review): the shape of the reputation result is not visible
				// here; a missing or non-numeric score is treated as 0 so it can
				// neither raise a warning nor break the addition.
				$score = 0;
				if ( isset( $reputation['score'] ) && is_numeric( $reputation['score'] ) ) {
					$score = $reputation['score'] + 0;
				}

				// Raise the confidence by the reported score, capped at 100.
				$confidence               = min( 100, $confidence + $score );
				$severity                 = 'critical';
				$extra_data['reputation'] = $reputation;
			}
		}

		return array(
			'confidence' => $confidence,
			'severity'   => $severity,
		);
	}

	/**
	 * Check if URL matches denylist.
	 *
	 * @since 1.0.0
	 * @param string $url URL to check.
	 * @return bool True if URL is on denylist, false otherwise.
	 */
	private function check_denylist( $url ) {
		$entries = $this->get_denylist_patterns();

		if ( empty( $entries ) ) {
			return false;
		}

		$host = $this->extract_domain( $url );

		foreach ( $entries as $entry ) {
			// Entries written as /body/flags are treated as regular expressions;
			// anything else is a plain substring to look for in the host.
			$parts    = array();
			$is_regex = preg_match( '/^\/(.+)\/([imsxADSUXJu]*)$/', $entry, $parts );

			if ( ! $is_regex ) {
				// Case-insensitive substring test against the host only.
				if ( false !== stripos( $host, $entry ) ) {
					return true;
				}
				continue;
			}

			$flags      = isset( $parts[2] ) ? $parts[2] : '';
			$expression = '/' . $parts[1] . '/' . $flags;

			// Ignore expressions that do not compile.
			if ( false === @preg_match( $expression, '' ) ) {
				continue;
			}

			// A regex entry may match either the full URL or just the host.
			if ( preg_match( $expression, $url ) || preg_match( $expression, $host ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Get denylist patterns.
	 *
	 * @since 1.0.0
	 * @return array Array of denylist patterns.
	 */
	private function get_denylist_patterns() {
		$transient_key = 'content_guard_pro_denylist_patterns';

		// Reuse the parsed list from the transient cache when present.
		$stored = get_transient( $transient_key );
		if ( false !== $stored ) {
			return $stored;
		}

		$options  = get_option( 'content_guard_pro_settings', array() );
		$raw_list = isset( $options['denylist_patterns'] ) ? $options['denylist_patterns'] : '';
		$entries  = array();

		// One pattern per line; blank lines and lines starting with "#" are ignored.
		if ( ! empty( $raw_list ) ) {
			foreach ( explode( "\n", $raw_list ) as $raw_line ) {
				$entry = trim( $raw_line );

				if ( empty( $entry ) || preg_match( '/^#/', $entry ) ) {
					continue;
				}

				$entries[] = $entry;
			}
		}

		// Cache for 1 hour (an empty list is cached as well).
		set_transient( $transient_key, $entries, 3600 );

		return $entries;
	}

	/**
	 * Extract domain from URL.
	 *
	 * @since 1.0.0
	 * @param string $url URL to parse.
	 * @return string Domain name.
	 */
	private function extract_domain( $url ) {
		$parts = wp_parse_url( $url );

		// No host component (or an unparseable URL) yields an empty string.
		if ( isset( $parts['host'] ) ) {
			return $parts['host'];
		}

		return '';
	}

	/**
	 * Deduplicate findings by generating fingerprints.
	 *
	 * @since 1.0.0
	 * @param array $findings Array of findings.
	 * @return array Deduplicated findings.
	 */
	private function deduplicate_findings( $findings ) {
		$kept         = array();
		$fingerprints = array();

		foreach ( $findings as $item ) {
			// The fingerprint covers rule, object and matched text; the field
			// name and position are not part of it.
			$text = isset( $item['matched_text'] ) ? $item['matched_text'] : '';
			$key  = hash( 'sha256', $item['rule_id'] . $item['object_id'] . $item['object_type'] . $text );

			// Only the first finding per fingerprint is kept.
			if ( isset( $fingerprints[ $key ] ) ) {
				continue;
			}

			$fingerprints[ $key ] = true;
			$item['fingerprint']  = $key;
			$kept[]               = $item;
		}

		return $kept;
	}

	/**
	 * Parse Gutenberg blocks from content.
	 *
	 * @since 1.0.0
	 * @param string $content Post content.
	 * @return array Parsed blocks.
	 */
	private function parse_blocks( $content ) {
		// Without a global parse_blocks() function, wrap the whole content in a
		// single unnamed block so callers can iterate it the same way.
		if ( ! function_exists( 'parse_blocks' ) ) {
			$fallback_block = array(
				'blockName'   => null,
				'innerHTML'   => $content,
				'innerBlocks' => array(),
				'attrs'       => array(),
			);

			return array( $fallback_block );
		}

		// Resolves to the global function, not to this method.
		return parse_blocks( $content );
	}

	/**
	 * Scan Elementor data structure.
	 *
	 * Parses Elementor's JSON data format and scans all content.
	 *
	 * @since 1.0.0
	 * @param array  $elements   Elementor elements array.
	 * @param int    $object_id  Post ID.
	 * @param string $field      Field name.
	 * @return array Array of findings.
	 */
	public function scan_elementor_data( $elements, $object_id, $field = '_elementor_data' ) {
		$findings = array();

		if ( ! is_array( $elements ) ) {
			return $findings;
		}

		foreach ( $elements as $element ) {
			// Ignore malformed entries instead of indexing into a non-array.
			if ( ! is_array( $element ) ) {
				continue;
			}

			// Scan element settings, including values nested inside arrays.
			// Presumably this covers Elementor controls stored as arrays, such as
			// link controls and repeater rows - confirm against the data format.
			if ( isset( $element['settings'] ) && is_array( $element['settings'] ) ) {
				// Work list of [field path, values] pairs. An explicit stack is
				// used so that nested arrays are walked without a helper method.
				$pending = array( array( $field, $element['settings'] ) );

				while ( ! empty( $pending ) ) {
					list( $path, $values ) = array_pop( $pending );

					foreach ( $values as $setting_key => $setting_value ) {
						// Top-level settings keep the "field[key]" label; nested
						// values extend it to "field[key][sub]".
						$setting_path = $path . '[' . $setting_key . ']';

						if ( is_array( $setting_value ) ) {
							$pending[] = array( $setting_path, $setting_value );
							continue;
						}

						if ( is_string( $setting_value ) && ! empty( $setting_value ) ) {
							$element_findings = $this->apply_detection_patterns(
								$setting_value,
								$object_id,
								'postmeta',
								$setting_path
							);
							$findings = array_merge( $findings, $element_findings );
						}
					}
				}
			}

			// Recursively scan nested elements.
			if ( isset( $element['elements'] ) && is_array( $element['elements'] ) ) {
				$nested_findings = $this->scan_elementor_data( $element['elements'], $object_id, $field );
				$findings        = array_merge( $findings, $nested_findings );
			}
		}

		// Collapse repeated findings and attach a fingerprint to each one.
		return $this->deduplicate_findings( $findings );
	}
}
