// If no English title found, try broader searches with fallbacks
if (empty($result['english_title'])) {
    // Only the most obvious non-title labels are rejected at this stage.
    $strict_unwanted = ['logo', 'banner', 'image', 'photo', 'icon', 'youtube'];

    // Try looking for images with less strict criteria
    $all_images = $xpath->query("//img[@title or @alt]");
    foreach ($all_images as $img) {
        $title = $img->getAttribute('title');
        $alt = $img->getAttribute('alt');

        // A label qualifies when it is longer than 10 bytes and contains a run
        // of 10+ Latin letters/whitespace. The title attribute takes precedence;
        // alt is consulted only when title fails this format check.
        if (!empty($title) && strlen($title) > 10 && preg_match('/[a-zA-Z\s]{10,}/', $title)) {
            $candidate = $title;
        } elseif (!empty($alt) && strlen($alt) > 10 && preg_match('/[a-zA-Z\s]{10,}/', $alt)) {
            $candidate = $alt;
        } else {
            continue;
        }

        // Reject labels that mention any of the unwanted terms (case-insensitive).
        $candidate_lower = strtolower($candidate);
        $is_unwanted = false;
        foreach ($strict_unwanted as $term) {
            if (strpos($candidate_lower, $term) !== false) {
                $is_unwanted = true;
                break;
            }
        }

        // First acceptable label wins.
        if (!$is_unwanted) {
            $result['english_title'] = sanitize_text_field($candidate);
            break;
        }
    }
}

// The description fallbacks below are deliberately NOT nested inside the image
// loop above: they fill a different key and must run whenever the description
// is missing, regardless of how the English-title search went.

// Contact details, links and call-to-action wording that disqualify a text
// as an event description.
$contact_noise_pattern = '/(الهاتف|البريد|رابط|إضغط|تسجيل|أضف|تقويم|\+966|@|www\.|http)/ui';

// If no description found in styled div, try fallback approaches
if (empty($result['full_description'])) {
    // Look for paragraphs containing Arabic text
    $all_paragraphs = $xpath->query("//p");
    foreach ($all_paragraphs as $p) {
        $text = trim($p->nodeValue);

        // Skip if too short or contains unwanted content.
        // Note: strlen() counts bytes, so for UTF-8 Arabic text the 100-byte
        // threshold corresponds to roughly 50 letters.
        if (strlen($text) < 100 || preg_match($contact_noise_pattern, $text)) {
            continue;
        }

        // Require at least one Arabic letter before accepting the paragraph
        if (preg_match('/[أ-ي]/u', $text)) {
            $result['full_description'] = sanitize_textarea_field($text);
            break;
        }
    }
}

// Final fallback: Look for any div with substantial Arabic text
if (empty($result['full_description'])) {
    // XPath 1.0 converts the text() node-set to the string value of its first
    // node, so this selects divs whose first direct text node exceeds 100 chars.
    $content_divs = $xpath->query("//div[string-length(normalize-space(text())) > 100]");
    foreach ($content_divs as $div) {
        $text = trim($div->nodeValue);

        // Skip if contains unwanted content (this stage also rejects "contact us" wording)
        if (preg_match('/(الهاتف|البريد|رابط|إضغط|تسجيل|أضف|تقويم|\+966|@|www\.|http|تواصل|اتصل)/ui', $text)) {
            continue;
        }

        // Needs more than 100 bytes and at least one Arabic letter
        if (strlen($text) > 100 && preg_match('/[أ-ي]/u', $text)) {
            // Additional cleaning: drop blank lines and lines carrying contact/link noise
            $lines = explode("\n", $text);
            $clean_lines = [];
            foreach ($lines as $line) {
                $line = trim($line);
                if (!empty($line) && !preg_match($contact_noise_pattern, $line)) {
                    $clean_lines[] = $line;
                }
            }

            // Join the surviving lines and collapse whitespace runs to single spaces
            $clean_text = implode(' ', $clean_lines);
            $clean_text = preg_replace('/\s+/', ' ', $clean_text);
            $clean_text = trim($clean_text);

            // Accept only if enough text survives the cleaning
            if (strlen($clean_text) > 100) {
                $result['full_description'] = sanitize_textarea_field($clean_text);
                break;
            }
        }
    }
}
// Last resort for the English title: scan headings and bold/strong text.
if (empty($result['english_title'])) {
    foreach ($xpath->query("//h1 | //h2 | //h3 | //strong | //b") as $node) {
        $candidate = trim($node->nodeValue);

        // Keep only texts strictly between 10 and 100 bytes long.
        $byte_length = strlen($candidate);
        if ($byte_length <= 10 || $byte_length >= 100) {
            continue;
        }

        // Must contain a run of at least 10 Latin letters/whitespace characters.
        if (!preg_match('/[a-zA-Z\s]{10,}/', $candidate)) {
            continue;
        }

        // Mostly-English test: Latin letters must exceed 60% of the
        // non-whitespace characters.
        $latin_count = preg_match_all('/[a-zA-Z]/', $candidate);
        $visible_count = mb_strlen(preg_replace('/\s/', '', $candidate));
        if ($visible_count > 0 && $latin_count > ($visible_count * 0.6)) {
            $result['english_title'] = sanitize_text_field($candidate);
            break;
        }
    }
}
// Captured runtime output (not code):
// Warning: Cannot modify header information - headers already sent by (output started at /var/www/wp-content/plugins/event-extractor/event-extractor.php:1) in /var/www/wp-includes/pluggable.php on line 1450
// Warning: Cannot modify header information - headers already sent by (output started at /var/www/wp-content/plugins/event-extractor/event-extractor.php:1) in /var/www/wp-includes/pluggable.php on line 1453