%PDF- %PDF-
Direktori : /var/www/pn/utils/classes/ |
Current File : /var/www/pn/utils/classes/Parser.php |
<?php

namespace WebPappers\Parser;

/**
 * Base class for site parsers.
 *
 * Provides the shared plumbing: downloading a page over HTTP, loading it into
 * the injected DOM object, and matching page text against the keyword list
 * stored in PARSER_PATH/keywords.txt. Concrete parsers implement parse().
 */
abstract class Parser
{
    /** @var mixed Site URL handed to the constructor; stored as-is. */
    protected $siteUrl;

    /**
     * @var mixed DOM object handed to the constructor. It must expose
     *            loadHtml(); presumably a \DOMDocument - confirm against callers.
     */
    protected $dom;

    /**
     * @param mixed $siteUrl Site URL, stored for use by subclasses.
     * @param mixed $dom     DOM object used by getPageDom().
     */
    public function __construct($siteUrl, $dom)
    {
        $this->siteUrl = $siteUrl;
        $this->dom = $dom;
    }

    /**
     * Downloads a URL with cURL and returns the response body.
     *
     * @param string $url Address to fetch.
     *
     * @return string|bool Response body on success, false when the transfer fails
     *                     (the return value of curl_exec() is passed through).
     */
    public function getPageHtml($url)
    {
        $ch = curl_init($url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // NOTE(review): peer certificate verification is switched off, so HTTPS
        // responses are not authenticated. Kept as-is because callers may rely on
        // fetching hosts with broken certificates; enable it if that is not needed.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        $output = curl_exec($ch);
        curl_close($ch);

        return $output;
    }

    /**
     * Downloads a URL and loads the response into the injected DOM object.
     *
     * The result of getPageHtml() is passed to loadHtml() unchanged, including
     * the false it returns when the download fails.
     *
     * @param string $url Address to fetch.
     *
     * @return mixed The same DOM object that was given to the constructor.
     */
    public function getPageDom($url)
    {
        $html = $this->getPageHtml($url);
        $this->dom->loadHtml($html);

        return $this->dom;
    }

    /**
     * Reads the comma-separated keyword list from PARSER_PATH/keywords.txt.
     *
     * Example file content: Water and Sanitation, digital connectivity, quality
     *
     * Every entry is trimmed and blank entries are dropped. When the file cannot
     * be read, a message is echoed and an empty list is returned, so callers can
     * always iterate over the result.
     *
     * @return string[] Trimmed, non-empty keywords in file order.
     */
    public function getKeywords()
    {
        $path = PARSER_PATH . '/keywords.txt';

        try {
            $keysRaw = file_get_contents($path);
            // file_get_contents() signals failure by returning false rather than
            // throwing, so turn that into an exception for the handler below.
            if ($keysRaw === false) {
                throw new \RuntimeException('unable to read file');
            }
        } catch (\Exception $e) {
            echo 'Can`t get keywords: ', $e->getMessage(), "\n";
            echo 'From file: ', $path, "\n";

            return array();
        }

        $clearedKeys = array();
        foreach (explode(',', trim($keysRaw)) as $value) {
            // Trim before testing for emptiness: a whitespace-only entry would
            // otherwise become an empty keyword, and "0" must not be discarded.
            $value = trim($value);
            if ($value === '') {
                continue;
            }
            $clearedKeys[] = $value;
        }

        return $clearedKeys;
    }

    /**
     * Returns the configured keywords that occur in the given text.
     *
     * Matching is a case-insensitive substring search.
     *
     * @param string $html Text to search.
     *
     * @return string[] Matching keywords, in the order getKeywords() returns them.
     */
    public function hasKeywords($html)
    {
        $findedKeywords = array();
        foreach ($this->getKeywords() as $key) {
            // An empty needle matches every haystack on PHP 8 and raises a
            // warning on older versions, so it is never a meaningful keyword.
            if ($key === '') {
                continue;
            }
            if (stripos($html, $key) !== false) {
                $findedKeywords[] = $key;
            }
        }

        return $findedKeywords;
    }

    /**
     * Runs the site-specific parsing; implemented by each concrete parser.
     */
    abstract public function parse();
}