%PDF- %PDF-
Direktori : /var/www/pn/utils/classes/ |
Current File : /var/www/pn/utils/classes/IadbParser.php |
<?php

namespace WebPappers\IadbParser;

use WebPappers\Parser\Parser;
use WebPappers\IadbProject\IadbProject;
use DiDom\Document;
use Exception;

/**
 * Crawls the paginated project search of the configured site, opens every
 * project detail page found there and saves the projects whose content
 * matches the configured keywords.
 *
 * Relies on members that are not declared in this class and are presumably
 * provided by the parent Parser: $this->siteUrl, getPageDom() and
 * hasKeywords().
 */
class IadbParser extends Parser
{
    /**
     * Exclusive upper bound of the page index used by parse().
     *
     * @var int|null
     */
    private $maxPages;

    /**
     * @return int|null The exclusive upper page bound, or null when not set.
     */
    public function getMaxPages()
    {
        return $this->maxPages;
    }

    /**
     * @param int $maxPages Exclusive upper bound of the page index to crawl.
     */
    public function setMaxPages($maxPages)
    {
        $this->maxPages = $maxPages;
    }

    /**
     * Builds the URL of one page of the project search listing.
     *
     * An earlier version of the search used bracketed parameter names
     * (query[country], query[sector], query[status], query[query]); the
     * current one uses the flat names below, all left empty so that no
     * filter is applied.
     *
     * @param int|string $pageId Page index appended as the "page" parameter.
     *
     * @return string Absolute URL of the listing page.
     */
    public function buildPageUrl($pageId)
    {
        // Build the filter part with http_build_query() and an explicit '&'
        // separator so the parameter names cannot be corrupted by HTML-entity
        // decoding and do not depend on the arg_separator.output ini setting.
        $filters = http_build_query(
            array(
                'country' => '',
                'sector'  => '',
                'status'  => '',
                'query'   => '',
            ),
            '',
            '&'
        );

        return $this->siteUrl . '/en/projects-search?' . $filters . '&page=' . $pageId;
    }

    /**
     * Collects the project detail links found on one listing page.
     *
     * @param string $url Listing page URL, e.g. as returned by buildPageUrl().
     *
     * @return string[] Links built by prefixing each href with the site URL.
     */
    public function getProjectLinks($url)
    {
        $links = array();
        $page = $this->getPageDom($url);

        // Every anchor inside an element with the "responsive-enabled" class
        // is treated as a project link.
        $projectLinks = $page->find('.responsive-enabled a');
        foreach ($projectLinks as $link) {
            // The href is appended to the site URL as-is, so it is presumably
            // site-relative.
            $links[] = trim($this->siteUrl . $link->href);
        }

        return $links;
    }

    /**
     * Returns the HTML of the main content block of a project detail page.
     *
     * @param mixed $projectDom DOM of the project page as returned by
     *                          getPageDom(); must support find().
     *
     * @return string HTML of the first "#block-iadb-content" element.
     *
     * @throws Exception When the page has no "#block-iadb-content" element.
     */
    public function getContentHtml($projectDom)
    {
        $content = $projectDom->find('#block-iadb-content');
        if (empty($content[0])) {
            throw new Exception('Can`t find content block #block-iadb-content in project details url');
        }

        return $content[0]->html();
    }

    /**
     * Walks the listing pages, and for every linked project whose content
     * contains keywords, parses and saves it.
     *
     * @throws Exception Propagated from getContentHtml() when a project page
     *                   lacks the expected content block; this aborts the run.
     */
    public function parse()
    {
        // NOTE(review): the crawl starts at the hard-coded page 1153 and stops
        // before $this->maxPages (exclusive). The start value looks like a
        // leftover from resuming an interrupted run - confirm before changing.
        for ($i = 1153; $i < $this->maxPages; $i++) {
            $parseUrl = $this->buildPageUrl($i);
            $links = $this->getProjectLinks($parseUrl);

            // Dumps the listing URL being processed (progress/debug output).
            var_dump($parseUrl);

            foreach ($links as $link) {
                $projectDom = $this->getPageDom($link);
                $project = new IadbProject($projectDom, $link);
                $contentHtml = $this->getContentHtml($projectDom);

                // hasKeywords() is expected to return the matched keywords;
                // an empty result means the project is skipped.
                $hasKeywords = $this->hasKeywords($contentHtml);
                if (!empty($hasKeywords)) {
                    $additionalData = array();
                    $additionalData['keywords'] = implode(', ', $hasKeywords);
                    $project->parseDataFromRemote($additionalData);
                    $project->save();
                }
            }
        }
    }
}