%PDF- %PDF-
Direktori : /var/www/pn/utils/classes/ |
Current File : /var/www/pn/utils/classes/AdbParser.php |
<?php

namespace WebPappers\AdbParser;

use WebPappers\Parser\Parser;
use WebPappers\AdbProject\AdbProject;
use DiDom\Document;
use Exception;

/**
 * Walks the paginated "/projects" listing of a site, opens every project
 * detail page found there and saves the projects whose content block
 * matches the configured keywords.
 *
 * NOTE(review): $this->siteUrl, getPageDom() and hasKeywords() are not
 * defined in this class; they are presumably inherited from Parser.
 */
class AdbParser extends Parser
{
    /**
     * Number of listing pages that parse() iterates over.
     * Stays unset (null) until setMaxPages() is called, in which case
     * parse() performs no iterations.
     *
     * @var int|null
     */
    private $maxPages;

    /**
     * @return int|null The configured number of listing pages.
     */
    public function getMaxPages()
    {
        return $this->maxPages;
    }

    /**
     * @param int $maxPages Number of listing pages parse() should walk.
     */
    public function setMaxPages($maxPages)
    {
        $this->maxPages = $maxPages;
    }

    /**
     * Builds the URL of a listing page.
     *
     * Page 0 maps to the bare "/projects" URL; every other page id is
     * appended as the "page" query parameter.
     *
     * @param int|string $pageId Zero-based listing page index.
     *
     * @return string Listing page URL.
     */
    public function buildPageUrl($pageId)
    {
        // Loose comparison is kept on purpose: callers passing '0' or null
        // have always been routed to the first page.
        if (0 == $pageId) {
            return $this->siteUrl . '/projects';
        }

        return $this->siteUrl . '/projects?page=' . $pageId;
    }

    /**
     * Collects the project detail links found on one listing page.
     *
     * Each href matched by ".item-title a" is prefixed with the site URL,
     * so the hrefs are expected to be site-relative paths.
     *
     * @param string $url Listing page URL.
     *
     * @return string[] Project detail URLs in document order.
     */
    public function getProjectLinks($url)
    {
        $links = array();
        $page = $this->getPageDom($url);

        foreach ($page->find('.item-title a') as $anchor) {
            $links[] = trim($this->siteUrl . $anchor->href);
        }

        return $links;
    }

    /**
     * Extracts the HTML of the project content block.
     *
     * @param Document $projectDom DOM of a project detail page
     *                             (presumably a DiDom Document - confirm
     *                             against Parser::getPageDom()).
     *
     * @return string|false HTML of the first "article.node-project" element,
     *                      or false when the page has no such element.
     */
    public function getContentHtml($projectDom)
    {
        $content = $projectDom->find('article.node-project');

        // A missing content block is reported with false instead of an
        // exception so that one broken page does not abort a whole run.
        if (empty($content[0])) {
            return false;
        }

        return $content[0]->html();
    }

    /**
     * Runs the crawl: iterates listing pages 0 .. maxPages - 1, loads every
     * linked project page and saves the projects whose content matches at
     * least one keyword. The matched keywords are passed to the project as
     * a comma-separated "keywords" entry.
     *
     * Visited listing and project URLs are dumped to the output as progress.
     */
    public function parse()
    {
        for ($pageId = 0; $pageId < $this->maxPages; $pageId++) {
            $parseUrl = $this->buildPageUrl($pageId);
            var_dump($parseUrl);

            foreach ($this->getProjectLinks($parseUrl) as $link) {
                var_dump($link);

                $projectDom = $this->getPageDom($link);
                // Constructed before the content check, as before, so any
                // work done by the AdbProject constructor still happens.
                $project = new AdbProject($projectDom, $link);

                $contentHtml = $this->getContentHtml($projectDom);
                if (false === $contentHtml) {
                    // No content block on this page: there is nothing to
                    // search, so do not hand `false` to hasKeywords().
                    continue;
                }

                $hasKeywords = $this->hasKeywords($contentHtml);
                if (empty($hasKeywords)) {
                    continue;
                }

                $additionalData = array(
                    'keywords' => implode(', ', $hasKeywords),
                );
                $project->parseDataFromRemote($additionalData);
                $project->save();
            }
        }
    }
}