%PDF- %PDF-
Direktori : /var/www/pn/utils/classes/ |
Current File : /var/www/pn/utils/classes/WorldbankParser.php |
<?php

namespace WebPappers\WorldbankParser;

use WebPappers\Parser\Parser;
use WebPappers\WorldbankProject\WorldbankProject;
use DiDom\Document;
use Exception;

/**
 * Walks the World Bank project search API page by page, loads the full
 * record of every listed project and saves the projects whose text passes
 * the keyword check.
 *
 * getPageHtml() and hasKeywords() are not defined in this class; they are
 * presumably inherited from Parser - confirm against the parent class.
 */
class WorldbankParser extends Parser
{
    /**
     * Number of listing pages parse() requests. While it is unset the
     * loop condition in parse() is false and nothing is fetched.
     */
    private $maxPages;

    /**
     * @return mixed the page limit previously given to setMaxPages()
     */
    public function getMaxPages()
    {
        return $this->maxPages;
    }

    /**
     * @param int $maxPages how many listing pages parse() should request
     */
    public function setMaxPages($maxPages)
    {
        $this->maxPages = $maxPages;
    }

    /**
     * Builds the listing URL for one page of search results.
     *
     * @param int $pageId zero-based page index
     * @return string listing URL whose `os` offset is $pageId times the page size
     */
    public function buildPageUrl($pageId)
    {
        // The same number is sent as `rows` and used to compute the offset,
        // so it is named once to keep the two from drifting apart.
        $rowsPerPage = 20;
        $offset = $pageId * $rowsPerPage;

        return 'https://search.worldbank.org/api/v2/projects?format=json&rows=' . $rowsPerPage
            . '&fct=projectfinancialtype_exact,status_exact,regionname_exact,theme_exact,sector_exact,'
            . 'countryshortname_exact,cons_serv_reqd_ind_exact,esrc_ovrl_risk_rate_exact'
            . '&fl=id,project_name,countryshortname,totalamt,status,boardapprovaldate,url,'
            . 'totalcommamt,proj_last_upd_date'
            . '&srt=boardapprovaldate&apilang=en&os=' . $offset;
    }

    /**
     * Fetches one listing page and returns its `projects` member.
     *
     * @param string $url listing URL, e.g. from buildPageUrl()
     * @return mixed the decoded `projects` value; null when the page could
     *               not be fetched; false when the response carries no projects
     */
    public function getProjects($url)
    {
        $page = $this->getPageHtml($url);
        if (!$page) {
            // Kept as null (not false) so existing callers that tell a failed
            // fetch apart from an empty result keep working.
            return null;
        }

        $data = json_decode($page);
        if (empty($data->projects)) {
            return false;
        }

        return $data->projects;
    }

    /**
     * Builds the URL of the full record for one listed project.
     *
     * @param object $projectData listing entry; its `id` property is used
     * @return string detail URL requesting all fields of that project
     */
    public function getProjectUrl($projectData)
    {
        return 'https://search.worldbank.org/api/v2/projects?format=json&fl=*&id='
            . $projectData->id
            . '&apilang=en';
    }

    /**
     * Looks up the stored data URL of a project by its public URL.
     *
     * @param string $url project URL as stored in the `url` column;
     *                    must be passed raw, it is escaped here
     * @return mixed the row's `data_url` value, or null when no row matches
     */
    public function getDataUrlByProjectUrl($url)
    {
        global $wpdb;

        // The URL is bound through prepare() instead of being concatenated
        // into the statement, so quotes in it cannot alter the query.
        $sql = $wpdb->prepare(
            'SELECT * FROM ' . PROJECT_TABLE_NAME . ' WHERE url = %s',
            $url
        );
        $project = $wpdb->get_row($sql, OBJECT);

        if (!$project || !isset($project->data_url)) {
            return null;
        }

        return $project->data_url;
    }

    /**
     * Runs the crawl: for each listing page up to the configured limit,
     * loads every project's full record, checks its text for keywords and
     * saves the matching projects together with their data URL.
     */
    public function parse()
    {
        for ($pageId = 0; $pageId < $this->maxPages; $pageId++) {
            $projects = $this->getProjects($this->buildPageUrl($pageId));

            // getProjects() yields null/false for failed or empty pages;
            // skip them instead of handing a non-iterable to foreach.
            if (!is_array($projects) && !is_object($projects)) {
                continue;
            }

            foreach ($projects as $projectData) {
                $projectUrl = $this->getProjectUrl($projectData);
                $response = $this->getPageHtml($projectUrl);
                if (!$response) {
                    continue;
                }

                $decoded = json_decode($response);
                $projectId = $projectData->id;

                // Invalid JSON or a response without this project's record
                // leaves nothing to inspect.
                if (!isset($decoded->projects->{$projectId})) {
                    continue;
                }
                $projectDetails = $decoded->projects->{$projectId};

                $hasKeywords = $this->hasKeywords($this->collectSearchableText($projectDetails));

                // Trace output kept from the original implementation: one
                // dump per inspected project.
                var_dump($projectData->url);

                if (empty($hasKeywords)) {
                    continue;
                }

                // implode() requires an array here - assumes hasKeywords()
                // returns the list of matched keywords; TODO confirm.
                $additionalData = array(
                    'keywords' => implode(', ', $hasKeywords),
                );

                $project = new WorldbankProject($projectDetails, $projectData->url);
                $project->parseDataFromRemote($additionalData);
                $project->save();
                $project->saveDataUrl($projectUrl);
            }
        }
    }

    /**
     * Concatenates the text fields of a project record that are checked
     * for keywords: abstract, then name, then development objective.
     *
     * @param object $projectDetails full project record from the detail API
     * @return string the non-empty fields joined without a separator
     */
    private function collectSearchableText($projectDetails)
    {
        $text = '';

        if (!empty($projectDetails->project_abstract->cdata)) {
            $text .= $projectDetails->project_abstract->cdata;
        }
        if (!empty($projectDetails->project_name)) {
            $text .= $projectDetails->project_name;
        }
        if (!empty($projectDetails->pdo)) {
            $text .= $projectDetails->pdo;
        }

        return $text;
    }
}