• 欢迎访问小杰博客网站
  • 欢迎访问小杰博客网站哦

【大数据】php代码抓取310win网站的体育比赛数据分析趋势(php代码部分)

未分类 小杰 9年前 (2015-01-31) 1210次浏览 已收录 0个评论

小杰平时会玩点体育彩票,所以想用大数据分析体育比赛的趋势,于是写了下面这段 PHP 代码。

主要原理是用 PHP 抓取 HTML 页面的源码,然后用正则表达式提取其中的数据,写入数据库;

剩下的就是用 MySQL 分析比赛数据了。废话不多说,上代码:

下面是抓取310win网站的php代码:

<?php

/**
 * CRUD controller for lottery_game_info records, plus a scraping action that
 * downloads a results page, extracts the game rows with regular expressions
 * and stores one lottery_game_info record per row.
 */
class Lottery_game_infoController extends Controller
{
    /**
     * @var string the default layout for the views. Defaults to '//layouts/column2', meaning
     * using two-column layout. See 'protected/views/layouts/column2.php'.
     */
    public $layout = '//layouts/column2';

    /**
     * @var CActiveRecord the currently loaded data model instance.
     */
    private $_model;

    /**
     * @return array action filters
     */
    public function filters()
    {
        return array(
            'accessControl', // perform access control for CRUD operations
        );
    }

    /**
     * Specifies the access control rules.
     * This method is used by the 'accessControl' filter.
     *
     * NOTE(review): 'fetch_data' is open to all users and writes to the
     * database on every request - confirm that this is intended.
     *
     * @return array access control rules
     */
    public function accessRules()
    {
        return array(
            array('allow', // allow all users to perform 'index', 'view' and 'fetch_data' actions
                'actions' => array('index', 'view', 'fetch_data'),
                'users' => array('*'),
            ),
            array('allow', // allow authenticated user to perform 'create' and 'update' actions
                'actions' => array('create', 'update'),
                'users' => array('@'),
            ),
            array('allow', // allow admin user to perform 'admin' and 'delete' actions
                'actions' => array('admin', 'delete'),
                'users' => array('admin'),
            ),
            array('deny', // deny all users
                'users' => array('*'),
            ),
        );
    }

    /**
     * Displays a particular model.
     */
    public function actionView()
    {
        $this->render('view', array(
            'model' => $this->loadModel(),
        ));
    }

    /**
     * Creates a new model.
     * If creation is successful, the browser will be redirected to the 'view' page.
     */
    public function actionCreate()
    {
        $model = new lottery_game_info;

        // Uncomment the following line if AJAX validation is needed
        // $this->performAjaxValidation($model);
        if (isset($_POST['lottery_game_info'])) {
            $model->attributes = $_POST['lottery_game_info'];
            if ($model->save()) {
                $this->redirect(array('view', 'id' => $model->lottery_game_info_id));
            }
        }

        $this->render('create', array(
            'model' => $model,
        ));
    }

    /**
     * Updates a particular model.
     * If update is successful, the browser will be redirected to the 'view' page.
     */
    public function actionUpdate()
    {
        $model = $this->loadModel();

        // Uncomment the following line if AJAX validation is needed
        // $this->performAjaxValidation($model);
        if (isset($_POST['lottery_game_info'])) {
            $model->attributes = $_POST['lottery_game_info'];
            if ($model->save()) {
                $this->redirect(array('view', 'id' => $model->lottery_game_info_id));
            }
        }

        $this->render('update', array(
            'model' => $model,
        ));
    }

    /**
     * Deletes a particular model.
     * If deletion is successful, the browser will be redirected to the 'index' page.
     *
     * @throws CHttpException 400 when the request is not a POST request
     */
    public function actionDelete()
    {
        // we only allow deletion via POST request
        if (!Yii::app()->request->isPostRequest) {
            throw new CHttpException(400, 'Invalid request. Please do not repeat this request again.');
        }

        $this->loadModel()->delete();

        // if AJAX request (triggered by deletion via admin grid view), we should not redirect the browser
        if (!isset($_GET['ajax'])) {
            $this->redirect(array('index'));
        }
    }

    /**
     * Lists all models.
     */
    public function actionIndex()
    {
        $dataProvider = new CActiveDataProvider('lottery_game_info');
        $this->render('index', array(
            'dataProvider' => $dataProvider,
        ));
    }

    /**
     * Manages all models.
     */
    public function actionAdmin()
    {
        $model = new lottery_game_info('search');
        $model->unsetAttributes(); // clear any default values
        if (isset($_GET['lottery_game_info'])) {
            $model->attributes = $_GET['lottery_game_info'];
        }

        $this->render('admin', array(
            'model' => $model,
        ));
    }

    /**
     * Returns the data model based on the primary key given in the GET variable.
     * If the data model is not found, an HTTP exception will be raised.
     *
     * @return CActiveRecord the loaded model (cached for the rest of the request)
     * @throws CHttpException 404 when no 'id' is given or no record matches it
     */
    public function loadModel()
    {
        if ($this->_model === null) {
            if (isset($_GET['id'])) {
                $this->_model = lottery_game_info::model()->findByPk($_GET['id']);
            }
            if ($this->_model === null) {
                throw new CHttpException(404, 'The requested page does not exist.');
            }
        }

        return $this->_model;
    }

    /**
     * Performs the AJAX validation.
     * @param CModel the model to be validated
     */
    protected function performAjaxValidation($model)
    {
        if (isset($_POST['ajax']) && $_POST['ajax'] === 'lottery-game-info-form') {
            echo CActiveForm::validate($model);
            Yii::app()->end();
        }
    }

    /************************************************************************************
     * Scraping actions and helpers
     **************************************************************************************/

    /**
     * Scrapes the hard-coded results page, stores the extracted games and
     * renders the 'fetch_data' view with the scrape result as 'model'.
     */
    public function actionFetch_data()
    {
        $url = 'http://www.310win.com/jingcailanqiu/kaijiang_jclq_all.html';
        $this->render('fetch_data', array(
            'model' => $this->fetch_data($url),
        ));
    }

    /**
     * Debug helper: dumps every element of the given array with print_r,
     * each wrapped in its own <xmp> block inside a left-aligned <div>.
     *
     * @param array $argvs values to dump
     */
    public function pp($argvs)
    {
        echo '<div style="text-align: left;">';
        foreach ($argvs as $k => $v) {
            echo "<xmp>";
            print_r($v);
            echo "</xmp>";
        }
        echo '</div>';
    }

    /**
     * Scrapes the page at $url, converts every table row except the first
     * (the header row) into a record and saves each record as a
     * lottery_game_info model. Debug output is echoed along the way.
     *
     * @param string $url address of the results page
     * @return array empty when nothing was extracted; otherwise
     *               array('lottery_game' => array(rowIndex => record))
     */
    public function fetch_data($url)
    {
        $result = array();

        $content = $this->get_http($url);
        $patten = $this->get_patten();

        $lottery_container = $this->get_word_single(
            $content,
            $patten['lottery_container']['start'],
            $patten['lottery_container']['end']
        );
        $lottery_games = $this->get_word_all(
            $lottery_container,
            $patten['lottery_game']['start'],
            $patten['lottery_game']['end']
        );

        if (!empty($lottery_games)) {
            foreach ($lottery_games as $key => $lottery_game) {
                // Row 0 is the table header.
                if ($key === 0) {
                    continue;
                }

                $lottery_game_details = $this->get_word_all(
                    $lottery_game,
                    $patten['lottery_game_detail']['start'],
                    $patten['lottery_game_detail']['end']
                );
                if (empty($lottery_game_details)) {
                    continue;
                }

                $result['lottery_game'][$key] = $this->buildGameRecord($lottery_game_details, $url);
            }
        }

        print_r('start pp');
        $this->pp($result);
        print_r('end pp');

        // Nothing was scraped (download failed or markup changed): there is
        // nothing to persist, so skip the loop instead of iterating a missing key.
        if (!empty($result['lottery_game'])) {
            foreach ($result['lottery_game'] as $lottery_game_info) {
                print_r($lottery_game_info);
                $model = new lottery_game_info;
                $model->attributes = $lottery_game_info;
                if (!$model->save()) {
                    echo 'save false';
                }
            }
        }

        return $result;
    }

    /**
     * Builds one record from the raw cells of a table row.
     *
     * Cells that are missing from the row yield null values instead of
     * triggering undefined-index errors.
     *
     * @param array  $cells raw cell fragments of one row, in column order
     * @param string $url   page the row was scraped from
     * @return array record keyed by lottery_game_info attribute names
     */
    private function buildGameRecord($cells, $url)
    {
        foreach ($cells as $index => $cell) {
            $cells[$index] = $this->preg_filter($cell);
        }

        // Weekday: the text between "<td>" and the first run of digits in cell 0.
        $week_day = null;
        if (preg_match('/<td>(.*?)\d+/s', (string) $this->cell($cells, 0), $week_match)) {
            $week_day = $week_match[1];
        }

        // Date and kick-off time share cell 2, separated by "<br>".
        $dates = explode('<br>', (string) $this->cell($cells, 2));
        $date = $dates[0];
        $pk_time = isset($dates[1]) ? $dates[1] : null;

        // Score is "home-visit" in cell 4.
        $pk_score = $this->cell($cells, 4);
        $scores = explode('-', (string) $pk_score);
        $home_score = $scores[0];
        $visit_score = isset($scores[1]) ? $scores[1] : null;
        $score_total = (int) $home_score + (int) $visit_score;

        // Four-digit year and 24-hour clock so the stored timestamp is unambiguous.
        $now = date('Y-m-d H:i:s');

        return array(
            'site_code' => '310win',
            'www_code' => $url,
            'lottery_code' => $this->cell($cells, 1),
            'date' => $date,
            'pk_time' => $pk_time,
            'week_day' => $week_day,
            'home_team_name' => $this->cell($cells, 3),
            'visit_team_name' => $this->cell($cells, 5),
            'pk_score' => $pk_score,
            'home_score' => $home_score,
            'visit_score' => $visit_score,
            'score_total' => $score_total,
            'the_point_position' => $this->cell($cells, 10),
            'big_or_small' => $this->cell($cells, 11),
            'point_spread' => $this->cell($cells, 7),
            'team_win' => $this->cell($cells, 8),
            'win_detail' => $this->cell($cells, 9),
            'create_stamp' => $now,
            'last_updated_stamp' => $now,
        );
    }

    /**
     * Returns $cells[$index], or null when the row has no such cell.
     *
     * @param array $cells
     * @param int   $index
     * @return mixed|null
     */
    private function cell($cells, $index)
    {
        return isset($cells[$index]) ? $cells[$index] : null;
    }

    /**
     * Applies every 'lottery_filter' rule in turn: whenever a rule matches
     * with a non-empty capture, the capture replaces the working text.
     *
     * NOTE(review): a capture of '' or '0' is treated as "no match" because
     * of the truthiness check below - kept as in the original; confirm.
     *
     * @param string $html cell fragment
     * @return string the fragment after all rules have been applied
     */
    public function preg_filter($html)
    {
        $pattens = $this->get_patten();
        foreach ($pattens['lottery_filter'] as $rule) {
            $result = $this->get_word_single($html, $rule['start'], $rule['end']);
            if ($result) {
                $html = $result;
            }
        }

        return $html;
    }

    /**
     * Downloads the page content.
     *
     * @param string $url
     * @return string|false page body, or false when the download fails
     */
    public function get_http($url)
    {
        return file_get_contents($url);
    }

    /**
     * Returns the text captured between $star and $end for one match.
     *
     * $star and $end are inserted into the pattern verbatim, so they must
     * already be valid, '/'-escaped regex fragments.
     *
     * @param string|false $html text to search
     * @param string       $star regex fragment preceding the capture
     * @param string       $end  regex fragment following the capture
     * @param int          $pos  zero-based index of the match to return
     * @return string|false the capture, or false when there is no such match
     */
    public function get_word_single($html, $star, $end, $pos = 0)
    {
        // Upstream helpers return false on failure; there is nothing to search then.
        if ($html === false || $html === null) {
            return false;
        }

        $pattern = '/' . $star . '(.*?)' . $end . '/s';
        if (!preg_match_all($pattern, $html, $matches)) {
            return false;
        }

        return isset($matches[1][$pos]) ? $matches[1][$pos] : false;
    }

    /**
     * Returns the text captured between $star and $end for every match.
     *
     * $star and $end are inserted into the pattern verbatim, so they must
     * already be valid, '/'-escaped regex fragments.
     *
     * @param string|false $html text to search
     * @param string       $star regex fragment preceding the capture
     * @param string       $end  regex fragment following the capture
     * @return array|false list of captures, or false when nothing matches
     */
    public function get_word_all($html, $star, $end)
    {
        // Upstream helpers return false on failure; there is nothing to search then.
        if ($html === false || $html === null) {
            return false;
        }

        $pattern = '/' . $star . '(.*?)' . $end . '/s';
        if (!preg_match_all($pattern, $html, $matches)) {
            return false;
        }

        return $matches[1];
    }

    /**
     * Returns the start/end regex fragments used by the scraper, keyed by
     * the page element they delimit.
     *
     * NOTE(review): the empty 'start' fragments below look like they lost an
     * HTML tag when the code was published - confirm against the live page.
     *
     * @return array
     */
    public function get_patten()
    {
        return array(
            'lottory_menu' => array(
                'start' => '<ul id="mymenu">',
                'end' => '<\/ul>',
            ),
            'lottory_menu_detail' => array(
                'start' => '>',
                'end' => '<\/a>',
            ),
            'lottery_container' => array(
                'start' => '<div id="lottery_container">',
                'end' => '<\/div>',
            ),
            'lottery_game' => array(
                'start' => '<tr',
                'end' => '<\/tr>',
            ),
            'lottery_game_detail' => array(
                'start' => '>',
                'end' => '<\/td>',
            ),
            // Applied in this order by preg_filter().
            'lottery_filter' => array(
                'tds' => array(
                    'start' => '>',
                    'end' => '<',
                ),
                'a' => array(
                    'start' => '>',
                    'end' => '<\/a>',
                ),
                'span' => array(
                    'start' => '',
                    'end' => '<\/span>',
                ),
                'b' => array(
                    'start' => '>',
                    'end' => '<\/b>',
                ),
                // The original literal declared 'u' twice; PHP keeps the first
                // position with the last value, which is what is spelled out here.
                'u' => array(
                    'start' => '',
                    'end' => '<\/u>',
                ),
                'font' => array(
                    'start' => '>',
                    'end' => '<\/font>',
                ),
            ),
            'lottery_filter_bracket' => array(
                'bracket' => array(
                    'start' => '<',
                    'end' => '>',
                ),
            ),
        );
    }
}

表结构的创建见下面这篇文章:【大数据】php代码抓取310win网站的体育比赛数据分析趋势(数据结构部分)


小杰博客 , 版权所有丨如未注明 , 均为原创丨本网站采用BY-NC-SA协议进行授权
转载请注明原文链接:【大数据】php代码抓取310win网站的体育比赛数据分析趋势(php代码部分)
喜欢 (0)
发表我的评论
取消评论
表情 贴图 加粗 删除线 居中 斜体 签到

Hi,您需要填写昵称和邮箱!

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址