复制代码 代码如下:
<?php
 /**
 * HOST: components.arrow.com
 */
 //set_time_limit(0);
 // base function
 function curl_get($url, $data = array(), $header = array(), $timeout = 15, $port = 80, $reffer = "", $proxy = "")
 {
 $ch = curl_init();
 if (!empty($data)) {
 $data = is_array($data)?http_build_query($data): $data;
 $url .= (strpos($url,"?")? "&": "?") . $data;
 }
 curl_setopt($ch, CURLOPT_URL, $url);
 curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
 curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
 curl_setopt($ch, CURLOPT_POST, 0);
 curl_setopt($ch, CURLOPT_PORT, $port);
 curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
 curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); //是否抓取跳转后的页面
 $reffer && curl_setopt($ch, CURLOPT_REFERER, $reffer);
 if($proxy) {
 curl_setopt($ch, CURLOPT_PROXY, $proxy);
 curl_setopt($ch, CURLOPT_PROXYPORT, 1723);
 curl_setopt($ch, CURLOPT_PROXYUSERPWD,"andhm001:andhm123");
 }$result = array();
 $result["result"] = curl_exec($ch);
 if (0 != curl_errno($ch)) {
 $result["error"] = "Error:" . curl_error($ch);}
 curl_close($ch);
 return $result;
 }

// Copy code. The code is as follows:
/**
 * Perform an HTTP POST request with cURL.
 *
 * @param string       $url     Target URL.
 * @param array|string $data    Request body handed to CURLOPT_POSTFIELDS unchanged.
 * @param array        $header  Raw header lines; only applied when non-empty.
 * @param int          $timeout Connection timeout in seconds (CURLOPT_CONNECTTIMEOUT).
 * @param int          $port    Remote port (CURLOPT_PORT).
 * @return array "result" holds the curl_exec() return value; "error" is present only when
 *               cURL reported a non-zero error code.
 */
function curl_post($url, $data = array(), $header = array(), $timeout = 15, $port = 80)
{
    $handle = curl_init();

    curl_setopt($handle, CURLOPT_URL, $url);
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($handle, CURLOPT_PORT, $port);
    if (!empty($header)) {
        curl_setopt($handle, CURLOPT_HTTPHEADER, $header);
    }
    curl_setopt($handle, CURLOPT_POST, 1);
    curl_setopt($handle, CURLOPT_POSTFIELDS, $data);

    // Run the request first, then ask the handle whether it failed.
    $response = array("result" => curl_exec($handle));
    if (curl_errno($handle) != 0) {
        $response["error"] = "Error:" . curl_error($handle);
    }
    curl_close($handle);

    return $response;
}

 /**
  * Fetch the HTML source of one search-result list page.
  *
  * @param string $keywords Search keywords.
  * @param int    $start    Offset of the first record to return.
  * @return bool|string false when $start is negative or the request reports an error,
  *                     otherwise the response body.
  */
 function getListHtml($keywords, $start = 0)
 {
 if ($start < 0)
 {
 return false;
 }$postData = array(
 "search_token" => $keywords,
 "start" => $start,
 "limit" => 100,
 );$result = curl_post("http://components.arrow.com/part/search/" . $keywords, http_build_query($postData));
 if ( isset($result["error"]) )
 {
 return false;
 //exit($result["error"]);
 }
 $result = $result["result"];return $result;
 }/**
 * 获取列表页 连接href
 * @param string $html html源码
 * @return array
 */
 function getListHref($html)
 {
 $pattern = "/<tds+class="col_mfr_part_num"><as+href="(.[^>]+)">/isU";
 if (preg_match_all($pattern, $html, $matches))
 {
 return $matches[1];
 } else {
 // 没有匹配项
 return array();
 }
 }/**
 * 获取下一页数字start
 * @param string $html html源码
 * @return number
 */
 function getListNextPage($html)
 {
 $pattern = "/<scripts+language="javascript">buildPagination("d+","d+","(d+)",d+);</script>/isU";
 if (preg_match($pattern, $html, $matches))
 {
 return intval($matches[1]);
 } else {
 return -1;
 }
 }/**
 * 获取列表也所有的详细列表
 * @param string $keywords 搜索关键字
 * @return boolean|array
 */
 function getListHrefAll($keywords)
 {
 if (empty($keywords))
 {
 return false;
 }$html = getListHtml($keywords);
 $hrefList = getListHref($html);
 if (empty($hrefList))
 {
 // 没有结果
 return array();
 }
 $nextPage = getListNextPage($html);
 $loop =0;
 while ($nextPage > 0)
 {
 $html = getListHtml($keywords, $nextPage);
 $tmpHrefList = getListHref($html);
 $hrefList = array_merge($hrefList, $tmpHrefList);
 $nextPage = getListNextPage($html);
 $loop ++;
 }
 return $hrefList;
 }/**
 * 获取详情页信息
 * @param string $url url地址
 * @return array()
 */
 function getDetail($url)
 {
 if ( empty($url) )
 {
 return false;
 }
 $host = "http://components.arrow.com";$url = $host . $url;
 $result = curl_get($url);
 if ( isset($result["error"]) )
 {
 return array();
 //exit($result["error"]);
 }
 $html = $result["result"];$result = array(
 "sup_part" => "", // 供应商型
 "sup_id" => "", // 供应商ID
 "mfg_part" => "", // 制造商型号
 "mfg_name" => "", // 制造商名称
 "cat_name" => "", // 分类名称
 "para" => "", // 属性
 "desc" => "", // 描述
 "pdf_url" => "", // PDF地址
 "sup_stock" => "", // 库存
 "min_purch" => "", // 最小订购量
 "price" => "", // 价格
 "img_url" => "", // 图片地址
 "createtime" => "", // 创建时间
 "datacode" => "", // 批号
 "package" => "", // 封装
 "page_url" => "", // 页面地址
 );// mfg_part
 $pattern = "/<li>[s]*<strong>Part No:s*</strong>(.+)</li>/isU";
 if (preg_match($pattern, $html, $matches))
 {
 $result["mfg_part"] = trim($matches[1]);
 } else {file_put_contents("page.txt", $html);die("xxx");
 return array();
 }// mfg_name
 $pattern = "/<li>[s]*<strong>Manufacturer: </strong>(.+)</li>/isU";
 if (preg_match($pattern, $html, $matches))
 {
 $result["mfg_name"] = trim($matches[1]);
 }// cat_name
 $pattern = "/displayCategory("(.[^"]+)");/isU";
 if (preg_match($pattern, $html, $matches))
 {
 $result["cat_name"] = trim($matches[1]);
 $result["cat_name"] = str_replace("|", ">", $result["cat_name"]);
 }// para
 $tablepattern = "/<tables+id="part_specs".[^>]*>(.+)</table>/isU";
 if (preg_match($tablepattern, $html, $matches))
 {
 $pattern = "/<tr>[s]*<td><strong>(.+)</strong></td><td>(.+)</td>[s]*</tr>/isU";
 if (preg_match_all($pattern, $matches[1], $matches))
 {
 foreach($matches[1] as $k=>$v)
 {
 $v = trim($v);
 if ("Package Type" == $v)
 {
 $result["package"] = trim($matches[2][$k]);
 continue;
 }
 $result["para"][$v] = trim($matches[2][$k]);
 }
 }
 }// desc
 $pattern = "/<divs+id="part_title">.+<h4>(.+)</h4>[s]*</div>/isU";
 if (preg_match($pattern, $html, $matches))
 {
 $result["desc"] = trim($matches[1]);
 }// pdf_url
 $pattern = "/<lis+class="datasheet">[s]*<strong>Datasheet:</strong><as+href="(.[^"]+)"/isU";
 if (preg_match($pattern, $html, $matches))
 {
 $result["pdf_url"] = $host . trim($matches[1]);
 }// sup_stock
 $pattern = "/<tds+id="inv_1"s+class="li_inv">([d,]+)</td>/isU";
 if (preg_match($pattern, $html, $matches))
 {
 $result["sup_stock"] = trim($matches[1]);
 $result["sup_stock"] = str_replace(",", "", $result["sup_stock"]);
 }// min_purch
 $pattern = "/<spans+id="multiples">[s]*<strong>Multiple:s*</strong>(.+)</span>/isU";
 if (preg_match($pattern, $html, $matches))
 {
 $result["min_purch"] = trim($matches[1]);
 }// price
 $pattern = "/<divs+id="price_1"s+class="li_price">(.[^<]+)</div>/isU";
 if (preg_match($pattern, $html, $matches))
 {
 $result["price"][1] = trim($matches[1]);
 }
 $pattern = "/<divs+id="price_1"s+class="li_price">[s]*<span.[^>]+title="(.[^"]+)">/isU";
 if (preg_match($pattern, $html, $matches))
 {
 $priceurl = str_replace("&", "&", $matches[1]);
 $json = curl_get($priceurl);
 $json = $json["result"];
 if (! empty($json))
 {
 $jsonresult = json_decode($json, true);
 foreach ($jsonresult["parts"][0]["webprice"]["resale"] as $k=>$v)
 {
 $result["price"][$v["minqty"]] = $v["price"];
 }
 }
 }// img_url
 $pattern = "/<divs+id="part_image">[s]*<imgs+src="(.[^"]+)"/isU";
 if (preg_match($pattern, $html, $matches))
 {
 $result["img_url"] = trim($matches[1]);
 }// page_url
 $result["page_url"] = $url;return $result;
 }/**
 * 最终调用函数
 * @param string $keywords 搜索关键字
 * @return array
 */
 function getData($keywords)
 {
 $hrefList = getListHrefAll($keywords);
 $result = array();foreach ($hrefList as $k=>$v)
 {
 $result[] = getDetail($v);
 }return $result;
 }// Test Script
 $keywords = trim($_GET["keywords"]);
 $result = getData($keywords);print_r($result);