/**
 * Collects (spiders) products and their images, including Flash, music and similar media.
 */
package mall.spider.bean;
import java.lang.Runnable;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.Vector;

import dao.MallScSpiderarticle ;
import dao.MallScSpidersites;
import dao.MallScSpiderurls;
import dao.MallScSpiderarticleconfig;
import java.lang.Thread;
import java.net.MalformedURLException;
import java.net.URL;

import mall.jianTai.bean.PublishConfig;
import mall.kgmall.spider.web.Html;

import mall.serviceBean.ShopConfigBean;
import mall.spider.bean.HttpFileBean;
import mall.spider.bean.ImageSpiderBean;

import dao.MallScSpiderarticleconfig;
import dao.MallScSpiderjob;
import service.entryService.MallScSpiderarticleEntryService;
import service.entryService.MallScSpiderarticleconfigEntryService;

import dao.Systemconfig;
import dao.MallScSpiderarticle ;


import mall.security.portal.core.MallSecurityUserUtil;
import mall.security.portal.core.MallUserRoseConfig;
import net.juluu.app.security.core.PortalUserLoginConfig;
import net.juluu.app.security.core.SecurityActionCheckUtil;
import net.juuluu.spider.core.SpiderPath;
public class SpiderProductProcessor extends Thread {

	/**
	 * Identifier of this worker; set through setThreadId(int) and printed by spider().
	 */
	private int threadId;
	
	// Only read and written through getUrl()/setUrl(); not used by the spider logic in this class.
	private MallScSpiderurls url;
	// True while spider() is working through its URL batch; exposed through isWorking().
	private boolean isWorking=false;
	private static final long serialVersionUID = 1L;
	// Extraction rules (delimiters, clean-up lists, encoding) loaded for the job in init().
	private MallScSpiderarticleconfig config;
	// Only read and written through its accessors in this class.
	private Vector <SpiderProductProcessor> spiderProcessors;
	// Batch of product URLs this worker is currently processing.
	private Vector<MallScSpiderurls>  ScSpiderurls;
	// Source of URL batches (getSomeURLs) and sink for finished URLs (delUrl).
	private ArticleUrlsBean ArticleUrlsBean;
	// URL currently being spidered, intended for thread synchronisation; only touched by its accessors here.
	private MallScSpiderurls spiderIngUrl;
	
	// Raw content of the most recently downloaded page.
	private StringBuffer htmlContent;
	// Html wrapper around the current page; all spiderXxx() extractors read from it.
	private Html htmlArticleSpiderTmp;
	private String host;
	// Job this worker belongs to (job id, host, optional source encoding).
	private MallScSpiderjob job;
	// Path/encoding configuration handed to the page downloader.
	private SpiderPath spiderPath;
	// Downloader used for product pages.
	private HttpFileBean httpBean;
	// Downloader used for images referenced by the product content.
	private ImageSpiderBean imageBean;
	// Space-separated tag names stripped from short extracted values (time, author, price, ...).
	private String allHtmlTag="table script a h1 h2 h3 br p tr hr div ul li ";
	
	// Per-article log; cleared at the start of ScSpiderarticle().
	private StringBuffer log=new StringBuffer();
	
	// Site definition; its site type selects the Taobao-specific description extraction.
	private dao.MallScSpidersites siteSpider;
	// Website directory; forwarded to spiderPath by setWebSiteDir().
	private String webSiteDir;
	
	
	/**
	 * Replaces the batch of product URLs this worker processes.
	 *
	 * @param ScSpiderurls the new URL batch
	 */
	public void setArticleUrls(Vector<MallScSpiderurls> ScSpiderurls) {
		this.ScSpiderurls = ScSpiderurls;
	}
	

	/**
	 * Creates a worker bound to a site, a job and the shared URL source, then
	 * initialises its downloaders through {@link #init(MallScSpidersites, MallScSpiderjob)}.
	 *
	 * @param site            site definition
	 * @param job             spider job
	 * @param ArticleUrlsBean provider of URL batches
	 */
	public SpiderProductProcessor(MallScSpidersites site, MallScSpiderjob job, ArticleUrlsBean ArticleUrlsBean) {
		this.ArticleUrlsBean = ArticleUrlsBean;
		this.siteSpider = site;
		this.job = job;
		init(site, job);
	}

	/**
	 * Creates a worker bound to a site and a job without a URL source.
	 * <p>
	 * The site and job are stored on the instance (as the three-argument
	 * constructor does) because the extraction methods read them through
	 * {@code getJob()} / {@code getSiteSpider()}. A URL source still has to be
	 * supplied with {@link #setArticleUrlsBean(ArticleUrlsBean)} before the
	 * thread is started.
	 *
	 * @param site site definition
	 * @param job2 spider job
	 */
	public SpiderProductProcessor(MallScSpidersites site, MallScSpiderjob job2) {
		this.siteSpider = site;
		this.job = job2;
		init(site, job2);
	}
	
	/**
	 * Prepares the collaborators of this worker for the given site and job.
	 * <p>
	 * Loads the extraction rules of the job, selects the page encoding (the job's
	 * own setting first, then the rule set's setting, finally {@code utf-8}),
	 * lets {@link ShopConfigBean} fill in the path configuration and creates the
	 * page and image downloaders.
	 *
	 * @param site site definition supplying the spider/save file-type lists
	 * @param job  job supplying the id, the host and an optional source encoding
	 */
	public void init(MallScSpidersites site,MallScSpiderjob job){
		this.htmlArticleSpiderTmp=new Html();
		this.spiderPath=new SpiderPath();

		this.config=MallScSpiderarticleconfigEntryService.getInstance()
				.queryArticleConfog(job.getJobId());

		// Encoding priority: job setting, then rule-set setting, then utf-8.
		String encode=job.getSourceEncode();
		if(encode==null||encode.equals("")){
			// Guard against a job that has no rule set (assumes the query can return null — TODO confirm).
			encode=(this.config!=null)?this.config.getSourceEncode():null;
		}
		if(encode==null||encode.equals("")){
			encode="utf-8";
		}
		spiderPath.setFileEncode(encode);
		spiderPath.setEncodeSource(encode);

		// Path configuration
		int portsite=80;
		ShopConfigBean cb=ShopConfigBean.getInstance();
		cb.configSpiderPathBean(portsite,this.spiderPath, job.getHost(), site.getSpiderFileTypeList(),site.getSaveFileTypeList());

		// Page downloader
		this.httpBean=new HttpFileBean();
		// Image downloader
		this.imageBean=new ImageSpiderBean(site,job);
	}

	/**
	 * Worker loop: repeatedly fetches a batch of product URLs from the shared
	 * {@link ArticleUrlsBean}, spiders it and then pauses for one second.
	 * <p>
	 * The loop ends when the thread is interrupted. A runtime failure inside
	 * one round is reported on the console and the next round is attempted,
	 * so a single bad batch does not terminate the worker.
	 */
	@Override
	public void run() {
		while (!isInterrupted()) {
			// Fetch the next batch and make it this worker's queue.
			Vector<MallScSpiderurls> urls = getArticleUrlsBean().getSomeURLs();
			this.ScSpiderurls = urls;

			if (urls != null && !urls.isEmpty()) {
				try {
					// spider() drains the queue and maintains isWorking itself.
					spider();
				} catch (RuntimeException e) {
					System.out.println("采集器出现错误:" + e.getMessage());
					e.printStackTrace();
				}
			}
			isWorking = false;

			try {
				// Pause between rounds; also the wait while no URLs are available yet.
				sleep(1000);
			} catch (InterruptedException e) {
				// Restore the flag so the loop condition sees the stop request.
				interrupt();
			}
		}
	}
	/**
	 * Spiders every URL in this worker's current batch.
	 * <p>
	 * For each URL the article service is asked whether the (job, URL) pair was
	 * already stored; if not, the product page is spidered. In either case the
	 * URL is then removed from the shared URL bean and from the local batch.
	 * Failures for a single URL are printed and do not stop the batch.
	 * {@code isWorking} is reset even when removing a URL fails, and an
	 * interrupt received during the trailing pause is re-asserted for the caller.
	 */
	public void spider(){
		System.out.println("采集 ：第"+getThreadId()+"个thread");

		if(ScSpiderurls.size()>0){
			isWorking=true;
		}
		try{
			while(ScSpiderurls.size()!=0){
				MallScSpiderurls url=getSpiderUrlLock();
				if(url==null){
					break;
				}

				MallScSpiderarticleEntryService bean=MallScSpiderarticleEntryService.getInstance();
				int jobid=getJob().getJobId();
				int urlid=url.getSpiderUrslsId();
				System.out.println("检测是否已采集,链接ID："+urlid+"url:"+url.getUrlPath());
				System.out.println("#####################产品采集########################");

				// A failed lookup leaves productSpider null, so the URL is still spidered.
				MallScSpiderarticle productSpider=null;
				try{
					productSpider=bean.queryByJobIdUrlsId(jobid,urlid );
				}catch(java.lang.Exception e){
					log.append(e.getMessage());
					System.out.println("错误,重复采集,链接ID："+urlid+"url:"+url.getUrlPath());
					e.printStackTrace();
				}

				if(productSpider==null){
					try{
						spiderByarticle(url);
					}catch(java.lang.Exception e){
						System.out.println("采集器出现错误:"+e.getMessage());
						e.printStackTrace();
					}
				}else{
					System.out.println("产品已采集，忽略。");
				}

				// Done with this URL: drop it from the shared bean and the local batch.
				getArticleUrlsBean().delUrl(url);
				revomeURL(url);
			}
		}finally{
			isWorking=false;
		}

		try {
			sleep(5);
		} catch (InterruptedException e) {
			// Keep the interrupt visible to the caller instead of swallowing it.
			Thread.currentThread().interrupt();
		}
	}
	
	/**
	 * Spiders and stores the product behind {@code url}; the stored article is not returned.
	 *
	 * @param url product URL to spider
	 */
	public void spiderByarticle(MallScSpiderurls url) {
		ScSpiderarticle(url);
	}
	/**
	 * Downloads the product page behind {@code ArticleUrl}, extracts title,
	 * content, price, publish time, author and source using the configured
	 * rules, and saves the result to the temporary article store.
	 * <p>
	 * After a successful save the images referenced by the content are
	 * spidered (the content is then updated with the new image paths and
	 * merged), and the product's main image is spidered. Progress and failures
	 * are appended to the per-article log, which is cleared on entry.
	 *
	 * @param ArticleUrl URL record of the product page
	 * @return the saved article, or {@code null} when the page could not be
	 *         downloaded or the article service rejected the save
	 */
	public MallScSpiderarticle ScSpiderarticle(MallScSpiderurls ArticleUrl){
		log.delete(0, log.length());

		// Point the downloader at the product page.
		SpiderPath spiderPath=getSpiderPath();
		spiderPath.setUrlPath(ArticleUrl.getUrlPath());
		httpBean.setSpiderPath(spiderPath);
		// NOTE(review): this uses the job's encoding only; the fallback chosen in init() is stored on spiderPath — confirm this is intended.
		httpBean.setFileEncode(this.job.getSourceEncode());

		this.htmlContent=httpBean.getWebFileByFullPath();
		log.append("采集产品："+ArticleUrl.getUrlPath());
		if(this.htmlContent==null){
			// Nothing was downloaded; stop here instead of failing on htmlContent.length().
			log.append("网页下载失败，无法采集。\n");
			return null;
		}
		log.append("网页长度："+this.htmlContent.length()+"\n");
		this.setHtmlArticleSpiderTmp(new Html());
		getHtmlArticleSpiderTmp().setHtmlContent(htmlContent.toString());

		// Title
		String title= spiderTitle();
		if(title==null||title.equals("")){
			log.append("标题无法获取,请正确设置规则！");
		}else{
			log.append("标题采集成功:"+title);
		}

		// Content; Taobao shops load the description from a separate URL.
		String content="";
		String siteType= this.getSiteSpider().getSiteType();
		if(siteType!=null&&siteType.equals("taobao.com")){
			log.append("采集淘宝商城内容：");
			content=spiderProductDescription_taoBao_com();
		}else{
			content=spiderContent();
		}
		if(content==null||content.equals("")){
			log.append("内容无法获取,请正确设置规则！");
		}else{
			log.append("内容采集成功,长度:"+content.length());
		}

		// Price
		String priceSpider=this.spiderPrice();
		if(priceSpider==null||priceSpider.equals("")){
			log.append("价格无法获取,请正确设置规则！");
		}else{
			log.append("价格采集成功:"+priceSpider);
		}

		String addtime=	spiderTimeAdd();
		String author=spiderAthor();
		String come=spiderComeFrom();

		// Build the article record.
		dao.MallScSpiderarticle artilce=new MallScSpiderarticle();
		artilce.setJobId(getJob().getJobId());
		artilce.setSc_price(priceSpider);
		artilce.setAuthor(author);
		artilce.setTitle(title);
		artilce.setComeFrom(come);
		artilce.setAddTime(addtime);
		artilce.setContent(content);
		artilce.setSourceSite(getJob().getHost());
		artilce.setSpiderUrslsId(ArticleUrl.getSpiderUrslsId());
		Date createTime=new Date(System.currentTimeMillis());
		artilce.setCreateDate(createTime);
		artilce.setEncodeSource(getHttpBean().getFileEncode());

		service.entryService.MallScSpiderarticleEntryService bean=MallScSpiderarticleEntryService.getInstance();

		// Save; the service returns null when it refuses the article.
		MallScSpiderarticle saved=bean.saveCheckJobIdURLID(artilce);
		if(saved==null){
			log.append("保存失败，格式不符，标题，内容可能为空。");
			return saved;
		}

		getImageBean().setarticle(saved);
		// Content comes back with the new image paths.
		content=ScSpiderimage(saved, content);

		spiderMainImage(saved);

		if(content!=null){
			saved.setContent(content);
			bean.merge(saved);
		}else{
			log.append("内容为空，不更新图片采集内容。");
		}
		return saved;
	}

	// Extracts the main-image markup, spiders the image with a fresh ImageSpiderBean and logs the outcome.
	private void spiderMainImage(MallScSpiderarticle saved){
		String imgMainProductCode=this.spiderProductMainImgCodeParse();
		if(imgMainProductCode==null||imgMainProductCode.equals("")){
			log.append("产品主图代码获取失败,请正确设置规则！");
			return;
		}
		log.append("产品主图代码获取成功:"+imgMainProductCode);

		ImageSpiderBean imgMainBean=new ImageSpiderBean(this.siteSpider,this.job);
		imgMainBean.setArticleContent(new StringBuffer(imgMainProductCode));
		imgMainBean.setarticle(saved);
		// Spiders the main image and stores it.
		String imgPathSpider=imgMainBean.spiderImg_Shop_Main();
		if(imgPathSpider==null||imgPathSpider.equals("")){
			log.append("产品主图获取失败,请正确设置规则！");
		}else{
			log.append("产品主图获取成功:"+imgPathSpider);
		}
	}
	
	
	/**
	 * Re-spiders the page behind {@code ArticleUrl} into the existing article
	 * {@code s}: title, content, publish time, author and source are
	 * re-extracted, the article is saved, the content images are spidered and
	 * the stored content is updated with the new image paths.
	 * <p>
	 * Unlike {@link #ScSpiderarticle(MallScSpiderurls)}, the page is lower-cased
	 * before extraction and no price or main image is handled.
	 *
	 * @param s          article to refresh; it is modified in place
	 * @param ArticleUrl URL record of the product page
	 * @return {@code s} after a successful save, or {@code null} when the page
	 *         could not be downloaded or the article service rejected the save
	 */
	public MallScSpiderarticle reScSpiderarticleByURL(MallScSpiderarticle s,MallScSpiderurls ArticleUrl){
		// Point the downloader at the product page.
		SpiderPath spiderPath=getSpiderPath();
		String ss=ArticleUrl.getUrlPath();
		spiderPath.setUrlPath(ss);
		httpBean.setSpiderPath(spiderPath);
		this.htmlContent=httpBean.getWebFileByFullPath();
		if(this.htmlContent==null){
			log.append("网页下载失败，无法重新采集："+ss);
			return null;
		}
		this.setHtmlArticleSpiderTmp(new Html());
		// Extraction runs on the lower-cased page.
		getHtmlArticleSpiderTmp().setHtmlContent(htmlContent.toString().toLowerCase());

		String title= spiderTitle();
		String content=spiderContent();
		String addtime=	spiderTimeAdd();
		String author=spiderAthor();
		String come=spiderComeFrom();

		// Refresh the existing record.
		dao.MallScSpiderarticle artilce=s;
		artilce.setJobId(getJob().getJobId());
		artilce.setAuthor(author);
		artilce.setTitle(title);
		artilce.setComeFrom(come);
		artilce.setAddTime(addtime);
		artilce.setContent(content);
		artilce.setSourceSite(getJob().getHost());
		artilce.setSpiderUrslsId(ArticleUrl.getSpiderUrslsId());
		Date createTime=new Date(System.currentTimeMillis());
		artilce.setCreateDate(createTime);
		artilce.setEncodeSource(getHttpBean().getFileEncode());

		service.entryService.MallScSpiderarticleEntryService bean=MallScSpiderarticleEntryService.getInstance();

		MallScSpiderarticle saved=bean.saveCheckJobIdURLID(artilce);
		if(saved==null){
			// Same failure handling as ScSpiderarticle: the service refused the article.
			log.append("保存失败，格式不符，标题，内容可能为空。");
			return null;
		}
		getImageBean().setarticle(saved);
		// Content comes back with the new image paths.
		content=ScSpiderimage(saved, content);
		saved.setContent(content);
		bean.update(saved);
		return artilce;
	}
	
	
	/**
	 * Splits a space-separated string into its non-empty tokens.
	 *
	 * @param parseStr space-separated list; must not be {@code null}
	 * @return the tokens in their original order
	 */
	public ArrayList<String> getElemenByStrList(String parseStr) {
		ArrayList<String> tokens = new ArrayList<String>();
		for (java.util.StringTokenizer tokenizer = new java.util.StringTokenizer(parseStr, " ");
				tokenizer.hasMoreTokens();) {
			tokens.add(tokenizer.nextToken());
		}
		return tokens;
	}
	/**
	 * Returns the tag names configured for removal from product content.
	 * <p>
	 * The configured value is a space-separated list; an unset (null) value
	 * yields an empty list instead of failing.
	 *
	 * @return the configured tag names, possibly empty
	 */
	public ArrayList<String> getContentClearTagList(){
		String s= getConfig().getClearTagListContent();
		if(s==null){
			return new ArrayList<String>();
		}
		return getElemenByStrList(s);
	}
	/**
	 * Removes {@code url} from this worker's URL batch while holding this worker's monitor.
	 *
	 * @param url URL record to remove
	 */
	synchronized public void revomeURL(MallScSpiderurls url) {
		Vector<MallScSpiderurls> pending = getScSpiderurls();
		pending.remove(url);
	}
	/**
	 * Returns the first URL of this worker's batch without removing it.
	 *
	 * @return the first queued URL, or {@code null} when the batch is empty
	 */
	synchronized public MallScSpiderurls getSpiderUrlLock() {
		Vector<MallScSpiderurls> pending = getScSpiderurls();
		return pending.size() == 0 ? null : pending.firstElement();
	}
	
// Product title extraction
	
	/**
	 * Extracts the product title from the current page.
	 * <p>
	 * The text between the configured title start and end markers is taken,
	 * then the configured tags and text fragments are removed from it.
	 *
	 * @return the cleaned title, or {@code ""} when the markers are not
	 *         configured or nothing is found between them
	 */
	public String spiderTitle(){
		log.append("采集标题：");
		dao.MallScSpiderarticleconfig object = getConfig();

		String titleStart=object.getTitleStart ();
		String titleEnd=object.getTitleEnd ();
		if(titleStart==null||titleEnd==null||titleEnd.equals("")||titleStart.equals("")){
			log.append("标题无法采集，开始或结束符未设置。");
			return "";
		}

		String titles=getHtmlArticleSpiderTmp().getStringBetweenStr(titleStart, titleEnd);
		if(titles==null){
			// Markers not present on this page; the other extractors check this case as well.
			log.append("通过分隔符获取标题失败");
			return "";
		}

		Html t=new Html();
		t.setHtmlContent(titles);
		// Strip configured tags, then configured text fragments.
		t.delAllHtmlTagByTagList(object.getClearTagByTitle ());
		t.delAllTextByTextList(object.getClearTextByTitle ());
		return t.getHtmlContent().toString();
	}
	/**
	 * Extracts the product description for Taobao shops, where the page only
	 * embeds the URL of an external description document.
	 * <p>
	 * The description URL is taken from between the configured content markers
	 * (the end marker defaults to {@code ');</SCRIPT>}), the document is
	 * fetched as gb2312, and the configured link, tag, text and replacement
	 * rules are applied. Fetch failures are recorded in the article log and
	 * lead to an empty description being processed.
	 *
	 * @return the cleaned description, or {@code ""} when the start marker is
	 *         not configured or no URL is found
	 */
	public String spiderProductDescription_taoBao_com(){
		dao.MallScSpiderarticleconfig object = getConfig();

		String contentStart=object.getContentStart ();
		String contentEnd=object.getContentEnd ();
		if(contentEnd==null||contentEnd.equals("")){
			contentEnd="');</SCRIPT>";
		}
		if(contentStart==null||contentStart.equals("")){
			log.append("淘宝产品描述网址无法采集，开始或结束符未设置。");
			return "";
		}

		// The URL of the description document sits between the markers.
		String contentUrlpath=getHtmlArticleSpiderTmp().getStringBetweenStr(contentStart, contentEnd);
		if(contentUrlpath==null||contentUrlpath.equals("")){
			log.append("通过分隔符获取淘宝产品描述网址失败");
			return "";
		}
		log.append("通过分隔符获取淘宝产品描述网址，"+contentUrlpath.length());

		// Fetch the description with a dedicated downloader so the page downloader keeps its state.
		String descriptionContent="";
		HttpFileBean descriptionFetcher=new HttpFileBean();
		String encode="gb2312";
		try {
			URL urldescription = new URL(contentUrlpath);
			StringBuffer codeHtml=descriptionFetcher.getHtmlFormWebByURL(urldescription, encode);
			if(codeHtml!=null){
				descriptionContent=codeHtml.toString();
			}else{
				log.append("获取淘宝网产品描述失败，网址："+contentUrlpath);
			}
		} catch (MalformedURLException e) {
			log.append("淘宝产品描述网址格式错误："+contentUrlpath);
			e.printStackTrace();
		} catch (Exception e) {
			log.append("获取淘宝网产品描述失败，网址："+contentUrlpath+" "+e.getMessage());
			e.printStackTrace();
		}

		Html t=new Html();
		t.setHtmlContent(descriptionContent);

		// Remove links selected by property prefix, together with their text.
		t.delLinkAndLinkTextByPropertyPrefixList(object.getClearLinkByPropertyListContent ());
		// Remove links selected by their text.
		t.delLinkAndLinkTextByTextList(object.getClearLinkByTextListContent ());
		// Remove configured tags.
		t.delAllHtmlTagByTagList(object.getClearTagListContent ());
		// Remove configured text fragments.
		t.delAllTextByTextList(object.getClearTextByNameContent ());
		// Apply configured replacements.
		t.replaceByTextList(object.getReplaceStrListContent ());

		String res=	t.getHtmlContent().toString();
		if(res==null){
			return "";
		}
		return res;
	}
	
	/**
	 * Extracts the product content from the current page.
	 * <p>
	 * The text between the configured content markers is taken and cleaned
	 * in this order: links by property prefix, links by text, tags, text
	 * fragments, replacements.
	 *
	 * @return the cleaned content, or {@code ""} when the markers are not
	 *         configured or nothing is found between them
	 */
	public String spiderContent(){
		dao.MallScSpiderarticleconfig rules = getConfig();

		String startMarker=rules.getContentStart ();
		String endMarker=rules.getContentEnd ();
		boolean markersMissing=startMarker==null||endMarker==null
				||endMarker.equals("")||startMarker.equals("");
		if(markersMissing){
			log.append("内容无法采集，开始或结束符未设置。");
			return "";
		}

		String raw=getHtmlArticleSpiderTmp().getStringBetweenStr(startMarker, endMarker);
		if(raw==null||raw.equals("")){
			log.append("通过分隔符获取内容失败");
			return "";
		}
		log.append("通过分隔符获取内容，长度："+raw.length());

		Html cleaner=new Html();
		cleaner.setHtmlContent(raw);

		// Links selected by property prefix, removed together with their text.
		cleaner.delLinkAndLinkTextByPropertyPrefixList(rules.getClearLinkByPropertyListContent ());
		// Links selected by their text.
		cleaner.delLinkAndLinkTextByTextList(rules.getClearLinkByTextListContent ());
		// Configured tags.
		cleaner.delAllHtmlTagByTagList(rules.getClearTagListContent ());
		// Configured text fragments.
		cleaner.delAllTextByTextList(rules.getClearTextByNameContent ());
		// Configured replacements.
		cleaner.replaceByTextList(rules.getReplaceStrListContent ());

		String cleaned=cleaner.getHtmlContent().toString();
		return cleaned==null ? "" : cleaned;
	}
	
	/**
	 * Extracts the publish time from the current page.
	 * <p>
	 * Within the text between the configured start and end markers, the value
	 * begins right after the configured prefix (or at the beginning when no
	 * prefix is configured) and runs up to the next space, or to the end of
	 * the text when no space follows. HTML tags and the configured text
	 * fragments are then removed.
	 *
	 * @return the cleaned value; {@code ""} when the markers are not
	 *         configured or the prefix does not occur; {@code null} when
	 *         nothing is found between the markers
	 */
	public String spiderTimeAdd(){
		dao.MallScSpiderarticleconfig object = getConfig();

		String addTimeStart=object.getAddTimeStart ();
		String addTimeEnd=object.getAddTimeEnd ();
		String addTimePrefix=object.getAddTimePrefix ();

		if(addTimeStart==null||addTimeEnd==null
				||addTimeEnd.equals("")||addTimeStart.equals("")){
			log.append("发布时间采集无法采集，开始或结束符未设置。");
			return "";
		}

		String content=getHtmlArticleSpiderTmp().getStringBetweenStr(addTimeStart, addTimeEnd);
		if(content==null){
			return null;
		}

		int start=0;
		if(addTimePrefix!=null){
			int prefixAt=content.indexOf(addTimePrefix);
			if(prefixAt==-1){
				// Test the lookup result itself: adding the prefix length first would hide the -1.
				log.append("发布时间前缀未找到："+addTimePrefix);
				return "";
			}
			start=prefixAt+addTimePrefix.length();
		}
		// The value ends at the first space after the prefix, else at the end of the text.
		int end=content.indexOf(" ",start);
		if(end==-1){
			end=content.length();
		}
		String result=content.substring(start, end);

		Html t=new Html();
		t.setHtmlContent(result);
		t.delAllHtmlTagByTagList(getAllHtmlTag() );
		t.delAllTextByTextList(object.getClearTextByAddTime());
		return t.getHtmlContent().toString();
	}
	
	/**
	 * Extracts the author from the current page.
	 * <p>
	 * Within the text between the configured start and end markers, the value
	 * begins right after the configured prefix (or at the beginning when no
	 * prefix is configured) and runs up to the next space, or to the end of
	 * the text when no space follows. HTML tags and the configured text
	 * fragments are then removed.
	 *
	 * @return the cleaned value; {@code ""} when the markers are not
	 *         configured or the prefix does not occur; {@code null} when
	 *         nothing is found between the markers
	 */
	public String spiderAthor(){
		dao.MallScSpiderarticleconfig object = getConfig();
		String authorStart=object.getAuthorStart ();
		String authorEnd=object.getAuthorEnd ();
		String authorPrefix=object.getAuthorPrefix ();

		if(authorStart==null||authorEnd==null
				||authorEnd.equals("")||authorStart.equals("")){
			log.append("作者采集无法采集，开始或结束符未设置。");
			return "";
		}

		String content=getHtmlArticleSpiderTmp().getStringBetweenStr(authorStart, authorEnd);
		if(content==null){
			return null;
		}

		int start=0;
		if(authorPrefix!=null){
			int prefixAt=content.indexOf(authorPrefix);
			if(prefixAt==-1){
				// Test the lookup result itself: adding the prefix length first would hide the -1.
				log.append("作者前缀未找到："+authorPrefix);
				return "";
			}
			start=prefixAt+authorPrefix.length();
		}
		// The value ends at the first space after the prefix, else at the end of the text.
		int end=content.indexOf(" ",start);
		if(end==-1){
			end=content.length();
		}
		String result=content.substring(start, end);

		Html t=new Html();
		t.setHtmlContent(result);
		t.delAllHtmlTagByTagList(getAllHtmlTag() );
		t.delAllTextByTextList(object.getClearTextByAthor());
		return t.getHtmlContent().toString();
	}
	
	/**
	 * Extracts the markup that references the product's main image.
	 * <p>
	 * Takes the text between the configured big-image markers and keeps
	 * everything after the configured prefix (or the whole text when no
	 * prefix is configured), then strips HTML tags and text fragments.
	 *
	 * @return the cleaned markup; {@code ""} when the markers are not
	 *         configured or the prefix does not occur; {@code null} when
	 *         nothing is found between the markers
	 */
	public String spiderProductMainImgCodeParse(){
		dao.MallScSpiderarticleconfig object = getConfig();
		String fromeStart=object.getSc_bigImgStart();
		String fromeEnd=object.getSc_bigImgEnd();
		String formePrefix=object.getSc_bigImgPrefix();
		// NOTE(review): the config also offers getSc_BigIMgReplaceText(); it was fetched but never applied here — confirm whether it should be.

		if(fromeStart==null||fromeEnd==null
				||fromeEnd.equals("")||fromeStart.equals("")){
			log.append("产品主图代码无法采集，开始或结束符未设置。");
			return "";
		}

		String content=getHtmlArticleSpiderTmp().getStringBetweenStr(fromeStart, fromeEnd);
		if(content==null){
			log.append("产品主图代码，无法获取分析文本,content is null，开始"+fromeStart+"结束"+ fromeEnd);
			return null;
		}

		int start=0;
		if(formePrefix!=null){
			int prefixAt=content.indexOf(formePrefix);
			if(prefixAt==-1){
				// Test the lookup result itself: adding the prefix length first would hide the -1.
				log.append("产品主图代码前缀未找到："+formePrefix);
				return "";
			}
			start=prefixAt+formePrefix.length();
		}
		String result=content.substring(start);

		Html t=new Html();
		t.setHtmlContent(result);
		t.delAllHtmlTagByTagList(getAllHtmlTag() );
		// NOTE(review): reuses the "come from" clean-up list — confirm this is the intended list for image markup.
		t.delAllTextByTextList(object.getClearTextByComeFrom());
		return t.getHtmlContent().toString();
	}
	
	
	/**
	 * Extracts the product price from the current page, e.g. from
	 * {@code <strong>价格：66.00元</strong>}.
	 * <p>
	 * Takes the text between the configured price markers, keeps everything
	 * after the configured prefix (or the whole text when no prefix is
	 * configured), removes spaces and the configured replace text, then
	 * strips HTML tags and text fragments.
	 *
	 * @return the cleaned price text; {@code ""} when the markers are not
	 *         configured or the prefix does not occur; {@code null} when
	 *         nothing is found between the markers
	 */
	public String spiderPrice(){
		dao.MallScSpiderarticleconfig object = getConfig();
		String fromeStart=object.getSc_priceStar();
		String fromeEnd=object.getSc_priceEnd();
		String formePrefix=object.getSc_pricePrefix();
		String replaceText= object.getSc_priceReplaceText();

		if(fromeStart==null||fromeEnd==null
				||fromeEnd.equals("")||fromeStart.equals("")){
			log.append("产品价格无法采集，开始或结束符未设置。");
			return "";
		}

		String content=getHtmlArticleSpiderTmp().getStringBetweenStr(fromeStart, fromeEnd);
		if(content==null){
			return null;
		}

		int start=0;
		if(formePrefix!=null){
			int prefixAt=content.indexOf(formePrefix);
			if(prefixAt==-1){
				// Test the lookup result itself: adding the prefix length first would hide the -1.
				log.append("产品价格前缀未找到："+formePrefix);
				return "";
			}
			start=prefixAt+formePrefix.length();
		}
		String result=content.substring(start);

		// Drop spaces, then the configured filler text.
		result=result.replace(" ", "");
		if(replaceText!=null){
			result=result.replace(replaceText, "");
		}

		Html t=new Html();
		t.setHtmlContent(result);
		t.delAllHtmlTagByTagList(getAllHtmlTag() );
		// NOTE(review): reuses the "come from" clean-up list — confirm this is the intended list for prices.
		t.delAllTextByTextList(object.getClearTextByComeFrom());
		return t.getHtmlContent().toString();
	}
	
	/**
	 * Extracts the product source ("come from") from the current page.
	 * <p>
	 * Within the text between the configured start and end markers, the value
	 * begins right after the configured prefix (or at the beginning when no
	 * prefix is configured) and runs up to the next space, or to the end of
	 * the text when no space follows. HTML tags and the configured text
	 * fragments are then removed.
	 *
	 * @return the cleaned value; {@code ""} when the markers are not
	 *         configured or the prefix does not occur; {@code null} when
	 *         nothing is found between the markers
	 */
	public String spiderComeFrom(){
		dao.MallScSpiderarticleconfig object = getConfig();
		String fromeStart=object.getFromeStart ();
		String fromeEnd=object.getFromeEnd ();
		String formePrefix=object.getFormePrefix ();

		if(fromeStart==null||fromeEnd==null
				||fromeEnd.equals("")||fromeStart.equals("")){
			log.append("产品来源无法采集，开始或结束符未设置。");
			return "";
		}

		String content=getHtmlArticleSpiderTmp().getStringBetweenStr(fromeStart, fromeEnd);
		if(content==null){
			return null;
		}

		int start=0;
		if(formePrefix!=null){
			int prefixAt=content.indexOf(formePrefix);
			if(prefixAt==-1){
				// Test the lookup result itself: adding the prefix length first would hide the -1.
				log.append("产品来源前缀未找到："+formePrefix);
				return "";
			}
			start=prefixAt+formePrefix.length();
		}
		// The value ends at the first space after the prefix, else at the end of the text.
		int end=content.indexOf(" ",start);
		if(end==-1){
			end=content.length();
		}
		String result=content.substring(start, end);

		Html t=new Html();
		t.setHtmlContent(result);
		t.delAllHtmlTagByTagList(getAllHtmlTag() );
		t.delAllTextByTextList(object.getClearTextByComeFrom());
		return t.getHtmlContent().toString();
	}
	/**
	 * Spiders the images referenced by an article's content.
	 * <p>
	 * The shared image downloader is given the article and the content, runs,
	 * and hands back the content as it holds it afterwards.
	 *
	 * @param saved   the stored article the images belong to
	 * @param content the article content to scan for images
	 * @return the content after image spidering; {@code ""} when {@code saved}
	 *         is {@code null}; {@code null} when {@code content} is {@code null}
	 */
	public String ScSpiderimage( MallScSpiderarticle saved,String content){
		if(saved==null){
			log.append("内容页为空，无法采集");
			return "";
		}
		if(content==null){
			// A null String cannot be wrapped in a StringBuffer; report "no content" to the caller instead.
			return null;
		}
		ImageSpiderBean ib=getImageBean();
		ib.setScSpiderarticle(saved);
		ib.setArticleContent(new StringBuffer(content));
		ib.spider();
		return ib.getArticleContent().toString();
	}
	
	/** Placeholder for Flash (SWF) spidering; currently does nothing. */
	public void spiderSwf() {
	}
	
	/** Placeholder for MP3 spidering; currently does nothing. */
	public void spiderMp3() {
	}
	
	/**
	 * Sets the busy flag of this worker.
	 *
	 * @param isWorking {@code true} to mark the worker as busy
	 */
	public void setWorking(boolean isWorking) {
		this.isWorking = isWorking;
	}


	/**
	 * @return {@code true} while the worker is marked as busy
	 */
	public boolean isWorking() {
		return this.isWorking;
	}


	/**
	 * Stores a URL record on this worker.
	 *
	 * @param url the URL record
	 */
	public void setUrl(MallScSpiderurls url) {
		this.url = url;
	}


	/**
	 * @return the URL record stored with {@link #setUrl(MallScSpiderurls)}
	 */
	public MallScSpiderurls getUrl() {
		return this.url;
	}

	/**
	 * Replaces the extraction rule set used by the spiderXxx() methods.
	 *
	 * @param config the rule set
	 */
	public void setConfig(MallScSpiderarticleconfig config) {
		this.config = config;
	}

	/**
	 * @return the extraction rule set currently in use
	 */
	public MallScSpiderarticleconfig getConfig() {
		return this.config;
	}

	/**
	 * Stores the list of spider workers.
	 *
	 * @param spiderProcessors the worker list
	 */
	public void setSpiderProcessors(Vector<SpiderProductProcessor> spiderProcessors) {
		this.spiderProcessors = spiderProcessors;
	}

	/**
	 * @return the list of spider workers stored on this instance
	 */
	public Vector<SpiderProductProcessor> getSpiderProcessors() {
		return this.spiderProcessors;
	}

	/**
	 * Replaces the batch of product URLs this worker processes.
	 *
	 * @param ScSpiderurls the new URL batch
	 */
	public void setScSpiderurls(Vector<MallScSpiderurls> ScSpiderurls) {
		this.ScSpiderurls = ScSpiderurls;
	}

	/**
	 * @return the batch of product URLs this worker is processing
	 */
	public Vector<MallScSpiderurls> getScSpiderurls() {
		return this.ScSpiderurls;
	}

	/**
	 * Sets the identifier printed by this worker in its console output.
	 *
	 * @param threadId the worker identifier
	 */
	public void setThreadId(int threadId) {
		this.threadId = threadId;
	}

	/**
	 * @return the worker identifier
	 */
	public int getThreadId() {
		return this.threadId;
	}

	/**
	 * Sets the bean that supplies URL batches to this worker.
	 *
	 * @param ArticleUrlsBean the URL source
	 */
	public void setArticleUrlsBean(ArticleUrlsBean ArticleUrlsBean) {
		this.ArticleUrlsBean = ArticleUrlsBean;
	}

	/**
	 * @return the bean that supplies URL batches to this worker
	 */
	public ArticleUrlsBean getArticleUrlsBean() {
		return this.ArticleUrlsBean;
	}
	


	/**
	 * Records the URL that is currently being spidered.
	 *
	 * @param spiderIngUrl the URL in progress
	 */
	public void setSpiderIngUrl(MallScSpiderurls spiderIngUrl) {
		this.spiderIngUrl = spiderIngUrl;
	}

	/**
	 * @return the URL recorded as currently being spidered
	 */
	public MallScSpiderurls getSpiderIngUrl() {
		return this.spiderIngUrl;
	}

	/**
	 * Replaces the raw page content held by this worker.
	 *
	 * @param htmlContent the page content
	 */
	public void setHtmlContent(StringBuffer htmlContent) {
		this.htmlContent = htmlContent;
	}

	/**
	 * @return the raw content of the most recently downloaded page
	 */
	public StringBuffer getHtmlContent() {
		return this.htmlContent;
	}


	/**
	 * Ad-hoc manual test: spiders one hard-coded product URL for a hard-coded
	 * job (site 3, job 3, host www.hengannet.com).
	 *
	 * @param argv unused
	 */
	public static void main(String argv[]){
		// Called first, before any service is used — presumably bootstraps the DAO layer (TODO confirm).
		util.DaoService.getIntrance();
		/*

		ArticleUrlsBean urlb=new ArticleUrlsBean();
		urlb.setScSpiderurls(ScSpiderurls);
		for(int k=0;k<50;k++){
			SpiderArticleProcessor p=new SpiderArticleProcessor(urlb);
			p.setThreadId(k);
			//p.setDaemon(true);
			p.start();
		}*/
		
		MallScSpiderjob job=new MallScSpiderjob();
		job.setSiteId(3);
		job.setJobId(3);
		job.setHost("www.hengannet.com");
		
		
		// NOTE: this path object is configured but never handed to the processor (see the commented-out setSpiderPath call below).
		SpiderPath path=new SpiderPath();
		path.setHtmlStoreDir("d:/zsba");
		path.setFileEncode("utf-8");
		path.setUrlPort(80);
		path.setWebFileAgreement("http://");
		
			// The single URL record to spider.
			MallScSpiderurls url=new MallScSpiderurls();
			url.setJobId(2);
			url.setSpiderUrslsId(5);
			url.setUrlPath("new_view.asp?id=6269");
			//ScSpiderurls.add(url);
			/**
			Vector<ScSpiderurls>  ScSpiderurls =new 	Vector<ScSpiderurls> ();
			
			for(int i=0;i<10000;i++){
				ScSpiderurls u=new ScSpiderurls();
				url.setJobId(2);
				url.setSpiderUrslsId(i);
				url.setUrlPath("sdfsdf");
				ScSpiderurls.add(u);
			}
			*/
			
			//ScSpiderarticleconfig config =new ScSpiderarticleconfig ();
			
			ArticleUrlsBean urlb=new ArticleUrlsBean();
			// NOTE(review): the site is loaded through the article-config service and cast to MallScSpidersites — confirm this is the right service.
			dao.MallScSpidersites site=(MallScSpidersites)MallScSpiderarticleconfigEntryService.getInstance().get(job.getSiteId());
		
			SpiderProductProcessor ar=new SpiderProductProcessor(site,job,urlb);
			//ar.setSpiderPath(path);
			
			// Runs synchronously on the calling thread; the worker thread is not started.
			ar.spiderByarticle(url);
			////System.out.println(ar.getLog().toString());
			////System.out.println("图片采集日志："+ar.getImageBean().getLog().toString());
	}

	/**
	 * Stores a host name on this worker.
	 *
	 * @param host the host name
	 */
	public void setHost(String host) {
		this.host = host;
	}

	/**
	 * @return the host name stored with {@link #setHost(String)}
	 */
	public String getHost() {
		return this.host;
	}

	/**
	 * Sets the spider job this worker operates on.
	 *
	 * @param job the spider job
	 */
	public void setJob(MallScSpiderjob job) {
		this.job = job;
	}

	/**
	 * @return the spider job this worker operates on
	 */
	public MallScSpiderjob getJob() {
		return this.job;
	}

	/**
	 * Replaces the path/encoding configuration handed to the page downloader.
	 *
	 * @param spiderPath the path configuration
	 */
	public void setSpiderPath(SpiderPath spiderPath) {
		this.spiderPath = spiderPath;
	}

	/**
	 * @return the path/encoding configuration of this worker
	 */
	public SpiderPath getSpiderPath() {
		return this.spiderPath;
	}

	/**
	 * Replaces the downloader used for product pages.
	 *
	 * @param httpBean the page downloader
	 */
	public void setHttpBean(HttpFileBean httpBean) {
		this.httpBean = httpBean;
	}

	/**
	 * @return the downloader used for product pages
	 */
	public HttpFileBean getHttpBean() {
		return this.httpBean;
	}

	/**
	 * Replaces the Html wrapper that the spiderXxx() extractors read from.
	 *
	 * @param htmlArticleSpiderTmp wrapper around the current page
	 */
	public void setHtmlArticleSpiderTmp(Html htmlArticleSpiderTmp) {
		this.htmlArticleSpiderTmp = htmlArticleSpiderTmp;
	}

	/**
	 * @return the Html wrapper around the current page
	 */
	public Html getHtmlArticleSpiderTmp() {
		return this.htmlArticleSpiderTmp;
	}

	/**
	 * Replaces the downloader used for content images.
	 *
	 * @param imageBean the image downloader
	 */
	public void setImageBean(ImageSpiderBean imageBean) {
		this.imageBean = imageBean;
	}

	/**
	 * @return the downloader used for content images
	 */
	public ImageSpiderBean getImageBean() {
		return this.imageBean;
	}

	/**
	 * Replaces the buffer that collects the per-article log.
	 *
	 * @param log the log buffer
	 */
	public void setLog(StringBuffer log) {
		this.log = log;
	}

	/**
	 * @return the buffer holding the per-article log
	 */
	public StringBuffer getLog() {
		return this.log;
	}

	/**
	 * Replaces the space-separated tag list stripped from short extracted values.
	 *
	 * @param allHtmlTag space-separated tag names
	 */
	public void setAllHtmlTag(String allHtmlTag) {
		this.allHtmlTag = allHtmlTag;
	}

	/**
	 * @return the space-separated tag list stripped from short extracted values
	 */
	public String getAllHtmlTag() {
		return this.allHtmlTag;
	}

	/**
	 * Re-spiders an existing article; delegates to
	 * {@link #reScSpiderarticleByURL(MallScSpiderarticle, MallScSpiderurls)}.
	 *
	 * @param s    article to refresh
	 * @param url2 URL record of the product page
	 * @return whatever the delegate returns
	 */
	public MallScSpiderarticle reScSpiderarticle(dao.MallScSpiderarticle s, MallScSpiderurls url2) {
		MallScSpiderarticle refreshed = reScSpiderarticleByURL(s, url2);
		return refreshed;
	}

	/**
	 * Spiders the product behind {@code url2} unless an article for the same
	 * (job, URL) pair is already stored.
	 *
	 * @param url2 URL record of the product page
	 */
	public void ScSpiderarticleByurl(MallScSpiderurls url2) {
		MallScSpiderarticleEntryService articleService = MallScSpiderarticleEntryService.getInstance();
		int jobKey = getJob().getJobId();
		int urlKey = url2.getSpiderUrslsId();

		boolean alreadyStored = articleService.queryByJobIdUrlsId(jobKey, urlKey) != null;
		if (alreadyStored) {
			// Already spidered: nothing to do.
			return;
		}
		spiderByarticle(url2);
	}


	/**
	 * Sets the site definition used by this worker.
	 *
	 * @param siteSpider the site definition
	 */
	public void setSiteSpider(dao.MallScSpidersites siteSpider) {
		this.siteSpider = siteSpider;
	}


	/**
	 * @return the site definition used by this worker
	 */
	public dao.MallScSpidersites getSiteSpider() {
		return this.siteSpider;
	}

// Sets the website directory
	/**
	 * Stores the website directory and, when it is not {@code null}, forwards
	 * it to the path configuration as the save directory.
	 *
	 * @param webSiteDir the website directory, may be {@code null}
	 */
	public void setWebSiteDir(String webSiteDir) {
		this.webSiteDir = webSiteDir;
		if (webSiteDir == null) {
			return;
		}
		spiderPath.setSaveSiteDir(webSiteDir);
	}


	/**
	 * @return the website directory stored with {@link #setWebSiteDir(String)}
	 */
	public String getWebSiteDir() {
		return this.webSiteDir;
	}
		
}
