/*
 * Decompiled with CFR 0.152.
 */
package com.dtyunxi.finance.biz.bulelakespider.service.sc;

import com.dtyunxi.finance.biz.bulelakespider.model.IndexPage;
import com.dtyunxi.finance.biz.bulelakespider.service.IIndexWebSpider;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * WebMagic page processor that scans the notice list page of
 * {@code fgw.sc.gov.cn}, collects every list entry (link, title, date) and
 * exposes the link of the most recent entry whose title contains
 * {@link #TITLE_KEYWORD}.
 *
 * <p>Entries accumulate in an instance-level list across calls to
 * {@link #process(Page)}; the class performs no synchronization of its own.
 */
public class SCIndexWebSpider implements IIndexWebSpider {
    /** Host prefixed to the relative {@code href} values found in the list. */
    private static final String BASE_URL = "http://fgw.sc.gov.cn";

    /** Title fragment an entry must contain to be selected (decodes to "成品油价格的"). */
    private static final String TITLE_KEYWORD = "\u6210\u54c1\u6cb9\u4ef7\u683c\u7684";

    /** Date layout of the text captured from each entry's {@code <span>}. */
    private static final String DATE_PATTERN = "yyyy-MM-dd";

    /** Regex isolating the inner HTML of the notice list. */
    private static final String LIST_REGEX = "<ul class=\"list-li mt30\">([\\s,\\S]+?)<\\/ul>";

    /** Captures (1) href, (2) title attribute and (3) span text of one list item. */
    private static final Pattern LIST_ITEM_PATTERN = Pattern.compile(
            "<li[\\s,\\S]+?href=\"(.+?)\"[\\s,\\S]+?title=\"(.+?)\"[\\s,\\S]+?<span[\\s,\\S]+?>(.+?)<\\/span");

    private final Site site = Site.me().setRetryTimes(3).setSleepTime(100);
    private final List<IndexPage> indexPageList = new ArrayList<IndexPage>();
    private String nextUri;

    /**
     * Extracts all list entries from the page and records the link of the
     * newest entry matching {@link #TITLE_KEYWORD} as the next URI.
     *
     * @param page the downloaded list page
     * @throws NullPointerException if the notice list cannot be located in the
     *         page or no entry matches the title keyword
     */
    public void process(Page page) {
        String listHtml = page.getHtml().regex(LIST_REGEX).toString();
        if (listHtml == null) {
            // Fail with a clear message instead of an anonymous NPE inside Pattern.matcher(null).
            throw new NullPointerException("notice list <ul class=\"list-li mt30\"> not found in page");
        }
        this.collectIndexPages(LIST_ITEM_PATTERN.matcher(listHtml));
        IndexPage maxPage = this.getIndexPage();
        if (maxPage == null) {
            throw new NullPointerException(
                    "no list entry with a title containing \"" + TITLE_KEYWORD + "\" was found");
        }
        this.nextUri = maxPage.getLink();
    }

    /**
     * Converts every match of the list-item pattern into an {@link IndexPage}
     * and appends it to {@link #indexPageList}.
     *
     * @param m matcher positioned over the list HTML
     */
    private void collectIndexPages(Matcher m) {
        // SimpleDateFormat is not thread-safe, so keep it local to this call.
        SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_PATTERN);
        while (m.find()) {
            String link = m.group(1);
            String title = m.group(2);
            String time = m.group(3);

            Date date;
            try {
                date = dateFormat.parse(time.trim());
            } catch (ParseException e) {
                // Fall back to the epoch so an entry with a malformed date can
                // never outrank a properly dated one when picking the newest.
                System.err.println("SCIndexWebSpider: unparseable date '" + time
                        + "' for entry '" + title + "': " + e.getMessage());
                date = new Date(0L);
            }

            IndexPage indexPage = new IndexPage();
            indexPage.setTime(date);
            indexPage.setTitle(title);
            indexPage.setLink(toAbsoluteUrl(link));
            this.indexPageList.add(indexPage);
        }
    }

    /**
     * Prefixes {@link #BASE_URL} to a site-relative link; links that already
     * carry an http/https scheme are returned unchanged.
     */
    private static String toAbsoluteUrl(String link) {
        if (link.startsWith("http://") || link.startsWith("https://")) {
            return link;
        }
        return BASE_URL + link;
    }

    /**
     * Returns a copy of the collected entry with the latest date among those
     * whose title contains {@link #TITLE_KEYWORD}. On equal dates the entry
     * collected first wins.
     *
     * @return the newest matching entry, or {@code null} if none matches
     */
    @Override
    public IndexPage getIndexPage() {
        IndexPage latest = null;
        for (IndexPage candidate : this.indexPageList) {
            if (!candidate.getTitle().contains(TITLE_KEYWORD)) {
                continue;
            }
            if (latest == null || candidate.getTime().compareTo(latest.getTime()) > 0) {
                latest = candidate;
            }
        }
        if (latest == null) {
            return null;
        }
        // Always hand out a copy so callers never hold a reference into the internal list.
        return new IndexPage(latest.getTime(), latest.getTitle(), latest.getLink());
    }

    /**
     * @return the link chosen by the last successful {@link #process(Page)}
     *         call, or {@code null} if none has completed yet
     */
    @Override
    public String getNextUri() {
        return this.nextUri;
    }

    /** @return the crawl settings (3 retries, 100 ms sleep) used by this processor */
    public Site getSite() {
        return this.site;
    }

    /** Manual smoke run: crawls the list page once and prints the selected link. */
    public static void main(String[] args) {
        SCIndexWebSpider scWebSpider = new SCIndexWebSpider();
        Request request = new Request("http://fgw.sc.gov.cn/sfgw/tzgg/list.shtml");
        request.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36");
        Spider spider = Spider.create((PageProcessor)scWebSpider).addRequest(request);
        spider.thread(1).run();
        String nextUri = scWebSpider.getNextUri();
        System.out.println(nextUri);
    }
}

