/*
 * Decompiled with CFR 0.152.
 */
package com.dtyunxi.finance.biz.bulelakespider.service.sc;

import com.dtyunxi.finance.biz.bulelakespider.service.IDataWebSpider;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;

/**
 * WebMagic page processor that extracts one value from a Word attachment linked on a page.
 *
 * <p>Flow, as implemented below:
 * <ol>
 *   <li>{@link #process(Page)} finds, inside the element marked {@code class="con_con"}, the
 *       {@code href} of the anchor that precedes the text meaning "retail price table".</li>
 *   <li>The link is resolved against the directory of the page URL and downloaded by
 *       {@link #getInfo(String)}, which returns the plain text of the Word document.</li>
 *   <li>{@link #getDataForLink(String)} applies a row regex to that text and stores the captured
 *       cell, which is then available through {@link #getMaxLiter()}.</li>
 * </ol>
 *
 * <p>NOTE(review): the stored value is written by whichever thread runs {@code process} and read by
 * the caller of {@code getMaxLiter()}; no synchronization is performed here.
 */
public class SCDataWebSpider
implements IDataWebSpider {
    private static final Logger log = LoggerFactory.getLogger(SCDataWebSpider.class);

    /**
     * Captures (group 1) the href of the first anchor after {@code class="con_con"} that is
     * followed by the text meaning "retail price table".
     */
    private static final String ATTACHMENT_LINK_REGEX =
            "class=\"con_con\"[\\s,\\S]+?<a href=\"(.+?)\"[\\s,\\S]+?\u96f6\u552e\u4ef7\u683c\u8868";

    /**
     * Matches a text line that starts with "0", one non-whitespace character and the word meaning
     * "vehicle-use", then captures (group 1) the tab-delimited cell that follows the third tab.
     * Compiled once because the pattern never changes.
     */
    private static final Pattern PRICE_ROW_PATTERN =
            Pattern.compile("\\n0[\\S]\u8f66\u7528.+?\\t.+?\\t.+?\\t(.+?)\\t");

    private final Site site = Site.me().setRetryTimes(3).setSleepTime(100);

    /** Last value captured by {@link #getDataForLink(String)}; {@code null} until a match is found. */
    private String maxLiter;

    /**
     * Locates the attachment link on the crawled page and extracts the value from it.
     *
     * <p>If the page does not contain the expected link, nothing is downloaded and the stored
     * value is left untouched.
     *
     * @param page the page delivered by WebMagic
     */
    public void process(Page page) {
        Selectable target = page.getHtml().regex(ATTACHMENT_LINK_REGEX, 1);
        String relativeLink = target == null ? null : target.toString();
        if (relativeLink == null || relativeLink.isEmpty()) {
            // Without this guard the concatenation below would request ".../null".
            log.warn("No attachment link found on page {}", page.getUrl());
            return;
        }

        // The link is treated as relative to the directory that contains the page.
        String pageUrl = page.getUrl().toString();
        String baseUrl = pageUrl.substring(0, pageUrl.lastIndexOf("/"));
        this.getDataForLink(baseUrl + "/" + relativeLink);
    }

    /**
     * Downloads the Word document at the given URL and stores the first value matched by the
     * price-row pattern. When the document cannot be read or contains no matching row, the
     * previously stored value is kept.
     *
     * @param uriDocxPath absolute URL of a {@code .doc} or {@code .docx} document
     */
    public void getDataForLink(String uriDocxPath) {
        String documentText = SCDataWebSpider.getInfo(uriDocxPath);
        if (documentText == null || documentText.isEmpty()) {
            log.warn("No text extracted from {}", uriDocxPath);
            return;
        }

        Matcher matcher = PRICE_ROW_PATTERN.matcher(documentText);
        if (matcher.find()) {
            this.maxLiter = matcher.group(1);
        } else {
            log.warn("Price row not found in {}", uriDocxPath);
        }
    }

    /**
     * @return the value captured by the most recent successful extraction, or {@code null} if
     *         none has succeeded yet
     */
    @Override
    public String getMaxLiter() {
        return this.maxLiter;
    }

    /**
     * Manual entry point: crawls one fixed page with a single thread and prints the extracted value.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        SCDataWebSpider scDataWebSpider = new SCDataWebSpider();
        String uriOil = "http://fgw.sc.gov.cn/sfgw/c106088/2022/6/28/4156af2ea298475a81aae0f93bc04e74.shtml";
        Request request = new Request(uriOil);
        request.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36");
        Spider.create((PageProcessor)scDataWebSpider).addRequest(request).thread(1).run();
        System.out.println(scDataWebSpider.maxLiter);
    }

    /**
     * Downloads a Word document from a URL and returns its plain text.
     *
     * <p>The format is chosen from the path suffix: {@code .docx} is read with
     * {@link XWPFWordExtractor}, {@code .doc} with {@link WordExtractor}. Both formats are read from
     * the URL connection stream. Failures are logged and reported as an empty string rather than
     * thrown, so callers only need to check for empty text.
     *
     * @param path absolute URL of the document
     * @return the extracted text, or an empty string when the path is {@code null}, is not a valid
     *         URL, does not end in {@code .doc}/{@code .docx}, or the document cannot be read
     */
    public static String getInfo(String path) {
        if (path == null) {
            log.warn("Document path is null");
            return "";
        }

        boolean isDocx = path.endsWith(".docx");
        boolean isDoc = path.endsWith(".doc");
        if (!isDocx && !isDoc) {
            // Message text means "this file is not a word file".
            log.warn("\u6b64\u6587\u4ef6\u4e0d\u662fword\u6587\u4ef6: {}", path);
            return "";
        }

        URL url;
        try {
            url = new URL(path);
        }
        catch (MalformedURLException e) {
            log.error("Invalid document URL: {}", path, e);
            return "";
        }

        // Broad catch is deliberate: the POI readers throw several checked and unchecked
        // exception types, and this method's contract is to return "" on any failure.
        try (InputStream in = url.openConnection().getInputStream()) {
            String text = isDocx ? readDocx(in) : readDoc(in);
            return text == null ? "" : text;
        }
        catch (Exception e) {
            log.error("Failed to read Word document from {}", path, e);
            return "";
        }
    }

    /** Extracts the text of an OOXML ({@code .docx}) document from the stream. */
    private static String readDocx(InputStream in) throws Exception {
        OPCPackage opcPackage = OPCPackage.open(in);
        XWPFWordExtractor extractor = new XWPFWordExtractor(opcPackage);
        try {
            return extractor.getText();
        }
        finally {
            extractor.close();
        }
    }

    /** Extracts the text of a binary ({@code .doc}) document from the stream. */
    private static String readDoc(InputStream in) throws Exception {
        WordExtractor extractor = new WordExtractor(in);
        try {
            return extractor.getText();
        }
        finally {
            extractor.close();
        }
    }

    /**
     * @return the crawl settings used by this processor (3 retries, 100 ms sleep time)
     */
    public Site getSite() {
        return this.site;
    }
}

