Home Backend Development PHP Tutorial xml file reading and writing examples

xml file reading and writing examples

Nov 30, 2016, 09:39 AM
xml

This is an implementation of XML file reading and writing used in the project I am currently working on. I am recording it here for my own future reference and for anyone who needs to learn from it.

xml file reading and writing class:

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.lt.cj.config.entities.ConfigModel;
import org.lt.cj.config.entities.TMallConfigModel;
import org.lt.cj.core.Seed;
/**
 * Writes a {@link TMallConfigModel} to an XML configuration file and reads it back, using JDOM.
 *
 * <p>The document root is {@code <website name=".." url="..">} with the children
 * {@code taskName}, {@code seeds}, {@code fitUrls}, {@code workingThreads}, {@code pageEncoding},
 * {@code dwdPhoFlag}, {@code orien_lan}, {@code trans_lan}, {@code pageUrlRegs} and
 * {@code pathElements}. The reader additionally understands an optional {@code extractHtml}
 * element, which the writer does not produce.
 */
public class XMLConfigWriter {

    /** Shared logger for every failure reported by this class. */
    private static final Logger LOGGER = Logger.getLogger(XMLConfigWriter.class.getName());

    /**
     * Builds the XML document describing a Tmall crawl task.
     *
     * @param missionConfig the task configuration to serialize
     * @return the document, or {@code null} when the configuration has no seeds
     * @throws MissionConfigException if {@code missionConfig} is {@code null}
     * @throws EnterUrlsException declared for callers; not thrown by the current implementation
     */
    public Document buildUpMallDocument(TMallConfigModel missionConfig) throws MissionConfigException, EnterUrlsException {
        if (missionConfig == null) {
            throw new MissionConfigException();
        } else if (missionConfig.getSeeds().isEmpty()) {
            // saveTask() relies on null meaning "nothing to write".
            return null;
        }

        // Root element carrying the website name and address as attributes.
        Element rootElement = new Element("website");
        rootElement.setAttribute("name", missionConfig.getWebsiteName());
        rootElement.setAttribute("url", missionConfig.getWebsiteUrl());

        // Task name.
        rootElement.addContent(textElement("taskName", missionConfig.getTaskName()));

        // Seed list; child order inside <seed> is sortName, seedName, seedUrl.
        Element seeds = new Element("seeds");
        for (Seed seed : missionConfig.getSeeds()) {
            Element seedElement = new Element("seed");
            seedElement.addContent(textElement("sortName", seed.getSortName()));
            seedElement.addContent(textElement("seedName", seed.getSeedName()));
            seedElement.addContent(textElement("seedUrl", seed.getUrl()));
            seeds.addContent(seedElement);
        }
        rootElement.addContent(seeds);

        // Patterns of the URLs that should be followed.
        Element fitUrls = new Element("fitUrls");
        for (String fitUrlReg : missionConfig.getFitUrlRegs()) {
            fitUrls.addContent(textElement("fit_url", fitUrlReg));
        }
        rootElement.addContent(fitUrls);

        // Number of concurrent worker threads.
        rootElement.addContent(textElement("workingThreads", "" + missionConfig.getWorkingThreads()));

        // Page encoding.
        rootElement.addContent(textElement("pageEncoding", missionConfig.getPageEncoding()));

        // Flag controlling whether photos are downloaded.
        rootElement.addContent(textElement("dwdPhoFlag", missionConfig.getDwdPhoFlag()));

        // Original and translation languages.
        rootElement.addContent(textElement("orien_lan", missionConfig.getOrigLanguage()));
        rootElement.addContent(textElement("trans_lan", missionConfig.getTranLanguage()));

        // Patterns of the product pages to extract from. The previous code looped over
        // getPageReg() but wrote getFitUrlRegs().get(i), emitting the wrong patterns and
        // risking IndexOutOfBoundsException when the two lists differ in length.
        Element pageUrlRegs = new Element("pageUrlRegs");
        for (String pageReg : missionConfig.getPageReg()) {
            pageUrlRegs.addContent(textElement("pageUrl", pageReg));
        }
        rootElement.addContent(pageUrlRegs);

        // One element per entity rule, named after the map key, holding its <path> steps.
        Element pathElements = new Element("pathElements");
        Map<String, List<String>> entityReg = missionConfig.getEntityReg();
        for (Map.Entry<String, List<String>> entry : entityReg.entrySet()) {
            Element entityElement = new Element(entry.getKey());
            List<String> paths = entry.getValue();
            if (paths != null) {
                for (String path : paths) {
                    entityElement.addContent(textElement("path", path));
                }
            }
            pathElements.addContent(entityElement);
        }
        rootElement.addContent(pathElements);

        return new Document(rootElement);
    }

    /**
     * Writes the document to a file using JDOM's pretty format, replacing any existing file.
     * I/O failures are logged and not propagated.
     *
     * @param document the document to write
     * @param filepath path of the file to create or overwrite
     */
    public void createConfigFile(Document document, String filepath) {
        XMLOutputter xmlOutputter = new XMLOutputter();
        xmlOutputter.setFormat(Format.getPrettyFormat());
        FileOutputStream fileOutputStream = null;
        try {
            // FileOutputStream creates the file when it is missing, so no explicit
            // exists()/createNewFile() step is required.
            fileOutputStream = new FileOutputStream(filepath);
            xmlOutputter.output(document, fileOutputStream);
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "Failed to write config file " + filepath, e);
        } finally {
            // Always release the file handle; the previous code never closed the stream.
            if (fileOutputStream != null) {
                try {
                    fileOutputStream.close();
                } catch (IOException e) {
                    LOGGER.log(Level.WARNING, "Failed to close config file " + filepath, e);
                }
            }
        }
    }

    /**
     * Rebuilds the document for a task and rewrites its configuration file.
     * Nothing is written when {@link #buildUpMallDocument} returns {@code null}.
     *
     * @param filePath path of the configuration file to (re)write
     * @param configModel the task configuration; it is cast to {@link TMallConfigModel}
     */
    public void saveTask(String filePath, ConfigModel configModel) {
        try {
            TMallConfigModel tMallConfigModel = (TMallConfigModel) configModel;
            Document document = buildUpMallDocument(tMallConfigModel);
            if (document != null) {
                createConfigFile(document, filePath);
            }
        } catch (MissionConfigException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        } catch (EnterUrlsException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Reads a configuration file into a new {@link TMallConfigModel}.
     *
     * <p>Missing elements are tolerated: a missing text element yields {@code null}, a missing
     * list element yields an empty list, and a missing {@code workingThreads} leaves the model's
     * own value untouched. Parse and I/O failures are logged and the model populated so far
     * (possibly empty) is returned.
     *
     * @param filePath path of the XML configuration file
     * @return the model read from the file
     * @throws NumberFormatException if {@code workingThreads} is present but not an integer
     */
    public TMallConfigModel readMallDocument(String filePath) {
        TMallConfigModel model = new TMallConfigModel();
        SAXBuilder sb = new SAXBuilder();
        try {
            // build(String) interprets its argument as a system ID (URI), so a plain file
            // path is wrapped in File to be resolved reliably.
            Document doc = sb.build(new File(filePath));
            Element root = doc.getRootElement();

            // Basic website and task information.
            model.setWebsiteName(root.getAttributeValue("name"));
            model.setWebsiteUrl(root.getAttributeValue("url"));
            model.setTaskName(root.getChildText("taskName"));

            // Entry seeds.
            model.setSeeds(readSeeds(root.getChild("seeds")));

            // Patterns selecting the specific page fragments to extract (optional element).
            model.setExtractHtmlReg(childTexts(root.getChild("extractHtml")));

            // Patterns of the URLs to follow.
            model.setFitUrlRegs(childTexts(root.getChild("fitUrls")));

            // Worker thread count.
            String workingCount = root.getChildTextTrim("workingThreads");
            if (workingCount != null) {
                model.setWorkingThreads(Integer.valueOf(workingCount));
            }

            // Page encoding, photo-download flag and languages.
            model.setPageEncoding(root.getChildText("pageEncoding"));
            model.setDwdPhoFlag(root.getChildText("dwdPhoFlag"));
            model.setOrigLanguage(root.getChildText("orien_lan"));
            model.setTranLanguage(root.getChildText("trans_lan"));

            // Patterns of the product pages.
            model.setPageReg(childTexts(root.getChild("pageUrlRegs")));

            // Remaining extraction rules: element name -> texts of its children.
            Map<String, List<String>> entityReg = new HashMap<String, List<String>>();
            Element pathElements = root.getChild("pathElements");
            if (pathElements != null) {
                for (Object child : pathElements.getChildren()) {
                    Element entityElement = (Element) child;
                    entityReg.put(entityElement.getName(), childTexts(entityElement));
                }
            }
            model.setEntityReg(entityReg);
        } catch (JDOMException ex) {
            LOGGER.log(Level.SEVERE, "Failed to parse config file " + filePath, ex);
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, "Failed to read config file " + filePath, ex);
        }
        return model;
    }

    /** Creates an element with the given name whose content is the given text. */
    private static Element textElement(String name, String text) {
        Element element = new Element(name);
        element.addContent(text);
        return element;
    }

    /** Returns the untrimmed text of every child of {@code parent}; empty when {@code parent} is null. */
    private static List<String> childTexts(Element parent) {
        List<String> texts = new ArrayList<String>();
        if (parent != null) {
            for (Object child : parent.getChildren()) {
                texts.add(((Element) child).getText());
            }
        }
        return texts;
    }

    /** Reads every child of the {@code seeds} element as a seed; empty when the element is null. */
    private static List<Seed> readSeeds(Element seedsElement) {
        List<Seed> seedList = new ArrayList<Seed>();
        if (seedsElement == null) {
            return seedList;
        }
        for (Object child : seedsElement.getChildren()) {
            Element seedElement = (Element) child;
            Seed seed = readSeed(seedElement);
            Element parentSeedElement = seedElement.getChild("parentSeed");
            if (parentSeedElement != null) {
                // NOTE(review): as in the original code, the parent seed is parsed but never
                // attached to the seed, so it is effectively ignored. Linking it needs a
                // Seed API that is not visible from this file - TODO confirm and wire up.
                Seed parentSeed = readSeed(parentSeedElement);
            }
            seedList.add(seed);
        }
        return seedList;
    }

    /** Builds a seed from the trimmed seedName, seedUrl and sortName children of an element. */
    private static Seed readSeed(Element seedElement) {
        Seed seed = new Seed();
        seed.setSeedName(seedElement.getChildTextTrim("seedName"));
        seed.setUrl(seedElement.getChildTextTrim("seedUrl"));
        seed.setSortName(seedElement.getChildTextTrim("sortName"));
        return seed;
    }
}
Copy after login

xml file content:

<?xml version="1.0" encoding="UTF-8"?>
<website name="taobao_mall" url="http://www.tmall.com/?ver=2011b">
  <taskName>caiji_tmall_精品男装_T恤</taskName>
  <seeds>
    <seed>
      <sortName>精品男装/T恤</sortName>
      <seedName>精品男装/T恤</seedName>
      <seedUrl>http://item.tmall.com/item.htm?id=9351702393</seedUrl>
    </seed>
  </seeds>
  <extractHtml>
      <path>div class="list item-view item-miniView"</path>
  </extractHtml>
  <fitUrls>
    <fit_url>http://www\.tmall\.com/go/act/tmall/iwanttobuy\.php.*</fit_url>
    <fit_url>http://list\.tmall\.com/.*</fit_url>
    <fit_url>http://item\.tmall\.com/item\.htm.*</fit_url>
  </fitUrls>
  <workingThreads>1</workingThreads>
  <pageEncoding>UTF-8</pageEncoding>
  <orien_lan>zh</orien_lan>
  <trans_lan>en</trans_lan>
  <pageUrlRegs>
    <pageUrl>http://www\.tmall\.com/go/act/tmall/iwanttobuy\.php.*</pageUrl>
  </pageUrlRegs>
  <pathElements>
    <commnents>
      <path>div class="tb-box tshop-psm tshop-psm-bdetailtabl" id="J_Detail"</path>
      <path>div id="reviews" class="J_DetailSection" data-reviewApi</path>
    </commnents>
    <shopAddr>
      <path>div class="clearfix tb-header-nav"</path>
      <path>div class="nav"</path>
      <path>a href</path>
    </shopAddr>
    <productDetail>
      <path>div id="attributes" class="attributes</path>
      <path>ul class="attributes-list</path>
      <path>li</path>
    </productDetail>
    <photosPath>
      <path>div class="tb-detail-bd tb-clear"</path>
      <path>div class="tb-gallery"</path>
      <path>div class="tb-booth tb-pic tb-s310"</path>
      <path>img id="J_ImgBooth" src</path>
    </photosPath>
    <category>
      <path>ul class="mallCrumbs-nav" id="J_crumbs"</path>
      <path>li class="mallCrumbs-nav-item"</path>
    </category>
    <countSold>
      <path>div class="tb-detail-bd tb-clear"</path>
      <path>ul class="tb-meta"</path>
      <path>li class="tb-sold-out tb-clear"</path>
    </countSold>
    <shopInfo>
      <path>div class="shop-intro"</path>
      <path>div class="extend"</path>
      <path>li</path>
    </shopInfo>
    <despPhos>
      <path>script</path>
    </despPhos>
    <thumbPhosPath>
      <path>div class="tb-detail-bd tb-clear"</path>
      <path>div class="tb-gallery"</path>
      <path>ul id="J_UlThumb" class="tb-thumb tb-clearfix"</path>
      <path>img src=</path>
    </thumbPhosPath>
    <productName>
      <path>div class="layout grid-s5m0 "</path>
      <path>div class="tb-detail-hd"</path>
      <path>a target="_blank" href=</path>
    </productName>
    <productPrice>
      <path>div class="tb-detail-bd tb-clear"</path>
      <path>ul class="tb-meta"</path>
      <path>li id="J_StrPriceModBox" class="tb-detail-price tb-clearfix"</path>
    </productPrice>
  </pathElements>
</website>
Copy after login


Statement of this Website
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn

Hot AI Tools

Undresser.AI Undress

Undresser.AI Undress

AI-powered app for creating realistic nude photos

AI Clothes Remover

AI Clothes Remover

Online AI tool for removing clothes from photos.

Undress AI Tool

Undress AI Tool

Undress images for free

Clothoff.io

Clothoff.io

AI clothes remover

Video Face Swap

Video Face Swap

Swap faces in any video effortlessly with our completely free AI face swap tool!

Hot Article

Roblox: Bubble Gum Simulator Infinity - How To Get And Use Royal Keys
3 weeks ago By 尊渡假赌尊渡假赌尊渡假赌
Mandragora: Whispers Of The Witch Tree - How To Unlock The Grappling Hook
3 weeks ago By 尊渡假赌尊渡假赌尊渡假赌
Nordhold: Fusion System, Explained
3 weeks ago By 尊渡假赌尊渡假赌尊渡假赌

Hot Tools

Notepad++7.3.1

Notepad++7.3.1

Easy-to-use and free code editor

SublimeText3 Chinese version

SublimeText3 Chinese version

Chinese version, very easy to use

Zend Studio 13.0.1

Zend Studio 13.0.1

Powerful PHP integrated development environment

Dreamweaver CS6

Dreamweaver CS6

Visual web development tools

SublimeText3 Mac version

SublimeText3 Mac version

God-level code editing software (SublimeText3)

Hot Topics

Java Tutorial
1668
14
PHP Tutorial
1273
29
C# Tutorial
1256
24
Can I open an XML file using PowerPoint? Can I open an XML file using PowerPoint? Feb 19, 2024 pm 09:06 PM

Can XML files be opened with PPT? XML, Extensible Markup Language (Extensible Markup Language), is a universal markup language that is widely used in data exchange and data storage. Compared with HTML, XML is more flexible and can define its own tags and data structures, making the storage and exchange of data more convenient and unified. PPT, or PowerPoint, is a software developed by Microsoft for creating presentations. It provides a comprehensive way of

Convert XML data to CSV format in Python Convert XML data to CSV format in Python Aug 11, 2023 pm 07:41 PM

Convert XML data in Python to CSV format XML (ExtensibleMarkupLanguage) is an extensible markup language commonly used for data storage and transmission. CSV (CommaSeparatedValues) is a comma-delimited text file format commonly used for data import and export. When processing data, sometimes it is necessary to convert XML data to CSV format for easy analysis and processing. Python is a powerful

Handling errors and exceptions in XML using Python Handling errors and exceptions in XML using Python Aug 08, 2023 pm 12:25 PM

Handling Errors and Exceptions in XML Using Python XML is a commonly used data format used to store and represent structured data. When we use Python to process XML, sometimes we may encounter some errors and exceptions. In this article, I will introduce how to use Python to handle errors and exceptions in XML, and provide some sample code for reference. Use try-except statement to catch XML parsing errors When we use Python to parse XML, sometimes we may encounter some

Python implements conversion between XML and JSON Python implements conversion between XML and JSON Aug 07, 2023 pm 07:10 PM

Python implements conversion between XML and JSON Introduction: In the daily development process, we often need to convert data between different formats. XML and JSON are common data exchange formats. In Python, we can use various libraries to convert between XML and JSON. This article will introduce several commonly used methods, with code examples. 1. To convert XML to JSON in Python, we can use the xml.etree.ElementTree module

Python parsing special characters and escape sequences in XML Python parsing special characters and escape sequences in XML Aug 08, 2023 pm 12:46 PM

Python parses special characters and escape sequences in XML XML (eXtensibleMarkupLanguage) is a commonly used data exchange format used to transfer and store data between different systems. When processing XML files, you often encounter situations that contain special characters and escape sequences, which may cause parsing errors or misinterpretation of the data. Therefore, when parsing XML files using Python, we need to understand how to handle these special characters and escape sequences. 1. Special characters and

How to handle XML and JSON data formats in C# development How to handle XML and JSON data formats in C# development Oct 09, 2023 pm 06:15 PM

How to handle XML and JSON data formats in C# development requires specific code examples. In modern software development, XML and JSON are two widely used data formats. XML (Extensible Markup Language) is a markup language used to store and transmit data, while JSON (JavaScript Object Notation) is a lightweight data exchange format. In C# development, we often need to process and operate XML and JSON data. This article will focus on how to use C# to process these two data formats, and attach

How do you parse and process HTML/XML in PHP? How do you parse and process HTML/XML in PHP? Feb 07, 2025 am 11:57 AM

This tutorial demonstrates how to efficiently process XML documents using PHP. XML (eXtensible Markup Language) is a versatile text-based markup language designed for both human readability and machine parsing. It's commonly used for data storage an

How to use PHP functions to process XML data? How to use PHP functions to process XML data? May 05, 2024 am 09:15 AM

Use PHPXML functions to process XML data: Parse XML data: simplexml_load_file() and simplexml_load_string() load XML files or strings. Access XML data: Use the properties and methods of the SimpleXML object to obtain element names, attribute values, and subelements. Modify XML data: add new elements and attributes using the addChild() and addAttribute() methods. Serialized XML data: The asXML() method converts a SimpleXML object into an XML string. Practical example: parse product feed XML, extract product information, transform and store it into a database.

See all articles