Mengurai teks HTML yang berisi tag ‹font› tidak mengatur ulang ukuran dan jenis font setelah ‹/font›
Kode saya berfungsi cukup baik kecuali setelah ‹/font›.
Sebelum ‹font size=9› blablabla.. ukuran teks adalah 11. Saya berharap setelah ‹/font› ukuran teks diatur ulang menjadi 11, tetapi masih tetap di 9. Hal yang sama untuk keluarga font.
Pastinya saya salah paham cara menggunakan jsoup. Lebih baik saya menggunakan CSS, tetapi saya tidak tahu caranya.
Terimakasih atas bantuannya.
package test;
import java.awt.Color;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.math.BigInteger;
import org.apache.poi.xwpf.usermodel.UnderlinePatterns;
import org.apache.poi.xwpf.usermodel.VerticalAlign;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTblLayoutType;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STTblLayoutType;
public class ReadHtml
{
protected static java.util.Vector<String> contenu = null;
org.apache.xmlbeans.XmlCursor cursor = null;
class WStyle
{
protected String police = "Times New Roman";
protected int taille = 11;
protected Color couleur = Color.black;
protected boolean gras = false;
protected boolean italique = false;
public WStyle() // constructeur
{
super();
}
protected String getPolice() {return police;}
protected int getTaille() {return taille;}
protected Color getCouleur() {return couleur;}
protected boolean getGras() {return gras;}
protected boolean getItalique() {return italique;}
protected void setPolice(String p) {police=p;}
protected void setTaille(int t) {taille=t;}
protected void setCouleur(Color c) {couleur=c;}
protected void setGras(boolean g) {gras=g;}
protected void setItalique(boolean i) {italique=i;}
}
public ReadHtml()
{
super();
contenu = new java.util.Vector<String>();
createWordFile();
}
private XWPFParagraph getTableParagraph(XWPFTableCell cell, String html)
{
cell.removeParagraph(0);
XWPFParagraph paragraph = cell.addParagraph();
paragraph.setSpacingAfterLines(0);
paragraph.setSpacingAfter(0);
Document htmlDocument = Jsoup.parse(html);
Elements htmlParagraphs = htmlDocument.select("p");
for(Element htmlParagraph : htmlParagraphs)
{
System.out.println(htmlParagraph);
ParagraphNodeVisitor nodeVisitor = new ParagraphNodeVisitor(paragraph);
NodeTraversor.traverse(nodeVisitor, htmlParagraph);
}
return paragraph;
}
private void createWordFile()
{
XWPFParagraph para = null;
try
{
XWPFDocument document = new XWPFDocument();
FileOutputStream out = new FileOutputStream(new File("./", "NewTable.docx"));
XWPFTable table = document.createTable();
CTTblLayoutType type = table.getCTTbl().getTblPr().addNewTblLayout();
type.setType(STTblLayoutType.FIXED);
table.getCTTbl().addNewTblGrid().addNewGridCol().setW(BigInteger.valueOf(1670));
table.getCTTbl().getTblGrid().addNewGridCol().setW(BigInteger.valueOf(6000));
String myTexte = "<html><head</head><body><p><font face=\"Verdana\" size=11>Good Morning</font> <font size=9 face=\"Times\"> " +
"<i><b>how are you today </b></i></font> Not so bad.<br>Thanks";
// first line
XWPFTableRow tableRow= table.getRow(0);
para = getTableParagraph(tableRow.getCell(0), "<p>Row #1, Col. #1");
tableRow .getCell(0).setParagraph(para);
XWPFTableCell cell = tableRow.createCell();
para = getTableParagraph(cell, myTexte); // Row #1, Col. #2
tableRow .getCell(1).setParagraph(para);
// seconde line
tableRow= table.createRow();
para = getTableParagraph(tableRow.getCell(0), "<p>Row #2, Col. #1");
tableRow .getCell(0).setParagraph(para);
para = getTableParagraph(tableRow.getCell(1), "<p>Row #2, Col. #2");
tableRow.getCell(1).setParagraph(para);
document.write(out);
document.close();
out.close();
System.out.println("NewTable.docx written successully");
}
catch (FileNotFoundException e) {System.out.println("File exception --> " + e.toString()); }
catch (IOException e) {System.out.println("I/O exception --> " + e.toString()); }
catch (Exception e) {System.out.println("Other exception --> " + e.toString()); }
}
public class ParagraphNodeVisitor implements NodeVisitor
{
String nodeName;
String fontFace;
String fontType;
boolean needNewRun;
boolean isItalic;
boolean isBold;
boolean isUnderlined;
int fontSize;
String fontColor;
VerticalAlign align = VerticalAlign.BASELINE ;
XWPFParagraph paragraph;
XWPFRun run;
ParagraphNodeVisitor(XWPFParagraph paragraph)
{
this.paragraph = paragraph;
this.run = paragraph.createRun();
this.nodeName = "";
this.needNewRun = false;
this.isItalic = false;
this.isBold = false;
this.isUnderlined = false;
this.fontSize = 11;
this.fontColor = "000000";
this.fontFace="Times";
}
@Override
public void head(Node node, int depth)
{
nodeName = node.nodeName();
needNewRun = false;
if ("#text".equals(nodeName))
{
run.setText(((TextNode)node).text());
needNewRun = true; //after setting the text in the run a new run is needed
}
else if ("i".equals(nodeName)) {isItalic = true;}
else if ("b".equals(nodeName)) {isBold = true;}
else if ("sup".equals(nodeName)){align = VerticalAlign.SUPERSCRIPT ;}
else if ("u".equals(nodeName)) {isUnderlined = true;}
else if ("br".equals(nodeName)) {run.addBreak();}
else if ("p".equals(nodeName)) {run.addBreak();}
else if ("font".equals(nodeName))
{
fontColor = (!"".equals(node.attr("color")))?node.attr("color").substring(1):"000000";
fontSize = (!"".equals(node.attr("size")))?Integer.parseInt(node.attr("size")):11;
fontFace = (!"".equals(node.attr("face")))?node.attr("face"):"Times";
}
if (needNewRun) run = paragraph.createRun();
needNewRun = false;
run.setItalic(isItalic);
run.setBold(isBold);
if (isUnderlined) run.setUnderline(UnderlinePatterns.SINGLE);
else run.setUnderline(UnderlinePatterns.NONE);
run.setColor(fontColor);
run.setFontSize(fontSize);
run.setFontFamily(fontFace);
run.setSubscript(align);
}
@Override
public void tail(Node node, int depth)
{
nodeName = node.nodeName();
System.out.println("Node=" + nodeName);
if ("i".equals(nodeName)) {isItalic = false;}
else if ("b".equals(nodeName)) {isBold = false;}
else if ("u".equals(nodeName)) {isUnderlined = false;}
else if ("sup".equals(nodeName)) {align= VerticalAlign.BASELINE ;}
else if ("font".equals("nodeName"))
{
fontColor = "000000";
fontSize = 11;
fontFace="Times";
System.out.println("Family=" + fontFace + " Taille=" + fontSize);
}
if (needNewRun) run = paragraph.createRun();
needNewRun = false;
run.setItalic(isItalic);
run.setBold(isBold);
if (isUnderlined) run.setUnderline(UnderlinePatterns.SINGLE); else run.setUnderline(UnderlinePatterns.NONE);
run.setColor(fontColor);
run.setFontSize(fontSize);
run.setFontFamily(fontFace);
run.setSubscript(align);
}
}
public static void main(String[] args)
{
new ReadHtml() ;
}
}