标签:现在 dev rate standard set void folder ast 读取
// Extract the text of page 1, filtered to a rectangle built from the page's own width and height.
String sourceFolder2 = "E:\\picture2\\租赁合同2.pdf";
PdfDocument doc = new PdfDocument(new PdfReader(sourceFolder2));
// Look the page size up once instead of re-fetching the page for each dimension.
Rectangle pageSize = doc.getPage(1).getPageSize();
float height = pageSize.getHeight();
float width = pageSize.getWidth();
Rectangle rect = new Rectangle(width, height);
FilteredTextEventListener filterListener = new FilteredTextEventListener(
        new LocationTextExtractionStrategy(), new TextRegionEventFilter(rect));
String extractedText = PdfTextExtractor.getTextFromPage(doc.getPage(1), filterListener);
// The text has already been copied into a String, so the document can be released here.
doc.close();
System.out.println(extractedText);
/**
 * Attaches two region-filtered text extraction strategies to a single
 * {@code FilteredEventListener}, processes page 1 of {@code test.pdf} once,
 * and checks the text each region collected.
 *
 * @throws IOException if the PDF cannot be read
 */
@Test
public void testWithMultiFilteredRenderListener() throws IOException {
    // try-with-resources guarantees the document is closed even when an assertion fails.
    try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(sourceFolder + "test.pdf"))) {
        FilteredEventListener listener = new FilteredEventListener();

        // Rectangle arguments are (x, y, width, height).
        ITextExtractionStrategy region1Listener = listener.attachEventListener(
                new LocationTextExtractionStrategy(),
                new TextRegionEventFilter(new Rectangle(122, 678.9f, 22, 12)));

        ITextExtractionStrategy region2Listener = listener.attachEventListener(
                new LocationTextExtractionStrategy(),
                new TextRegionEventFilter(new Rectangle(156, 678.9f, 13, 12)));

        // A single pass over the page feeds both attached strategies.
        PdfCanvasProcessor parser = new PdfCanvasProcessor(new GlyphEventListener(listener));
        parser.processPageContent(pdfDocument.getPage(1));

        Assert.assertEquals("Your", region1Listener.getResultantText());
        Assert.assertEquals("dju", region2Listener.getResultantText());
    }
}
/**
 * Event listener that records one axis-aligned bounding rectangle for every
 * {@code RENDER_TEXT} event it receives.
 */
static class MyEventListener implements IEventListener {
    private List<Rectangle> rectangles = new ArrayList<>();

    /**
     * Computes the box spanned by the start of the descent line and the end of
     * the ascent line of the rendered text and stores it.
     * Events other than {@code RENDER_TEXT} are ignored.
     */
    @Override
    public void eventOccurred(IEventData data, EventType type) {
        if (type != EventType.RENDER_TEXT) {
            return;
        }
        TextRenderInfo textInfo = (TextRenderInfo) data;
        Vector descentStart = textInfo.getDescentLine().getStartPoint();
        Vector ascentEnd = textInfo.getAscentLine().getEndPoint();

        // Normalise the two corners so that width and height are never negative.
        float left = Math.min(descentStart.get(0), ascentEnd.get(0));
        float right = Math.max(descentStart.get(0), ascentEnd.get(0));
        float bottom = Math.min(descentStart.get(1), ascentEnd.get(1));
        float top = Math.max(descentStart.get(1), ascentEnd.get(1));

        Rectangle box = new Rectangle(left, bottom, right - left, top - bottom);
        rectangles.add(box);
    }

    /** Only text rendering events are requested. */
    @Override
    public Set<EventType> getSupportedEvents() {
        Set<EventType> supported = new LinkedHashSet<>();
        supported.add(EventType.RENDER_TEXT);
        return supported;
    }

    /** Returns the live list of rectangles collected so far. */
    public List<Rectangle> getRectangles() {
        return rectangles;
    }

    /** Discards all collected rectangles. */
    public void clear() {
        rectangles.clear();
    }
}
/**
 * Variant of {@link MyEventListener} that splits each text render event into
 * its per-character render infos and records a rectangle for each of them.
 */
static class MyCharacterEventListener extends MyEventListener {
    @Override
    public void eventOccurred(IEventData data, EventType type) {
        if (type != EventType.RENDER_TEXT) {
            return;
        }
        TextRenderInfo chunk = (TextRenderInfo) data;
        // Delegate every single-character info to the parent, which computes and stores its box.
        for (TextRenderInfo characterInfo : chunk.getCharacterRenderInfos()) {
            super.eventOccurred(characterInfo, type);
        }
    }
}
/**
 * Copies {@code input} to {@code output}, drawing a thin red outline around
 * every piece of text found on every page.
 *
 * @param input            path of the PDF to read
 * @param output           path of the PDF to write
 * @param singleCharacters {@code true} to outline each character separately,
 *                         {@code false} to outline each text chunk as rendered
 * @throws IOException if the input cannot be read or the output cannot be written
 */
private void parseAndHighlight(String input, String output, boolean singleCharacters) throws IOException {
    // try-with-resources closes the document even if parsing or drawing fails part-way.
    try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(input), new PdfWriter(output))) {
        MyEventListener myEventListener = singleCharacters ? new MyCharacterEventListener() : new MyEventListener();
        PdfDocumentContentParser parser = new PdfDocumentContentParser(pdfDocument);
        for (int pageNum = 1; pageNum <= pdfDocument.getNumberOfPages(); pageNum++) {
            parser.processContent(pageNum, myEventListener);
            List<Rectangle> rectangles = myEventListener.getRectangles();
            PdfCanvas canvas = new PdfCanvas(pdfDocument.getPage(pageNum));
            canvas.setLineWidth(0.5f);
            canvas.setStrokeColor(ColorConstants.RED);
            for (Rectangle rectangle : rectangles) {
                canvas.rectangle(rectangle);
                canvas.stroke();
            }
            // The listener is reused across pages, so drop this page's boxes before the next one.
            myEventListener.clear();
        }
    }
}
/**
 * Runs {@code parseAndHighlight} on {@code page229.pdf} with chunk-level highlighting.
 */
@Test
public void highlightNotDefTest() throws IOException, InterruptedException {
    // false: outline whole phrases/words as rendered; true: visit every single character.
    final boolean perCharacter = false;
    parseAndHighlight(sourceFolder + "page229.pdf", outputPath + "page229.pdf", perCharacter);
}
/**
 * Finds every occurrence of the text "甲方" on the last page of the PDF,
 * outlines each match in red in the output copy, prints each match's
 * coordinates and text, and finally prints the smallest Y coordinate found.
 *
 * @throws Exception if the PDF cannot be read or written
 */
@Test
public void findPosition() throws Exception {
    String sourceFolder2 = "E:\\picture2\\租赁合同2.pdf";
    String output = "E:\\picture2\\租赁合同2_stroke.pdf";
    // try-with-resources closes the document even if processing fails part-way.
    try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(sourceFolder2), new PdfWriter(output))) {
        PdfPage lastPage = pdfDocument.getLastPage();
        RegexBasedLocationExtractionStrategy strategy = new RegexBasedLocationExtractionStrategy("甲方");
        PdfCanvasProcessor canvasProcessor = new PdfCanvasProcessor(strategy);
        canvasProcessor.processPageContent(lastPage);
        Collection<IPdfTextLocation> resultantLocations = strategy.getResultantLocations();

        PdfCanvas pdfCanvas = new PdfCanvas(lastPage);
        pdfCanvas.setLineWidth(0.5f);
        // The stroke colour is the same for every match, so set it once up front.
        pdfCanvas.setStrokeColor(ColorConstants.RED);

        List<IPdfTextLocation> sets = new ArrayList<>();
        for (IPdfTextLocation location : resultantLocations) {
            Rectangle rectangle = location.getRectangle();
            pdfCanvas.rectangle(rectangle);
            pdfCanvas.stroke();
            System.out.println(rectangle.getX() + "," + rectangle.getY() + "," + rectangle.getLeft() + "," +
                    rectangle.getRight() + "," + rectangle.getTop() + "," + rectangle.getBottom() + "," +
                    rectangle.getWidth() + "," + rectangle.getHeight());
            System.out.println(location.getText());
            sets.add(location);
        }

        // Order matches by ascending Y; Float.compare replaces the nested subtraction ternary.
        Collections.sort(sets, new Comparator<IPdfTextLocation>() {
            @Override
            public int compare(IPdfTextLocation o1, IPdfTextLocation o2) {
                return Float.compare(o1.getRectangle().getY(), o2.getRectangle().getY());
            }
        });

        // Guard against a page with no match; get(0) would otherwise throw.
        if (!sets.isEmpty()) {
            System.out.println(sets.get(0).getRectangle().getY());
        }
    }
}
88.0,297.53,88.0,115.72,311.53,297.53,27.720001,14.0
甲方
213.0,674.176,213.0,241.0,688.176,674.176,28.0,14.0
甲方
227.75,767.7765,227.75,254.75,781.2765,767.7765,27.0,13.5
甲方
322.25,767.7765,322.25,349.25,781.2765,767.7765,27.0,13.5
甲方
297.53
/**
 * Creates an uncompressed single-page A3 PDF containing a light-gray
 * background rectangle, a magenta caption, and a PNG image.
 *
 * @throws IOException if the font, the image or the output file cannot be accessed
 */
@Test
public void imagesWithDifferentDepth() throws IOException, InterruptedException {
    String outFileName = destinationFolder + "transparencyTest01.pdf";
    // try-with-resources closes the document even if drawing fails part-way.
    try (PdfDocument pdfDocument = new PdfDocument(new PdfWriter(outFileName, new WriterProperties()
            .setCompressionLevel(CompressionConstants.NO_COMPRESSION)))) {
        // Explicit A3 page (the original note says the no-argument default is A4 - TODO confirm).
        PdfPage page = pdfDocument.addNewPage(PageSize.A3);
        PdfCanvas canvas = new PdfCanvas(page);
        // Set the background fill colour, then paint the background rectangle.
        canvas.setFillColor(ColorConstants.LIGHT_GRAY).fill();
        canvas.rectangle(80, 0, 700, 1200).fill();
        // Start adding the caption text.
        canvas
                .saveState()
                .beginText()
                .moveText(116, 1150) // where the text starts
                .setFontAndSize(PdfFontFactory.createFont(StandardFonts.HELVETICA), 14) // font and size
                .setFillColor(ColorConstants.MAGENTA) // text colour
                .showText("8 bit depth PNG") // the text to show
                .endText()
                .restoreState();
        // Read the image and add it at the given position.
        ImageData img = ImageDataFactory.create(sourceFolder + "manualTransparency_8bit.png");
        canvas.addImage(img, 100, 780, 200, false);
        // Final step: release the canvas; the document is closed when the try block exits
        // (per the original note, the PDF does not open correctly otherwise).
        canvas.release();
    }
}
标签:现在 dev rate standard set void folder ast 读取
原文地址:https://www.cnblogs.com/sky-chen/p/10005879.html