码迷,mamicode.com
首页 > 编程语言 > 详细

获取EMF文件内全部文字, 并按照左上到右下的顺序排序

时间:2015-07-28 20:32:11      阅读:303      评论:0      收藏:0      [点我收藏+]

标签:

因为工作需要, 要对EMF文件中的文字内容做分析, 于是就有了下面这段代码

懒得加注释了, 反正对外接口属性就那么几个, 根据英文猜吧, 很容易的

 

说明一下:

  这段代码会把所有文字内容按照从左上到右下的顺序排序(EMF内的数据顺序取决于绘图顺序, 并不固定, 而做数据分析需要得到文字之间的行列关系)

但是图片本身没有行列概念, 所以我简单借鉴了纯横排版的思路: 对于2个文字元素, 只要其中一个的显示范围的垂直中点落在另一个的上下边界之内, 就认为它们在同一行; 同一行内再按水平中点从左到右排序

PS: 这种排序对于规则的文字输出没有问题, 但是如果有一个很大的文字作为背景水印就不行了: 水印的显示范围会覆盖几乎所有元素的中点, 导致所有元素都被认为和这个大水印在同一行。各位如果有这种需求, 请自己再考虑排序算法

 

unit Comm.EMFInfo;

interface

uses
  System.Types, System.Generics.Collections,
  Vcl.Graphics;

type
  { One piece of text found in the metafile. }
  TEMFStrInfo = record
    DisplayRect: TRect; // rectangle taken from the text record, stored normalized
    Text: string;       // trimmed text; empty strings are never stored in the list
  end;
  PEMFStrInfo = ^TEMFStrInfo;

  { Collects the text elements of an enhanced metafile and exposes them in
    top-left to bottom-right order, plus a lookup by text. }
  TEMFStrInfoList = Class
  private
    FList: TList<PEMFStrInfo>;          // heap-allocated elements, owned by this object
    FDic: TDictionary<string, UInt32>;  // element text -> index into FList
    FMaxHeight: Integer;                // largest DisplayRect.Bottom over all elements

    function GetItem(Index: UInt32): TEMFStrInfo;
    function GetCount: UInt32;
  public
    { Enumerates AEMF, sorts the collected elements and builds the lookup. }
    constructor Create(AEMF: TMetafile);
    destructor Destroy; override;

    { Number of text elements. }
    property Count: UInt32 read GetCount;
    { Copy of the element at Index (0-based, in sorted order). }
    property Items[Index: UInt32]: TEMFStrInfo read GetItem;
    { Looks an element up by its exact text; returns False when not present.
      When the same text occurs several times, the index stored last wins. }
    function TryGetInfo(AInfoName: string; var AInfo: TEMFStrInfo; var AIndex: UInt32): Boolean;
    { Largest DisplayRect.Bottom of all elements (0 when the list is empty). }
    property MaxHeight: Integer read FMaxHeight;
  end;

implementation

uses
  System.SysUtils, System.Classes, System.Generics.Defaults,
  Winapi.Windows;

const
  // Bit flags carried in the fuOptions field of the small-text-out records
  // declared below; together they select which of the four layouts applies.

  // Set: the string is stored as 8-bit (ANSI) characters.
  // Clear: the string is stored as 16-bit (UNICODE) characters.
  SMALLTEXT_TYPE_ANSI = $200;
  // Set: the record has no rclClip field (EMR_SMALLTEXTOUT layout).
  // Clear: rclClip precedes the string (EMR_SMALLTEXTOUTCLIP layout).
  SMALLTEXT_TYPE_WITHOUT_CLIP = $100;

// Structures
type
  { Small-text-out record layout: ANSI string, with clip rectangle.
    NOTE(review): hand-declared layout; verify against the MS-EMF
    EMR_SMALLTEXTOUT description before changing any field. }
  EMR_SMALLTEXTOUTCLIPA = RECORD
    emr: emr; // common record header
    ptlReference: TPoint; // reference point; might be in negative numbers, so take abs
    nChars: DWORD; // number of characters in cString
    fuOptions: DWORD; // identifies this record layout:
    //   SMALLTEXT_TYPE_WITHOUT_CLIP is clear
    //   SMALLTEXT_TYPE_ANSI is set
    // also holds fuOptions like in the ExtTextOut function
    iGraphicsMode: DWORD; // See iMode parameter of SetGraphicsMode
    exScale: Single; { X and Y scales from Page units to .01mm units }
    eyScale: Single; { if graphics mode is GM_COMPATIBLE. }
    rclClip: TRect; // clip rectangle
    cString: Array [0 .. 0] of AnsiChar; // first character of the string
    { This is followed by the string array }
  END;

  PEMRSmallTextOutClipA = ^EMR_SMALLTEXTOUTCLIPA;

  { Small-text-out record layout: UNICODE string, with clip rectangle.
    NOTE(review): hand-declared layout; verify against the MS-EMF
    EMR_SMALLTEXTOUT description before changing any field. }
  EMR_SMALLTEXTOUTCLIPW = RECORD
    emr: emr; // common record header
    ptlReference: TPoint; // reference point
    nChars: DWORD; // number of characters in cString
    fuOptions: DWORD; // identifies this record layout:
    //   SMALLTEXT_TYPE_WITHOUT_CLIP is clear
    //   SMALLTEXT_TYPE_ANSI is clear
    // also holds fuOptions like in the ExtTextOut function
    iGraphicsMode: DWORD; // See iMode parameter of SetGraphicsMode
    exScale: Single; { X and Y scales from Page units to .01mm units }
    eyScale: Single; { if graphics mode is GM_COMPATIBLE. }
    rclClip: TRect; // clip rectangle
    cString: Array [0 .. 0] of WideChar; // first character of the string
    { This is followed by the string array }
  END;

  PEMRSmallTextOutClipW = ^EMR_SMALLTEXTOUTCLIPW;

  { Small-text-out record layout: ANSI string, no clip rectangle.
    NOTE(review): hand-declared layout; verify against the MS-EMF
    EMR_SMALLTEXTOUT description before changing any field. }
  EMR_SMALLTEXTOUTA = RECORD
    emr: emr; // common record header
    ptlReference: TPoint; // reference point
    nChars: DWORD; // number of characters in cString
    fuOptions: DWORD; // identifies this record layout:
    //   SMALLTEXT_TYPE_WITHOUT_CLIP is set
    //   SMALLTEXT_TYPE_ANSI is set
    // also holds fuOptions like in the ExtTextOut function
    iGraphicsMode: DWORD; // See iMode parameter of SetGraphicsMode
    exScale: Single; { X and Y scales from Page units to .01mm units }
    eyScale: Single; { if graphics mode is GM_COMPATIBLE. }
    cString: Array [0 .. 0] of AnsiChar; // first character of the string
    { This is followed by the string array }
  END;

  PEMRSmallTextOutA = ^EMR_SMALLTEXTOUTA;

  { Small-text-out record layout: UNICODE string, no clip rectangle.
    NOTE(review): hand-declared layout; verify against the MS-EMF
    EMR_SMALLTEXTOUT description before changing any field. }
  EMR_SMALLTEXTOUTW = RECORD
    emr: emr; // common record header
    ptlReference: TPoint; // reference point
    nChars: DWORD; // number of characters in cString
    fuOptions: DWORD; // identifies this record layout:
    //   SMALLTEXT_TYPE_WITHOUT_CLIP is set
    //   SMALLTEXT_TYPE_ANSI is clear
    // also holds fuOptions like in the ExtTextOut function
    iGraphicsMode: DWORD; // See iMode parameter of SetGraphicsMode
    exScale: Single; { X and Y scales from Page units to .01mm units }
    eyScale: Single; { if graphics mode is GM_COMPATIBLE. }
    cString: Array [0 .. 0] of WideChar; // first character of the string
    { This is followed by the string array }
  END;

  PEMRSmallTextOutW = ^EMR_SMALLTEXTOUTW;

{ Stores one text element in AList.
  AText is trimmed first; if nothing remains the element is dropped.
  ARect is stored normalized. The list takes ownership of the allocation. }
procedure AddTextElement(AList: TList<PEMFStrInfo>; const AText: string;
  const ARect: TRect);
var
  nInfo: PEMFStrInfo;
  nText: string;
begin
  nText := Trim(AText);
  if nText = '' then
    Exit;

  New(nInfo);
  try
    nInfo^.Text := nText;
    nInfo^.DisplayRect := ARect;
    nInfo^.DisplayRect.NormalizeRect;
    AList.Add(nInfo);
  except
    // Do not leak the element if the list could not take it.
    Dispose(nInfo);
    raise;
  end;
end;

{ Copies AChars characters that start AOffset bytes after the beginning of the
  record EMFR into AText.
  AIsAnsi selects 8-bit characters (converted through the default ANSI code
  page) or 16-bit characters.
  Returns False, with AText = '', when there are no characters or when the
  string would extend past the end of the record (EMFR.nSize). }
function ReadRecordText(EMFR: PENHMETARECORD; AOffset, AChars: Cardinal;
  AIsAnsi: Boolean; out AText: string): Boolean;
var
  nCharSize: Cardinal;
  nData: PByte;
  nAnsi: AnsiString;
begin
  AText := '';
  if AIsAnsi then
    nCharSize := SizeOf(AnsiChar)
  else
    nCharSize := SizeOf(WideChar);

  // 64-bit arithmetic so a corrupt nChars/offset cannot wrap around the check.
  Result := (AChars > 0) and (AChars <= Cardinal(MaxInt)) and
    (UInt64(AOffset) + UInt64(AChars) * nCharSize <= EMFR.nSize);
  if not Result then
    Exit;

  nData := PByte(EMFR);
  Inc(nData, AOffset);

  if AIsAnsi then
  begin
    SetString(nAnsi, PAnsiChar(nData), Integer(AChars));
    AText := string(nAnsi);
  end
  else
    SetString(AText, PWideChar(nData), Integer(AChars));
end;

{ Callback for EnumEnhMetaFile: collects the text of every text-output record.

  lpData is the TList<PEMFStrInfo> that receives the elements; it is declared
  as LPARAM so the pointer is not truncated on 64-bit targets.

  Handled records:
    EMR_EXTTEXTOUTA / EMR_EXTTEXTOUTW - string located via EMRText.offString,
      which is an offset from the beginning of the record; the element
      rectangle is rclBounds.
    EMR_SMALLTEXTOUT - fuOptions decides between the ANSI/UNICODE and
      clip/no-clip layouts. With a clip rectangle the element rectangle is
      rclClip with its top-left replaced by ptlReference; without one it is
      the empty rectangle at ptlReference.

  Always returns 1 so that the enumeration continues. }
function EnumTextProc(DC: HDC; lpHTable: PHANDLETABLE; EMFR: PENHMETARECORD;
  nObj: Integer; lpData: LPARAM): Integer; stdcall;
var
  nList: TList<PEMFStrInfo>;
  nEMRTO: PEMRExtTextOut;
  nEMRSTO: PEMRSmallTextOutClipA;
  nIsAnsi, nHasClip: Boolean;
  nOffset: Cardinal;
  nText: string;
  nRect: TRect;
begin
  Result := 1;
  nList := TList<PEMFStrInfo>(Pointer(lpData));

  if (EMFR.iType = EMR_EXTTEXTOUTA) or (EMFR.iType = EMR_EXTTEXTOUTW) then
  begin
    nEMRTO := PEMRExtTextOut(EMFR);
    nIsAnsi := EMFR.iType = EMR_EXTTEXTOUTA;

    if ReadRecordText(EMFR, nEMRTO.EMRText.offString, nEMRTO.EMRText.nChars,
      nIsAnsi, nText) then
      AddTextElement(nList, nText, nEMRTO.rclBounds);
  end
  else if EMFR.iType = EMR_SMALLTEXTOUT then
  begin
    // All four small-text layouts share the fields up to eyScale, so the
    // clip/ANSI view is safe for reading the header fields.
    nEMRSTO := PEMRSmallTextOutClipA(EMFR);
    nIsAnsi := (nEMRSTO^.fuOptions and SMALLTEXT_TYPE_ANSI) <> 0;
    nHasClip := (nEMRSTO^.fuOptions and SMALLTEXT_TYPE_WITHOUT_CLIP) = 0;

    // The string starts right after rclClip when it is present, otherwise
    // right after eyScale.
    if nHasClip then
      nOffset := Cardinal(NativeUInt(@nEMRSTO^.cString) - NativeUInt(EMFR))
    else
      nOffset := Cardinal(NativeUInt(@PEMRSmallTextOutA(EMFR)^.cString) -
        NativeUInt(EMFR));

    // Reading the text first also proves the record is long enough to
    // contain rclClip before it is touched.
    if ReadRecordText(EMFR, nOffset, nEMRSTO^.nChars, nIsAnsi, nText) then
    begin
      if nHasClip then
        nRect := nEMRSTO^.rclClip
      else
        nRect.BottomRight := nEMRSTO^.ptlReference;
      nRect.TopLeft := nEMRSTO^.ptlReference;

      AddTextElement(nList, nText, nRect);
    end;
  end;
end;

type
  { Comparer that orders text elements row by row (top to bottom) and, inside
    a row, from left to right. }
  TEMFStrInfoCompare = class(TComparer<PEMFStrInfo>)
  public
    function Compare(const Left, Right: PEMFStrInfo): Integer; override;
  end;

{ TEMFStrInfoCompare }

{ Returns -1 / 0 / 1 when Left sorts before / equal to / after Right.

  Two elements count as being in the same row when the vertical centre of
  either one lies strictly between the top and bottom of the other. Elements
  in the same row are ordered by the X of their centre points, then by the Y.
  Otherwise the result is the position of Left's centre relative to Right's
  rectangle: -1 when it is at or above the top, 1 when at or below the bottom. }
function TEMFStrInfoCompare.Compare(const Left, Right: PEMFStrInfo): Integer;

  { -1 when AY is at or above ABand.Top, 1 when at or below ABand.Bottom,
    0 when strictly inside. }
  function VerticalRelation(AY: Integer; const ABand: TRect): Integer;
  begin
    if AY <= ABand.Top then
      Result := -1
    else if AY >= ABand.Bottom then
      Result := 1
    else
      Result := 0;
  end;

  { Three-way comparison of two integers. }
  function Order(A, B: Integer): Integer;
  begin
    if A < B then
      Result := -1
    else if A > B then
      Result := 1
    else
      Result := 0;
  end;

var
  nLeftMid, nRightMid: TPoint;
  nLeftVsRight: Integer;
  nSameRow: Boolean;
begin
  nLeftMid := Left.DisplayRect.CenterPoint;
  nRightMid := Right.DisplayRect.CenterPoint;

  nLeftVsRight := VerticalRelation(nLeftMid.Y, Right.DisplayRect);
  nSameRow := (nLeftVsRight = 0) or
    (VerticalRelation(nRightMid.Y, Left.DisplayRect) = 0);

  // Different rows: the vertical relation alone decides.
  if not nSameRow then
    Exit(nLeftVsRight);

  // Same row: left to right, with the vertical centre as tie-breaker.
  Result := Order(nLeftMid.X, nRightMid.X);
  if Result = 0 then
    Result := Order(nLeftMid.Y, nRightMid.Y);
end;

{ TEMFStrInfoList }

{ Builds the list from the metafile AEMF:
    1. enumerates the metafile records, letting EnumTextProc append every
       text element to FList;
    2. sorts FList with TEMFStrInfoCompare;
    3. records, for every element, its text -> index mapping in FDic and
       tracks the largest DisplayRect.Bottom in FMaxHeight.
  When several elements carry the same text, the one sorted last wins in
  FDic (AddOrSetValue overwrites earlier entries). }
constructor TEMFStrInfoList.Create(AEMF: TMetafile);
var
  i: Integer;
  nCompare: TEMFStrInfoCompare;
  nPI: PEMFStrInfo;
begin
  inherited Create;

  FList := TList<PEMFStrInfo>.Create;
  FDic := TDictionary<string, UInt32>.Create;
  FMaxHeight := 0;

  { Read the metafile elements into the list }
  EnumEnhMetafile(0, AEMF.Handle, @EnumTextProc, Pointer(FList), Rect(0, 0, 0, 0));

  try
    { Sort }
    nCompare := TEMFStrInfoCompare.Create;
    try
      FList.Sort(nCompare);
    finally
      nCompare.Free;
    end;
  except
    // Deliberate best effort: a failure while sorting is ignored and the
    // elements are kept in whatever order the list is left in.
  end;

  { Store the element texts in the dictionary }
  for i := 0 to FList.Count - 1 do
  begin
    nPI := FList[i];
    if nPI^.DisplayRect.Bottom > FMaxHeight then
      FMaxHeight := nPI^.DisplayRect.Bottom;
    FDic.AddOrSetValue(nPI^.Text, i);
  end;
end;

{ Releases every element owned by FList, then the list and the dictionary.
  Also safe on a partially constructed object (the destructor runs
  automatically when the constructor raises), where FList may still be nil. }
destructor TEMFStrInfoList.Destroy;
var
  i: Integer;
  nPI: PEMFStrInfo;
begin
  if FList <> nil then
    for i := 0 to FList.Count - 1 do
    begin
      // Dispose through a typed local so the record's string is finalized.
      nPI := FList[i];
      Dispose(nPI);
    end;
  FList.Free;
  FDic.Free;
  inherited;
end;

{ Getter for Count: the number of elements currently held in FList. }
function TEMFStrInfoList.GetCount: UInt32;
begin
  Exit(FList.Count);
end;

{ Getter for Items: returns a copy of the element stored at position Index
  of FList. Index validation is left to the list itself. }
function TEMFStrInfoList.GetItem(Index: UInt32): TEMFStrInfo;
var
  nEntry: PEMFStrInfo;
begin
  nEntry := FList[Index];
  Result := nEntry^;
end;

{ Looks up the element whose text equals AInfoName.
  On success returns True, stores the element's position in AIndex and a copy
  of the element in AInfo. On failure returns False and leaves AInfo
  untouched; AIndex holds whatever the dictionary lookup wrote to it. }
function TEMFStrInfoList.TryGetInfo(AInfoName: string; var AInfo: TEMFStrInfo; var AIndex: UInt32): Boolean;
begin
  if not FDic.TryGetValue(AInfoName, AIndex) then
    Exit(False);

  AInfo := FList[AIndex]^;
  Result := True;
end;

end.

 

获取EMF文件内全部文字, 并按照左上到右下的顺序排序

标签:

原文地址:http://www.cnblogs.com/hs-kill/p/4683484.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!