From 3e1cf98a11550f6cc299671f28b7c5d6bfc1e7e6 Mon Sep 17 00:00:00 2001 From: datalogics-saharay Date: Thu, 6 Nov 2025 20:02:43 -0600 Subject: [PATCH 01/10] Add C++ OCR Sample --- All/All_Datalogics_32Bit.sln | 6 + All/All_Datalogics_64Bit.sln | 6 + All/All_Datalogics_ARM64.sln | 6 + All/GNUmakefile | 1 + All/build_run_all.bat | 3 +- All/build_run_all_mac.sh | 1 + All/run_all_DL_samples_mac.sh | 7 + OCR/OCRImage/Makefile | 7 + OCR/OCRImage/OCRImage.cpp | 110 ++++++++++++++ OCR/OCRImage/OCRImage.vcxproj | 246 ++++++++++++++++++++++++++++++++ OCR/OCRImage/OCRImage_32Bit.sln | 25 ++++ OCR/OCRImage/OCRImage_64Bit.sln | 25 ++++ OCR/OCRImage/OCRImage_ARM64.sln | 25 ++++ OCR/README.md | 2 + _Input/OCRImage.png | Bin 0 -> 6403 bytes 15 files changed, 469 insertions(+), 1 deletion(-) create mode 100644 OCR/OCRImage/Makefile create mode 100644 OCR/OCRImage/OCRImage.cpp create mode 100644 OCR/OCRImage/OCRImage.vcxproj create mode 100644 OCR/OCRImage/OCRImage_32Bit.sln create mode 100644 OCR/OCRImage/OCRImage_64Bit.sln create mode 100644 OCR/OCRImage/OCRImage_ARM64.sln create mode 100644 OCR/README.md create mode 100644 _Input/OCRImage.png diff --git a/All/All_Datalogics_32Bit.sln b/All/All_Datalogics_32Bit.sln index b3fb481e..e65229f7 100644 --- a/All/All_Datalogics_32Bit.sln +++ b/All/All_Datalogics_32Bit.sln @@ -165,6 +165,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "AddDigitalSignatureCMS", ". 
EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "AddDigitalSignatureRFC3161", "..\Security\AddDigitalSignatureRFC3161\AddDigitalSignatureRFC3161.vcxproj", "{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "OCRImage", "..\OCR\OCRImage\OCRImage.vcxproj", "{26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 @@ -491,6 +493,10 @@ Global {1D787362-28C9-4460-9606-840F3B484350}.Debug|Win32.Build.0 = Debug|Win32 {1D787362-28C9-4460-9606-840F3B484350}.Release|Win32.ActiveCfg = Release|Win32 {1D787362-28C9-4460-9606-840F3B484350}.Release|Win32.Build.0 = Release|Win32 + {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Debug|Win32.ActiveCfg = Debug|Win32 + {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Debug|Win32.Build.0 = Debug|Win32 + {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Release|Win32.ActiveCfg = Release|Win32 + {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Release|Win32.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/All/All_Datalogics_64Bit.sln b/All/All_Datalogics_64Bit.sln index 4cb3b394..f0eb2384 100644 --- a/All/All_Datalogics_64Bit.sln +++ b/All/All_Datalogics_64Bit.sln @@ -165,6 +165,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "AddDigitalSignatureCMS", ". 
EndProject Project("{1D787362-28C9-4460-9606-840F3B484350}") = "AddDigitalSignatureRFC3161", "..\Security\AddDigitalSignatureRFC3161\AddDigitalSignatureRFC3161.vcxproj", "{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "OCRImage", "..\OCR\OCRImage\OCRImage.vcxproj", "{26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 @@ -491,6 +493,10 @@ Global {1D787362-28C9-4460-9606-840F3B484350}.Debug|x64.Build.0 = Debug|x64 {1D787362-28C9-4460-9606-840F3B484350}.Release|x64.ActiveCfg = Release|x64 {1D787362-28C9-4460-9606-840F3B484350}.Release|x64.Build.0 = Release|x64 + {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Debug|x64.ActiveCfg = Debug|x64 + {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Debug|x64.Build.0 = Debug|x64 + {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Release|x64.ActiveCfg = Release|x64 + {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/All/All_Datalogics_ARM64.sln b/All/All_Datalogics_ARM64.sln index 847ef7e7..266b3871 100644 --- a/All/All_Datalogics_ARM64.sln +++ b/All/All_Datalogics_ARM64.sln @@ -163,6 +163,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "AddDigitalSignatureCMS", ". 
EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "AddDigitalSignatureRFC3161", "..\Security\AddDigitalSignatureRFC3161\AddDigitalSignatureRFC3161.vcxproj", "{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "OCRImage", "..\OCR\OCRImage\OCRImage.vcxproj", "{CFABC1FE-3F70-47E9-A911-EA085E6D127A}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|ARM64 = Debug|ARM64 @@ -485,6 +487,10 @@ Global {1D787362-28C9-4460-9606-840F3B484350}.Debug|ARM64.Build.0 = Debug|ARM64 {1D787362-28C9-4460-9606-840F3B484350}.Release|ARM64.ActiveCfg = Release|ARM64 {1D787362-28C9-4460-9606-840F3B484350}.Release|ARM64.Build.0 = Release|ARM64 + {CFABC1FE-3F70-47E9-A911-EA085E6D127A}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {CFABC1FE-3F70-47E9-A911-EA085E6D127A}.Debug|ARM64.Build.0 = Debug|ARM64 + {CFABC1FE-3F70-47E9-A911-EA085E6D127A}.Release|ARM64.ActiveCfg = Release|ARM64 + {CFABC1FE-3F70-47E9-A911-EA085E6D127A}.Release|ARM64.Build.0 = Release|ARM64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/All/GNUmakefile b/All/GNUmakefile index e6c80aff..a7042398 100644 --- a/All/GNUmakefile +++ b/All/GNUmakefile @@ -78,6 +78,7 @@ SAMPLES = Annotations/CreateAnnotations \ Images/RenderPage \ InformationExtraction/CountColorsInDoc \ InformationExtraction/ExtractDocumentInfo \ + OCR/OCRImage \ Printing/PostScriptInjection \ Security/AddDigitalSignatureCMS \ Security/AddDigitalSignatureRFC3161 \ diff --git a/All/build_run_all.bat b/All/build_run_all.bat index 113f116d..6b649b0f 100644 --- a/All/build_run_all.bat +++ b/All/build_run_all.bat @@ -250,13 +250,14 @@ SET "DL_SAMPLE_LIST=%DL_SAMPLE_LIST% ContentExtraction\ExtractAttachments" SET "DL_SAMPLE_LIST=%DL_SAMPLE_LIST% ContentExtraction\ExtractFonts" SET "DL_SAMPLE_LIST=%DL_SAMPLE_LIST% InformationExtraction\CountColorsInDoc" SET "DL_SAMPLE_LIST=%DL_SAMPLE_LIST% 
InformationExtraction\ExtractDocumentInfo" +SET "DL_SAMPLE_LIST=%DL_SAMPLE_LIST% OCR\OCRImage" SET "DL_SAMPLE_LIST=%DL_SAMPLE_LIST% FileSystem\AlternateFileSystem" REM *** The total number of DL samples. This must be accurate! IF NOT "%ARCH%"=="ARM64" ( SET /A "NUM_DL_SAMPLES=73" ) ELSE ( - SET /A "NUM_DL_SAMPLES=74" + SET /A "NUM_DL_SAMPLES=75" ) diff --git a/All/build_run_all_mac.sh b/All/build_run_all_mac.sh index ecaeedfe..320ccee5 100755 --- a/All/build_run_all_mac.sh +++ b/All/build_run_all_mac.sh @@ -109,6 +109,7 @@ declare -a DL_SAMPLE_LIST=( \ "Images/OutputPreview" \ "InformationExtraction/CountColorsInDoc" \ "InformationExtraction/ExtractDocumentInfo" \ + "OCR/OCRImage" \ "Printing/PostScriptInjection" \ "Security/AESEncryption" \ "Security/AddDigitalSignatureCMS" \ diff --git a/All/run_all_DL_samples_mac.sh b/All/run_all_DL_samples_mac.sh index 0a08cbaf..4784e436 100755 --- a/All/run_all_DL_samples_mac.sh +++ b/All/run_all_DL_samples_mac.sh @@ -269,6 +269,13 @@ cd ../../InformationExtraction/ExtractDocumentInfo ./ExtractDocumentInfo-${stage}.app/Contents/MacOS/ExtractDocumentInfo-${stage} echo "" +# OCR + +echo Running sample OCRImage +cd ../../OCR/OCRImage +./OCRImage-${stage}.app/Contents/MacOS/OCRImage-${stage} +echo "" + # Printing echo Running sample PostScriptInjection diff --git a/OCR/OCRImage/Makefile b/OCR/OCRImage/Makefile new file mode 100644 index 00000000..abea3b2a --- /dev/null +++ b/OCR/OCRImage/Makefile @@ -0,0 +1,7 @@ +SAMPNAME=OCRImage +OTHER_OBJS = $(SAMPNAME).o + +include ../../dlutils/common.mak + +$(SAMPNAME).o: $(SRC)/$(SAMPNAME).cpp + diff --git a/OCR/OCRImage/OCRImage.cpp b/OCR/OCRImage/OCRImage.cpp new file mode 100644 index 00000000..f80615b6 --- /dev/null +++ b/OCR/OCRImage/OCRImage.cpp @@ -0,0 +1,110 @@ +// +// Copyright (c) 2017-2025, Datalogics, Inc. All rights reserved. +// +// +// The OCRImage sample demonstrates how the Library works to OCR an image. 
+// +// Command-line: (Optional) +// + +#include + +#include "ASExtraCalls.h" +#include "DLExtrasCalls.h" +#include "PSFCalls.h" +#include "PERCalls.h" +#include "PEWCalls.h" +#include "PagePDECntCalls.h" +#include "PagePDECntCalls.h" // NOTE(review): repeats the include on the previous line. +#include "OCREngineCalls.h" + +#include "InitializeLibrary.h" +#include "APDFLDoc.h" + +#define DIR_LOC "../../../../Resources/Sample_Input/" +#define DEF_INPUT "OCRImage.png" +#define DEF_OUTPUT "OCRImage-out.pdf" + +int main(int argc, char **argv) { + APDFLib libInit; + ASErrorCode errCode = 0; + if (libInit.isValid() == false) { + errCode = libInit.getInitError(); + std::cout << "Initialization failed with code " << errCode << std::endl; + return libInit.getInitError(); + } + + std::string csInputFileName(argc > 1 ? argv[1] : DIR_LOC DEF_INPUT); + std::string csOutputFileName(argc > 4 ? argv[4] : DEF_OUTPUT); + std::cout << "Recognizing text in " << csInputFileName.c_str() << std::endl; + + ASPathName sInput = APDFLDoc::makePath(csInputFileName.c_str()); + ASPathName sOutput = APDFLDoc::makePath(csOutputFileName.c_str()); + + DURING + // Sets the correct location for the OCREngine function table; done before OCREngineInitialize() is called below. + gOCREngineHFT = InitOCREngineHFT; + + // Initialize the OCREngine plugin. On failure only errCode is set, so the OCR work below is skipped. NOTE(review): sInput/sOutput are released only inside the success branch, so they are not released on this path. + if (!OCREngineInitialize()) { + std::cout << "The OCREngine plugin failed to initialize." << std::endl; + errCode = -1; + } + + if (0 == errCode) { + // Create a PDEImage object from the input file to perform OCR on. + PDEImage image = DLCreatePDEImageFromFile(sInput, nullptr); + + // Start from the default OCR parameters. + OCRParamsRec ocrParams = PDOCRDefaultParams(); + + // Set languages to configure OCREngine with (passed below as an array plus its element count). + OCRLanguage newLanguages[] = {OCRLanguage_English, OCRLanguage_French, OCRLanguage_ChineseTraditional, + OCRLanguage_ChineseSimplified, OCRLanguage_Japanese}; + + ASInt32 numLanguages = sizeof(newLanguages) / sizeof(newLanguages[0]); + PDOCRParamsSetLanguagesConfigured(&ocrParams, newLanguages, numLanguages); + + // Create the destination document and one page (media box 72*8.5 by 72*11 units, i.e. US Letter at 72 units per inch) for the created form. 
+ PDDoc doc = PDDocCreate(); + + ASFixedRect mediaBox = {}; + mediaBox.left = fixedZero; + mediaBox.right = FloatToASFixed(72.0 * 8.5); + mediaBox.bottom = fixedZero; + mediaBox.top = FloatToASFixed(72.0 * 11.0); + + PDPage page = PDDocCreatePage(doc, kPDEBeforeFirst, mediaBox); + + // Run OCR on the image to get Form element containing the image with text underneath. NOTE(review): 300 is presumably the image resolution in DPI - confirm against the PDOCRCreateForm documentation. + PDEForm form = PDOCRCreateForm(&ocrParams, doc, image, 300, OCRMissingFontStrategy_Raise); + + // Put that form into the page in the destination document. The insertion index is (element count - 1); NOTE(review): for the freshly created page this presumably evaluates to -1 (insert before first) - confirm. + PDEContent content = PDPageAcquirePDEContent(page, 0); + PDEContentAddElem(content, PDEContentGetNumElems(content) - 1, (PDEElement)form); + PDPageSetPDEContent(page, 0); + + // Save the output as a full, linearized save to the sOutput path. + PDDocSave(doc, PDSaveFull | PDSaveLinearized, sOutput, NULL, NULL, NULL); + + // Release resources. NOTE(review): none of these releases run if an error is raised above; the HANDLER below only records and displays the error code. + PDPageReleasePDEContent(page, 0); + PDPageRelease(page); + PDDocClose(doc); + + PDERelease((PDEObject)form); + PDERelease(reinterpret_cast(image)); + ASFileSysReleasePath(NULL, sInput); + ASFileSysReleasePath(NULL, sOutput); + + // Release OCREngine resources and terminate the plugin. + PDOCRReleaseParams(&ocrParams); + OCREngineTerminate(); + } // if 0 == errCode + HANDLER + errCode = ERRORCODE; + libInit.displayError(errCode); + END_HANDLER + + return errCode; // APDFLib's destructor terminates the library. 
+} diff --git a/OCR/OCRImage/OCRImage.vcxproj b/OCR/OCRImage/OCRImage.vcxproj new file mode 100644 index 00000000..9da97116 --- /dev/null +++ b/OCR/OCRImage/OCRImage.vcxproj @@ -0,0 +1,246 @@ + + + + + Debug + ARM64 + + + Debug + Win32 + + + Debug + x64 + + + Release + ARM64 + + + Release + Win32 + + + Release + x64 + + + + {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6} + Win32Proj + BlankSample + OCRImage + 10.0 + + + + Application + true + v143 + Unicode + + + Application + true + v143 + Unicode + + + Application + true + v143 + Unicode + + + Application + false + v143 + true + Unicode + + + Application + false + v143 + true + Unicode + + + Application + false + v143 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + + + true + $(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + true + $(Platform)\$(Configuration)\ + + + true + $(Platform)\$(Configuration)\ + + + false + $(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + false + $(Platform)\$(Configuration)\ + + + false + $(Platform)\$(Configuration)\ + + + + NotUsing + Level3 + Disabled + _CRT_SECURE_NO_WARNINGS;_CONSOLE;_DEBUG;DEBUG;_WIN32;WIN32;WIN_ENV;WIN_PLATFORM;PRODUCT="HFTLibrary.h";PI_ACROCOLOR_VERSION=AcroColorHFT_VERSION_6;%(PreprocessorDefinitions) + true + ..\..\..\Include\Headers;..\..\_Common;..\..\_Common;%(AdditionalIncludeDirectories) + + + + Console + true + ..\..\..\Binaries + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;DL180PDFL.lib;%(AdditionalDependencies) + DL180pdfl.dll + + + + + NotUsing + Level3 + Disabled + _CRT_SECURE_NO_WARNINGS;_CONSOLE;_DEBUG;DEBUG;_WIN64;WIN64;WIN_ENV;WIN_PLATFORM;PRODUCT="HFTLibrary.h";PI_ACROCOLOR_VERSION=AcroColorHFT_VERSION_6;%(PreprocessorDefinitions) + true + ..\..\..\Include\Headers;..\..\_Common;..\..\_Common;%(AdditionalIncludeDirectories) + + + + Console + true + ..\..\..\Binaries + 
kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;DL180PDFL.lib;%(AdditionalDependencies) + DL180pdfl.dll + + + + + NotUsing + Level3 + Disabled + _CRT_SECURE_NO_WARNINGS;_CONSOLE;_DEBUG;DEBUG;_WIN64;WIN64;WIN_ENV;WIN_PLATFORM;PRODUCT="HFTLibrary.h";PI_ACROCOLOR_VERSION=AcroColorHFT_VERSION_6;%(PreprocessorDefinitions) + true + ..\..\..\Include\Headers;..\..\_Common;..\..\_Common;%(AdditionalIncludeDirectories) + + + + + Console + true + ..\..\..\Binaries + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;DL180PDFL.lib;%(AdditionalDependencies) + DL180pdfl.dll + + + + + NotUsing + Level3 + Disabled + _CRT_SECURE_NO_WARNINGS;_CONSOLE;WIN32;WIN32;WIN_ENV;WIN_PLATFORM;PRODUCT="HFTLibrary.h";PI_ACROCOLOR_VERSION=AcroColorHFT_VERSION_6;%(PreprocessorDefinitions) + true + ..\..\..\Include\Headers;..\..\_Common;..\..\_Common;%(AdditionalIncludeDirectories) + + + + Console + true + ..\..\..\Binaries + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;DL180PDFL.lib;%(AdditionalDependencies) + DL180pdfl.dll + + + + + NotUsing + Level3 + Disabled + _CRT_SECURE_NO_WARNINGS;_CONSOLE;WIN64;WIN64;WIN_ENV;WIN_PLATFORM;PRODUCT="HFTLibrary.h";PI_ACROCOLOR_VERSION=AcroColorHFT_VERSION_6;%(PreprocessorDefinitions) + true + ..\..\..\Include\Headers;..\..\_Common;..\..\_Common;%(AdditionalIncludeDirectories) + + + + Console + true + ..\..\..\Binaries + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;DL180PDFL.lib;%(AdditionalDependencies) + DL180pdfl.dll + + + + + NotUsing + Level3 + Disabled + 
_CRT_SECURE_NO_WARNINGS;_CONSOLE;WIN64;WIN64;WIN_ENV;WIN_PLATFORM;PRODUCT="HFTLibrary.h";PI_ACROCOLOR_VERSION=AcroColorHFT_VERSION_6;%(PreprocessorDefinitions) + true + ..\..\..\Include\Headers;..\..\_Common;..\..\_Common;%(AdditionalIncludeDirectories) + + + + + Console + true + ..\..\..\Binaries + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;DL180PDFL.lib;%(AdditionalDependencies) + DL180pdfl.dll + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/OCR/OCRImage/OCRImage_32Bit.sln b/OCR/OCRImage/OCRImage_32Bit.sln new file mode 100644 index 00000000..32bf5361 --- /dev/null +++ b/OCR/OCRImage/OCRImage_32Bit.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.14.36414.22 d17.14 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "OCRImage", "OCRImage.vcxproj", "{26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release|Win32 = Release|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Debug|Win32.ActiveCfg = Debug|Win32 + {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Debug|Win32.Build.0 = Debug|Win32 + {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Release|Win32.ActiveCfg = Release|Win32 + {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Release|Win32.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {64F4F291-FCDB-4D98-8BCB-FD51C1A46AEB} + EndGlobalSection +EndGlobal diff --git a/OCR/OCRImage/OCRImage_64Bit.sln b/OCR/OCRImage/OCRImage_64Bit.sln new file mode 100644 index 00000000..74f60855 --- /dev/null +++ 
b/OCR/OCRImage/OCRImage_64Bit.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.14.36414.22 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "OCRImage", "OCRImage.vcxproj", "{26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Debug|x64.ActiveCfg = Debug|x64 + {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Debug|x64.Build.0 = Debug|x64 + {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Release|x64.ActiveCfg = Release|x64 + {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {7FA1B031-C8CA-49FB-B2DD-1E089F917211} + EndGlobalSection +EndGlobal diff --git a/OCR/OCRImage/OCRImage_ARM64.sln b/OCR/OCRImage/OCRImage_ARM64.sln new file mode 100644 index 00000000..181d02f3 --- /dev/null +++ b/OCR/OCRImage/OCRImage_ARM64.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.14.36414.22 d17.14 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "OCRImage_ARM64", "OCRImage.vcxproj", "{CFABC1FE-3F70-47E9-A911-EA085E6D127A}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|ARM64 = Debug|ARM64 + Release|ARM64 = Release|ARM64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {CFABC1FE-3F70-47E9-A911-EA085E6D127A}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {CFABC1FE-3F70-47E9-A911-EA085E6D127A}.Debug|ARM64.Build.0 = Debug|ARM64 + 
{CFABC1FE-3F70-47E9-A911-EA085E6D127A}.Release|ARM64.ActiveCfg = Release|ARM64 + {CFABC1FE-3F70-47E9-A911-EA085E6D127A}.Release|ARM64.Build.0 = Release|ARM64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {BAB73C82-51E9-4B70-956D-875079420EC4} + EndGlobalSection +EndGlobal diff --git a/OCR/README.md b/OCR/README.md new file mode 100644 index 00000000..f247cbf2 --- /dev/null +++ b/OCR/README.md @@ -0,0 +1,2 @@ +## ***OCRImage*** +Recognized text within an image using Optical Character Recognition. diff --git a/_Input/OCRImage.png b/_Input/OCRImage.png new file mode 100644 index 0000000000000000000000000000000000000000..91cba51438af01698c52855d9e218f65d3685a03 GIT binary patch literal 6403 zcmeHMS6CCs7Uu3+QRKP`2ndMu8kOD`sR;rSQV>W`kdn}=bk;>sX-Xg>O8ycNvDBmUv9@{c!JSu`G=3nZYaF(f>XMIZ1N zW!~vn^Gtm3v6%32|98pbX$Ht6efU4dGwQ#ayGi{{aY}2dd!1`kNVERqLi@@RwuHy< z{A)jzBz0(x)qP~p4Su?quOOnJ*=8V@{psxOQk9xalfq2n{>|S(&%%gBZ!Rmuuoi-^ zDx^!n@-zQuOqXlJY}-W$7nOzj#Ln$>O9H0(@} z%MkAUbP+*K&YZ_fZ?CSm5$?te{#<~toRo0+Uf&T)JjBkSd-V1w+Mbxlt@h|cWfc3bNee0s`YfAZGq z{g~9pDXznW3Wjoi?-g^OPp@fjIWne{S2|aC-|X0mXdb^#E)2eu@iX8^6C$Fmyuo{2 zY*V|-%&zO5oH;B0y0Xo;23pJRH-*jB-`l!!y zU>NniyS?*xwTug`Wm7_;#hw_dgtZB_o_$KlHW^)!Xx41IH*>BaCGYY3lZooT45&Nx z^0xuG#Y62vN0e{Fs8972#hfcAN&`~8>qoUB`Rk+lI#(?l zx<{oI#?aM%Xtg&}nUF+mkKW9jmd$cTA+}&HdIBaw(wke4cXhYJWU9K5w^PGfPUpgL zVNS_CJw4=i#2zh|oe?VAXthoF0fQDp8n@8@-Ux-U=9fak4CPgwtfaa(zfJnLZYQj!#O&L`cTT6|V#++Qh#tRL z3{EsciaK0bBieLcE7ZDK7A1$ljc0{KDENWEfhyj|+H2j!qT1h9#NGmAevFX9BpvYc zCXh^5ZhHN_I32}qcY`<7l<_k$xx@%Ya<s+~_(g+p#Dt5Jn zMAS=pdy}OY8w?n)ozV%x)P27BChl3l4;G1Ae3~*bW9!JDRyj{Om&*Dq3Zqt9C54e{ zw&P7{z91!$Jo}5f&a@L4G*>IU1)Vpd^Z6Cwa)DB5^iG}ma1A72GqJr&d~%1#TQ;fX?1YHB}9!&R>wPZ9rJq*`gW?U^}!&^Rkm;bPKMaHpxY(kA^ zpA(jP?&$MJ>$^oF*h|S{4&px?!a7Kf8xZ*YG~aq{dMWY6uR1?1srLuomQ+LrLgdx;oKjCTxFD&olLQF%A)RF{#6+7*6Tm?JvUDSd#F6m 
zh~f>g$zG%ZV-LeSonm+EQ1!zu87J*yDr;`&l4P>vrs?bd)#DXDM9^HO3m*Ze&@CQnhUXO~xl#2tHMsQeb;$)G0Zx7Pd*AM~qx%#*i{WDP-}D*L+&dJ=>v`_)#V*8EWa#D~Vvb1x!ghLkq{i5u(=W)TH_RDkGA5#=Be zACuAN)7j?k)cGRR^0aOxQkh(qYDyzof$M76vunrB4p5Tp7PG-h-E3i%?prPBYlFT&$X8G96)I=pVTX4O-N*!?w=0@@mkbBOPH>j4rneDb>#?JB50c-~lO61nFBKi_sf*R~dX-QHX%pLAqV+UHkvlwxe}Ffq zDIUN19@Q|E@+CKyjZ&JjhCknaSB~qd+O2c|IDg*X@4Z1b1RyviiD(U!|Ea!lQcei@ zO>`3;uRs;;7Ss|XUADFc&HzBAF1guEZiPjCcr`a5!2c9hs0cBM@lsSn1mzhoG+oWwq0Wbek2kx;^E&#HlCaic>d5C?N*96HmqAj}`{~ zd<1}8B=|DB5;HwrsIixOfWl@EjV^piE|&6(DT7pcpN^7{Uy0MzNQpaS*oQazzU?jU zdr)R-l)O~P#26!L&Q)DVhVyw}x=g+!h6tTwhr4mRO8_C|VL5hzv&4aClIPy$2H%Q<0rs?_4JQ+l6M8VX)W(vR zdDHy?a`p%T@&P|4R$k@~loA}!uL|7lbLI|UPXy=TmIzLicymQ-D^z2}wfD;J35Kva zxb8iutYYah=0(OB!%mF`wc|Itxe<}AG6_+#lP%509c0OVGq=+{FwS>?LNm>>RV$*D zordrm>-ql_}QeJoRqeA%GNK(4kO z1c6&A!m&7X=N(vWhUjS5X&o3>SzrTQwwvXJz!$KOatJ~=YZ*IbNt1oZo94DD1tx*tY|>#nXve(-50f|FtOQIcG({eTRAF0K&}G*{9k*iXg;ng z_wR+E(sNFnZ6wWaPyp&KiN{*8=Z+jo9DuS|Q|o$t^s%cvmatYqSkNrX-8epwal&?rpT?oeYgKD?_k z5&M{iq`clX#!8cL*uwGrTCQYKw;NSET0`Qiw)ygffwz6Z@uM)`s~Nb|#a{nPh)!$u z#W&#CHJ8$+n%)lG7enFC#u#myIpkWS3abwu$;Vgqn=W#8;dp7(Swp92Fi|<~Ot_F? 
zM{jFH)d(~exr6oRdhP!z@_5NDbVeyzXuJA6Z=@;`j=?2QxnOwl-?scu|9H6)h=d#E zrAxarn@oG?l3Ir6((Kqoz;UL5f_UE0FCNKm5Q77>EYNYM|3h@1iEz;!s3>@0!~ie8 zyKrA=AV@bFop$vTX+;Xf8h2k$667KVx_pM5sIHD3*~6KKuH|Y+PgJ!6B5_7B&w*e- z5+;y;`9Y$1g|pS6Q7)Oj@$Zx@{J*vTZ|$GTr~HQ|{f{9h+_2(6f)iPWcsRE3 R@UPzupqY(n)%Dwd{tHk Date: Thu, 6 Nov 2025 20:12:32 -0600 Subject: [PATCH 02/10] Use C++ 17 profiles because tesseract requires it --- dlproject.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dlproject.yaml b/dlproject.yaml index 04d35482..dbf8f5f2 100644 --- a/dlproject.yaml +++ b/dlproject.yaml @@ -56,7 +56,7 @@ config: config: Release: profile_host: - - gcc-10-aix72-64-cppstd-14 + - gcc-10-aix72-64-cppstd-17 profile_build: - gcc-10-aix72-64-cppstd-17 options_host: @@ -89,7 +89,7 @@ config: config: Release: profile_host: - - clang-18-linux-x86_64-cppstd-14 + - clang-18-linux-x86_64-cppstd-17 profile_build: - clang-18-linux-x86_64-cppstd-17 build: missing @@ -110,7 +110,7 @@ config: config: Release: profile_host: - - clang-18-linux-armv8-cppstd-14 + - clang-18-linux-armv8-cppstd-17 profile_build: - clang-18-linux-armv8-cppstd-17 build: missing @@ -124,7 +124,7 @@ config: config: Release: profile_host: - - msvc-x64-194-cppstd-14 + - msvc-x64-194-cppstd-17 profile_build: - msvc-x64-194-cppstd-17 settings_host: @@ -140,7 +140,7 @@ config: - '*:license_managed=True' Release32: profile_host: - - msvc-x86-194-cppstd-14 + - msvc-x86-194-cppstd-17 profile_build: - msvc-x86-194-cppstd-17 build: @@ -160,7 +160,7 @@ config: config: Release: profile_host: - - msvc-arm64-193-cppstd-14 + - msvc-arm64-193-cppstd-17 profile_build: - msvc-arm64-193-cppstd-17 settings_host: From ef6ead5b94a9b1e8acccb284271114b8c0472933 Mon Sep 17 00:00:00 2001 From: datalogics-saharay Date: Thu, 6 Nov 2025 20:50:07 -0600 Subject: [PATCH 03/10] Copy tessdata4 dir into Binaries to run OCR sample --- conanfile.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/conanfile.py b/conanfile.py index 
e0bdcfe5..d4f5e829 100644 --- a/conanfile.py +++ b/conanfile.py @@ -57,6 +57,10 @@ def copy_apdfl(self, destination): if self.settings.os != "Windows": copy(self, "*", src=forms_ext_lib, dst=destination) + tessdata_path = os.path.join(apdfl_pkg.cpp_info.bindir, "tessdata4") + if os.path.isdir(tessdata_path): + copy(self, "*", src=tessdata_path, dst=os.path.join(destination, "tessdata4"), keep_path=True) + def _imports(self): pdfl_pkg_inc = os.path.join(self.dependencies["adobe_pdf_library"].package_folder, 'include') pdfl_pkg_src = os.path.join(self.dependencies["adobe_pdf_library"].package_folder, 'src') From ba3d35950cc444caed334352fa46c99f45898d3d Mon Sep 17 00:00:00 2001 From: datalogics-saharay Date: Thu, 6 Nov 2025 21:51:32 -0600 Subject: [PATCH 04/10] Retarget OCRImage.vcxproj to use v142 toolset --- OCR/OCRImage/OCRImage.vcxproj | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/OCR/OCRImage/OCRImage.vcxproj b/OCR/OCRImage/OCRImage.vcxproj index 9da97116..7fce1416 100644 --- a/OCR/OCRImage/OCRImage.vcxproj +++ b/OCR/OCRImage/OCRImage.vcxproj @@ -37,39 +37,39 @@ Application true - v143 + v142 Unicode Application true - v143 + v142 Unicode Application true - v143 + v142 Unicode Application false - v143 + v142 true Unicode Application false - v143 + v142 true Unicode Application false - v143 + v142 true Unicode From 6194c1fcfc8734541323938b47e454187279511e Mon Sep 17 00:00:00 2001 From: datalogics-saharay Date: Thu, 6 Nov 2025 22:08:53 -0600 Subject: [PATCH 05/10] Force use of xapian-core 64-bit on Windows ARM --- dlproject.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/dlproject.yaml b/dlproject.yaml index dbf8f5f2..6095f74d 100644 --- a/dlproject.yaml +++ b/dlproject.yaml @@ -168,4 +168,5 @@ config: - adobe_pdf_library/*:compiler.runtime=static - adobe_pdf_library/*:compiler.runtime_type=Release - adobe_pdf_library/*:compiler.update=8 + - xapian-core/*:arch=x86_64 build: missing From 
0a8453854baca9da50aa6ea02ab998db90417f3b Mon Sep 17 00:00:00 2001 From: datalogics-saharay Date: Fri, 7 Nov 2025 09:19:48 -0600 Subject: [PATCH 06/10] Remove unnecessary changes to 32-bit .sln files as OCR is not supported there --- All/All_Datalogics_32Bit.sln | 6 ------ OCR/OCRImage/OCRImage_32Bit.sln | 25 ------------------------- 2 files changed, 31 deletions(-) delete mode 100644 OCR/OCRImage/OCRImage_32Bit.sln diff --git a/All/All_Datalogics_32Bit.sln b/All/All_Datalogics_32Bit.sln index e65229f7..b3fb481e 100644 --- a/All/All_Datalogics_32Bit.sln +++ b/All/All_Datalogics_32Bit.sln @@ -165,8 +165,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "AddDigitalSignatureCMS", ". EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "AddDigitalSignatureRFC3161", "..\Security\AddDigitalSignatureRFC3161\AddDigitalSignatureRFC3161.vcxproj", "{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "OCRImage", "..\OCR\OCRImage\OCRImage.vcxproj", "{26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}" -EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 @@ -493,10 +491,6 @@ Global {1D787362-28C9-4460-9606-840F3B484350}.Debug|Win32.Build.0 = Debug|Win32 {1D787362-28C9-4460-9606-840F3B484350}.Release|Win32.ActiveCfg = Release|Win32 {1D787362-28C9-4460-9606-840F3B484350}.Release|Win32.Build.0 = Release|Win32 - {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Debug|Win32.ActiveCfg = Debug|Win32 - {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Debug|Win32.Build.0 = Debug|Win32 - {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Release|Win32.ActiveCfg = Release|Win32 - {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Release|Win32.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/OCR/OCRImage/OCRImage_32Bit.sln b/OCR/OCRImage/OCRImage_32Bit.sln deleted file mode 100644 index 32bf5361..00000000 --- a/OCR/OCRImage/OCRImage_32Bit.sln +++ 
/dev/null @@ -1,25 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.14.36414.22 d17.14 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "OCRImage", "OCRImage.vcxproj", "{26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Win32 = Debug|Win32 - Release|Win32 = Release|Win32 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Debug|Win32.ActiveCfg = Debug|Win32 - {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Debug|Win32.Build.0 = Debug|Win32 - {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Release|Win32.ActiveCfg = Release|Win32 - {26B0DAAC-1B6E-4020-BEC0-D47DDBA263C6}.Release|Win32.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {64F4F291-FCDB-4D98-8BCB-FD51C1A46AEB} - EndGlobalSection -EndGlobal From 61643a0fdc72b7230a59a42144f1053ee6c3c697 Mon Sep 17 00:00:00 2001 From: datalogics-saharay Date: Fri, 7 Nov 2025 10:40:52 -0600 Subject: [PATCH 07/10] Attempt to resolve Win ARM bootstrap issue --- dlproject.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dlproject.yaml b/dlproject.yaml index 6095f74d..6f715106 100644 --- a/dlproject.yaml +++ b/dlproject.yaml @@ -163,10 +163,11 @@ config: - msvc-arm64-193-cppstd-17 profile_build: - msvc-arm64-193-cppstd-17 + settings_build: + - xapian-core/*:arch=x86_64 settings_host: - adobe_pdf_library/*:build_type=RelWithDebInfo - adobe_pdf_library/*:compiler.runtime=static - adobe_pdf_library/*:compiler.runtime_type=Release - adobe_pdf_library/*:compiler.update=8 - - xapian-core/*:arch=x86_64 build: missing From 201be38a3b2312768b9e5f2198dae329d87ec97c Mon Sep 17 00:00:00 2001 From: datalogics-robl 
Date: Fri, 7 Nov 2025 15:51:16 -0600 Subject: [PATCH 08/10] Fix argument count in OCRImage sample --- OCR/OCRImage/OCRImage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/OCR/OCRImage/OCRImage.cpp b/OCR/OCRImage/OCRImage.cpp index f80615b6..f4bc917f 100644 --- a/OCR/OCRImage/OCRImage.cpp +++ b/OCR/OCRImage/OCRImage.cpp @@ -35,7 +35,7 @@ int main(int argc, char **argv) { } std::string csInputFileName(argc > 1 ? argv[1] : DIR_LOC DEF_INPUT); - std::string csOutputFileName(argc > 4 ? argv[4] : DEF_OUTPUT); + std::string csOutputFileName(argc > 2 ? argv[2] : DEF_OUTPUT); std::cout << "Recognizing text in " << csInputFileName.c_str() << std::endl; ASPathName sInput = APDFLDoc::makePath(csInputFileName.c_str()); From 6147930dc0da98398705e768ab2bfd3b4f652e59 Mon Sep 17 00:00:00 2001 From: Robert Boehne Date: Fri, 7 Nov 2025 14:54:26 -0700 Subject: [PATCH 09/10] Add build settings so we don't try to build a doxygen on arm --- dlproject.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dlproject.yaml b/dlproject.yaml index 6f715106..d60bf5ea 100644 --- a/dlproject.yaml +++ b/dlproject.yaml @@ -164,7 +164,10 @@ config: profile_build: - msvc-arm64-193-cppstd-17 settings_build: + - msys2/*:arch=x86_64 + - strawberryperl/*:arch=x86_64 - xapian-core/*:arch=x86_64 + - zlib/*:arch=x86_64 settings_host: - adobe_pdf_library/*:build_type=RelWithDebInfo - adobe_pdf_library/*:compiler.runtime=static From dbf6cb8814336b5938c722abe78f331211b25f76 Mon Sep 17 00:00:00 2001 From: Robert Boehne Date: Fri, 7 Nov 2025 15:19:58 -0700 Subject: [PATCH 10/10] Set the toolset to v143 for Windows ARM. 
--- OCR/OCRImage/OCRImage.vcxproj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/OCR/OCRImage/OCRImage.vcxproj b/OCR/OCRImage/OCRImage.vcxproj index 7fce1416..6e5bd686 100644 --- a/OCR/OCRImage/OCRImage.vcxproj +++ b/OCR/OCRImage/OCRImage.vcxproj @@ -49,7 +49,7 @@ Application true - v142 + v143 Unicode @@ -69,7 +69,7 @@ Application false - v142 + v143 true Unicode