PDF Xpress for .NET - User Guide > How To > Search PDF Document Text |
PDF Xpress™ supports extracting text content from PDF pages and reporting contextual information for specific words. Use PDF Xpress to report the bounding quadrilaterals and surrounding words for any whole or partial word extracted from the pages of a PDF document.
To search text in a PDF document:
C# Example |
Copy Code
|
---|---|
/// <summary>
/// Demonstrates how to search PDF document text.
/// Searches every page of a PDF document for each occurrence of a search
/// string, matching the exact, case-sensitive search phrase, and writes the
/// matched text, its surrounding words and the userspace bounding
/// quadrilaterals of each match to the console.
/// </summary>
/// <param name="document">The open PDF document to search.</param>
/// <param name="searchPhraseString">
/// The phrase to look for. An empty phrase matches nothing.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="document"/> or <paramref name="searchPhraseString"/> is null.
/// </exception>
public void SearchText( Document document , String searchPhraseString )
{
    if ( null == document )
    {
        throw new ArgumentNullException( "document" ) ;
    }
    if ( null == searchPhraseString )
    {
        throw new ArgumentNullException( "searchPhraseString" ) ;
    }
    if ( 0 == searchPhraseString.Length )
    {
        // An empty phrase "matches" at every offset and would eventually
        // push the search cursor past the end of the page text.
        return ;
    }

    // For each page in the PDF document
    for ( Int32 pageIndex = 0 ; pageIndex < document.PageCount ; pageIndex++ )
    {
        // Declared outside the try block so the finally clause can release it.
        TextFinder textFinder = null ;
        try
        {
            // Get the text on the page.
            textFinder = document.GetTextFinder( pageIndex ) ;
            String searchDomainString = textFinder.GetText( ) ;

            // For each occurrence of the search phrase on the page.
            // Ordinal comparison keeps the match exact and case-sensitive and
            // guarantees the match length equals the phrase length, which the
            // offset arithmetic below relies on.
            Int32 searchCursor = searchDomainString.IndexOf( searchPhraseString , 0 , StringComparison.Ordinal ) ;
            while ( 0 <= searchCursor )
            {
                // Request PDF text information for the match.
                // EndTextOffset is the offset of the last matched character.
                TextMatchOptions whichMatch = new TextMatchOptions( ) ;
                whichMatch.BeginTextOffset = searchCursor ;
                whichMatch.EndTextOffset = searchCursor + searchPhraseString.Length - 1 ;
                TextMatch textMatch = textFinder.GetTextMatch( whichMatch ) ;

                // Report PDF text match information
                String locationString = "" ;
                foreach ( Quadrilateral quad in textMatch.BoundingQuadrilaterals )
                {
                    // Width and height are the extents between the
                    // bottom-left and top-right corners.
                    locationString += String.Format
                        ( NumberFormatInfo.CurrentInfo
                        , "Userspace quadrilateral: x={0} y={1} w={2} h={3}" + Environment.NewLine
                        , quad.BottomLeft.X
                        , quad.BottomLeft.Y
                        , quad.TopRight.X - quad.BottomLeft.X
                        , quad.TopRight.Y - quad.BottomLeft.Y
                        ) ;
                }

                // Context is reported in reading order: words before, match, words after.
                String prettyText = String.Format
                    ( "Matched \"{0}\" on page {1}, context is \"{2}{0}{3}\"\r\n{4}"
                    , textMatch.MatchedText
                    , pageIndex + 1
                    , textMatch.WordsBefore
                    , textMatch.WordsAfter
                    , locationString
                    ) ;
                Console.WriteLine( prettyText ) ;

                // Advance the search cursor by one character so overlapping
                // occurrences are also found. The cursor never exceeds the
                // text length here because the phrase is non-empty.
                searchCursor = searchDomainString.IndexOf( searchPhraseString , searchCursor + 1 , StringComparison.Ordinal ) ;
            }
        }
        catch ( PdfXpressLicensingException ex )
        {
            // PDF Xpress is not licensed for this feature.
            Console.Error.WriteLine( "Page {0}: PDF Xpress is not licensed for text search: {1}" , pageIndex + 1 , ex.Message ) ;
        }
        catch ( PdfXpressLibraryException ex )
        {
            // A problem was encountered.
            Console.Error.WriteLine( "Page {0}: PDF library error: {1}" , pageIndex + 1 , ex.Message ) ;
        }
        catch ( PdfXpressException ex )
        {
            // A problem was encountered.
            Console.Error.WriteLine( "Page {0}: PDF Xpress error: {1}" , pageIndex + 1 , ex.Message ) ;
        }
        finally
        {
            if ( null != textFinder )
            {
                textFinder.Dispose( ) ;
            }
        }
    }
}