Reading texts from a Microsoft PPT document using Aspose.Slides:
#region "Description"
/*
* The text of each slide in a Microsoft PPT document can be extracted
* using the GetAllTextsFromPPT() method given below.
*/
#endregion
#region "Methods"
// Method name: GetAllTextsFromPPT()
/// <summary>
/// Extracts the text of every slide in a PPT document.
/// </summary>
/// <param name="fileStream">
/// Stream containing the PPT document. It is rewound to the start before
/// loading when it supports seeking.
/// </param>
/// <returns>
/// A dictionary keyed by zero-based slide index (slide position minus one).
/// Each value is the concatenation of the slide's paragraph texts, every
/// paragraph prefixed with " ; ". A slide with no text maps to an empty string.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="fileStream"/> is null.
/// </exception>
public static Dictionary<int, string> GetAllTextsFromPPT(Stream fileStream)
{
    if (fileStream == null)
    {
        throw new System.ArgumentNullException("fileStream");
    }

    // Only rewind seekable streams; assigning Position on a non-seekable
    // stream throws NotSupportedException.
    if (fileStream.CanSeek)
    {
        fileStream.Position = 0;
    }

    Dictionary<int, string> pageContent = new Dictionary<int, string>();
    Aspose.Slides.Presentation pptPresentation = new Aspose.Slides.Presentation(fileStream);

    // Iterate all slides. Positions start at 1; results are keyed by position - 1.
    int lastSlidePosition = pptPresentation.Slides.LastSlidePosition;
    for (int pos = 1; pos <= lastSlidePosition; pos++)
    {
        Aspose.Slides.Slide sld = pptPresentation.GetSlideByPosition(pos);

        // Accumulate the slide's text in a builder instead of repeatedly
        // re-reading and re-writing the dictionary entry.
        System.Text.StringBuilder slideText = new System.Text.StringBuilder();

        int shapesCount = sld.Shapes.Count;
        for (int shpIdx = 0; shpIdx < shapesCount; shpIdx++)
        {
            Aspose.Slides.Shape shp = sld.Shapes[shpIdx];

            // Get the paragraphs from the text frame or, failing that,
            // from the shape's text holder placeholder.
            Aspose.Slides.ParagraphCollection paras = null;
            if (shp.TextFrame != null)
            {
                paras = shp.TextFrame.Paragraphs;
            }
            else if (shp.Placeholder != null && shp.IsTextHolder)
            {
                // Type-test instead of comparing the type name, so the
                // conversion can never fail with InvalidCastException.
                Aspose.Slides.TextHolder thld = shp.Placeholder as Aspose.Slides.TextHolder;
                if (thld != null)
                {
                    paras = thld.Paragraphs;
                }
            }

            if (paras == null)
            {
                continue;
            }

            int parasCount = paras.Count;
            for (int paraIdx = 0; paraIdx < parasCount; paraIdx++)
            {
                Aspose.Slides.Paragraph para = paras[paraIdx];
                // Every paragraph, including the first, is prefixed with " ; ".
                slideText.Append(" ; ").Append(para.Text);
            }
        }

        pageContent[pos - 1] = slideText.ToString();
    }

    return pageContent;
}
No comments:
Post a Comment