Merging documents into a single document using Open XML SDK 2.0 and Word Automation Services
By peter.stilgoe
The code below creates a web part that can be added to any document library. When the ‘Merge Reports’ button is clicked, it merges all the documents in the library into a single document using the Open XML SDK and Word Automation Services.
You will need to add the following references:
DocumentFormat.OpenXml
Microsoft.Office.Word.Server
Microsoft.SharePoint
Microsoft.SharePoint.Client
Microsoft.SharePoint.Client.Runtime
WindowsBase
using System;
using System.ComponentModel;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using Microsoft.SharePoint;
using System.Linq;
using System.IO;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using System.Text;
using Microsoft.SharePoint.Client;
using ClientOM = Microsoft.SharePoint.Client;
using Word = Microsoft.Office.Word.Server;
using Microsoft.Office.Word.Server.Conversions;
namespace DocumentMerge.VisualWebPart1
{
    /// <summary>
    /// Web part that renders a "Merge Reports" button. Clicking the button merges the
    /// .docx files of the current list (<c>SPContext.Current.List</c>) into a single
    /// document, uploads it next to them, and queues a Word Automation Services
    /// conversion job on the uploaded file.
    /// </summary>
    [ToolboxItemAttribute(false)]
    public class VisualWebPart1 : WebPart
    {
        // Visual Studio might automatically update this path when you change the Visual Web Part project item.
        private const string _ascxPath = @"~/_CONTROLTEMPLATES/DocumentMerge/VisualWebPart1/VisualWebPart1UserControl.ascx";

        // File-name prefix of the generated document; also used to recognise (and skip)
        // reports produced by earlier runs that live in the same library.
        private const string MasterReportPrefix = "masterreport-";

        // Only files with this extension are imported, because every chunk is declared
        // as WordprocessingML.
        private const string WordExtension = ".docx";

        // Blank main document part used to seed the new DOCX: a body with one empty paragraph.
        private const string EmptyMainPartXml =
            "<?xml version='1.0' encoding='UTF-8' standalone='yes'?>" +
            "<w:document xmlns:w='http://schemas.openxmlformats.org/wordprocessingml/2006/main'>" +
            "<w:body><w:p><w:r><w:t></w:t></w:r></w:p></w:body></w:document>";

        /// <summary>
        /// Loads the user control and adds the "Merge Reports" button below it.
        /// </summary>
        protected override void CreateChildControls()
        {
            System.Web.UI.Control control = Page.LoadControl(_ascxPath);
            Controls.Add(control);
            base.CreateChildControls();

            Button btnSubmit = new Button();
            btnSubmit.Text = "Merge Reports";
            btnSubmit.Click += new EventHandler(OnSubmitClick);
            Controls.Add(btnSubmit);
        }

        /// <summary>
        /// Click handler for the "Merge Reports" button: builds the merged document in
        /// memory, uploads it, and starts the conversion job.
        /// </summary>
        /// <param name="sender">The button that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void OnSubmitClick(object sender, EventArgs e)
        {
            // Output location: <root folder>/masterreport-<ddMMyyyy>.docx. The invariant culture
            // keeps the date stamp independent of the request thread's culture and calendar.
            string outputPath = string.Format(
                System.Globalization.CultureInfo.InvariantCulture,
                @"{0}/{1}{2}{3}",
                SPContext.Current.RootFolderUrl,
                MasterReportPrefix,
                DateTime.Now.ToString("ddMMyyyy", System.Globalization.CultureInfo.InvariantCulture),
                WordExtension);

            // In-memory stream for the consolidated DOCX.
            using (MemoryStream memOut = new MemoryStream())
            {
                BuildMergedDocument(memOut, SPContext.Current.List.Items);

                // The package has been closed by now, so the stream holds the complete file.
                memOut.Seek(0, SeekOrigin.Begin);
                using (ClientContext clientContext = new ClientContext(SPContext.Current.Site.Url))
                {
                    ClientOM.File.SaveBinaryDirect(clientContext, outputPath, memOut, true);
                }
            }

            // Conversion: DOCX -> DOCX in place.
            // outputPath is used above as a server-relative URL, so let SharePoint turn it into
            // an absolute URL. Concatenating it onto Site.Url would repeat the site-collection
            // path whenever the site collection is not at the web application root.
            // NOTE(review): assumes SPContext.RootFolderUrl is server-relative - confirm.
            string docPath = SPContext.Current.Site.MakeFullUrl(outputPath);

            ConversionJobSettings jobSettings = new ConversionJobSettings();
            jobSettings.OutputFormat = SaveFormat.Document;
            jobSettings.OutputSaveBehavior = SaveBehavior.AlwaysOverwrite;

            ConversionJob convJob = new ConversionJob("Word Automation Services", jobSettings);
            convJob.UserToken = SPContext.Current.Site.UserToken;
            convJob.AddFile(docPath, docPath);
            convJob.Start();
        }

        /// <summary>
        /// Writes a new WordprocessingML package to <paramref name="target"/> whose body
        /// contains one altChunk per eligible file in <paramref name="items"/>, in
        /// enumeration order.
        /// </summary>
        /// <param name="target">Stream that receives the package; left open for the caller.</param>
        /// <param name="items">List items whose files are merged.</param>
        private static void BuildMergedDocument(Stream target, SPListItemCollection items)
        {
            // Disposing the document closes the package and flushes it to the target stream,
            // including when an exception escapes the loop.
            using (WordprocessingDocument outputDoc = WordprocessingDocument.Create(target, DocumentFormat.OpenXml.WordprocessingDocumentType.Document))
            {
                MainDocumentPart mainPart = outputDoc.AddMainDocumentPart();

                // Seed the main part with the blank document markup.
                byte[] emptyPartBytes = Encoding.UTF8.GetBytes(EmptyMainPartXml);
                using (Stream partStream = mainPart.GetStream())
                {
                    partStream.Write(emptyPartBytes, 0, emptyPartBytes.Length);
                }

                Body body = mainPart.Document.Body;
                int id = 1;
                foreach (SPListItem item in items)
                {
                    SPFile inputFile = item.File;
                    if (!IsMergeCandidate(inputFile))
                    {
                        continue;
                    }

                    string altChunkId = "AltChunkId" + id;
                    id++;

                    AlternativeFormatImportPart chunk = mainPart.AddAlternativeFormatImportPart(
                        AlternativeFormatImportPartType.WordprocessingML,
                        altChunkId);
                    using (MemoryStream mem = new MemoryStream(inputFile.OpenBinary(), false))
                    {
                        chunk.FeedData(mem);
                    }

                    AltChunk altChunk = new AltChunk();
                    altChunk.Id = altChunkId;

                    // Append at the end of the body so chunks keep the enumeration order.
                    // Inserting after the last paragraph would always target the single seed
                    // paragraph and therefore reverse the order of the merged documents.
                    body.AppendChild(altChunk);
                }

                // One save after all chunks have been added.
                mainPart.Document.Save();
            }
        }

        /// <summary>
        /// Decides whether a list item's file should be imported into the merged document.
        /// </summary>
        /// <param name="file">The item's file; may be null for items without a file.</param>
        /// <returns>
        /// True for .docx files that are not previously generated master reports.
        /// </returns>
        private static bool IsMergeCandidate(SPFile file)
        {
            if (file == null)
            {
                return false;
            }

            string name = file.Name;
            if (string.IsNullOrEmpty(name) || !name.EndsWith(WordExtension, StringComparison.OrdinalIgnoreCase))
            {
                return false;
            }

            // Skip earlier outputs of this web part so a report is never merged into itself.
            return !name.StartsWith(MasterReportPrefix, StringComparison.OrdinalIgnoreCase);
        }
    }
}
More From pstilgoe
Word Automation Services – Convert a Word 2010 Doc to PDF
By peter.stilgoe
The following example provides the complete C# listing for the simplest Word Automation Services application.
You need to add references to WindowsBase, Microsoft.Office.Word.Server, and Microsoft.SharePoint.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Microsoft.SharePoint;
using Microsoft.Office.Word.Server.Conversions;
/// <summary>
/// Console sample that queues a single Word Automation Services job converting
/// Test.docx in the Shared Documents library of the local site to Test.pdf.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point: opens the site, configures the conversion job and starts it.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        const string siteUrl = "http://localhost";

        // If you manually installed Word Automation Services, replace this value with
        // the name that you assigned to the service when you installed it.
        const string serviceName = "Word Automation Services";

        // Source document and the PDF that the job should produce from it.
        string sourceUrl = siteUrl + "/Shared%20Documents/Test.docx";
        string targetUrl = siteUrl + "/Shared%20Documents/Test.pdf";

        using (SPSite site = new SPSite(siteUrl))
        {
            ConversionJob pdfJob = new ConversionJob(serviceName);

            // Output as PDF, with field updating switched on for the conversion.
            pdfJob.Settings.OutputFormat = SaveFormat.PDF;
            pdfJob.Settings.UpdateFields = true;

            // The job is submitted with the site's user token.
            pdfJob.UserToken = site.UserToken;

            pdfJob.AddFile(sourceUrl, targetUrl);
            pdfJob.Start();
        }
    }
}
More From pstilgoe
Sharepoint 2010 Word Automation Services – Merging documents from a document set into a single document
By peter.stilgoe
Combining data and images from various Microsoft Office programs to create a single document is a frequently requested scenario. Doing this from documents stored in SharePoint Foundation 2010 has several advantages. Learn to create Word documents in a document library by merging components from Excel, PowerPoint, and other Word documents. (25 printed pages)
Document assembly seems to be a hot topic these days especially when you combine it with the power of Microsoft SharePoint Server 2010. Prior to the 2007 Microsoft Office system, accessing text, tables, images, and other information to create documents in Microsoft Word, for example, relied on COM Automation to access each program contributing to the merge. Automation has its share of issues such as interruptions by pop-up dialog boxes, timing constraints, inability to scale out, and other limitations. Additionally, running Microsoft Office solutions on the server is not recommended or supported by Microsoft.
With the release of 2007 Microsoft Office system, the Open XML file formats present documents as a composite of parts and relationships. For example, there are image parts, document parts, and charts parts. This enables you to access and create documents and their component parts without using Automation. In addition, the Open XML SDK 2.0 for Microsoft Office enables you to create and manipulate documents, and expose data and other components programmatically, without using Microsoft Office Automation. And because Automation is no longer required to work with Microsoft Office documents, you can now perform merges and other actions directly on the server.
Likewise, SharePoint Server 2010 is a great tool for storing, sharing, and controlling documents and providing functionality to work with documents. For example, the Document Center in SharePoint Server 2010 is an area where you can collaborate and store documents in document libraries. Document libraries are containers that are already configured to use workflow, version history, and other features that are important for working with documents at the enterprise level.
This article describes a rich document assembly solution which takes Microsoft Word 2010 documents, Microsoft Excel 2010 documents, and Microsoft PowerPoint 2010 documents and merges them together to form a final report in Word. It does all of this from a SharePoint 2010 Web Part.
Imagine a scenario where you work for a company that analyzes stocks and generates reports for every company and stock analyzed. These reports are typically very rich and usually involve more than one person contributing to the content. Content is separated into multiple Word documents, Excel documents, and PowerPoint documents where each document is assigned to a person. After all the content is written, the content is assembled into a final report as a Word document. The company asks you to write a solution that merges all these documents programmatically.
This solution uses document sets, a feature in SharePoint 2010. Document sets enable you to manage collections of documents as single objects. Think of this feature as a binder of related content.
In this solution, a custom document set includes a set of files (six in this example) that correspond to the various components of the final analysis report. Figure 1 shows a document set for a company called Contoso.
Merging Documents from a Sharepoint 2010 Document Set to create a single document
SharePoint 2010 Word Automation Services
By peter.stilgoe
Use Word Automation Services to do server-side document conversions to and from a variety of document formats. By using the Open XML SDK, you can accomplish tasks that are difficult such as updating the table of contents or repaginating documents.
This scenario describes how you can use Word Automation Services to automate processing documents on a server.
An expert creates some Word template documents that follow specific conventions. She might use content controls to give structure to the template documents. This provides a good user experience and a reliable programmatic approach for determining the locations in the template document where data should be replaced in the document generation process. These template documents are typically stored in a SharePoint document library.
A program runs on the server to merge the template documents together with data, generating a set of Open XML WordprocessingML (DOCX) documents. This program is best written by using the Open XML SDK 2.0 for Microsoft Office, which is designed specifically for generating documents on a server. These documents are placed in a SharePoint document library.
After generating the set of documents, they might be automatically printed. Or, they might be sent by e-mail to a set of users, either as WordprocessingML documents, or perhaps as PDF, XPS, or MHTML documents after converting them from WordprocessingML to the desired format.
As part of the conversion, you can instruct Word Automation Services to update fields, such as the table of contents.
Using the Open XML SDK 2.0 for Microsoft Office together with Word Automation Services enables you to create rich, end-to-end solutions that perform well and do not require automation of the Word client application.
One of the key advantages of Word Automation Services is that it can scale out to your needs. Unlike the Word client application, you can configure it to use multiple processors. Further, you can configure it to load balance across multiple servers if your needs require that.
Another key advantage is that Word Automation Services has perfect fidelity with the Word client applications. Document layout, including pagination, is identical regardless of whether the document is processed on the server or client.



March 14th, 2011
