Operand

engine, you in?

gram: docs

> ./packages/core/src/docx/rezip/images.ts

/**
* New Image Registration
*
* On save, scan all parts (body + headers + footers) for images whose `src`
* is still a data URL — these were inserted in the editor and have no rels
* entry yet — then write the binary data into `word/media/`, register the
* relationship, update `[Content_Types].xml` for new extensions, and assign
* the resulting rId back onto the image so the serializer emits the right
* `r:embed` reference.
*/
import type JSZip from 'jszip';
import type { BlockContent, Image } from '../../types/content';
import type { Document } from '../../types/document';
import { RELATIONSHIP_TYPES } from '../relsParser';
import { findMaxRId, readRelsOrStub, headerFooterFilename, type Part } from './parts';
/**
 * Resolve the content type to declare for an image file extension.
 *
 * An explicit `mimeType` always wins; the built-in extension table is only
 * consulted when `mimeType` is empty.
 *
 * @param extension - File extension without the leading dot (e.g. `png`);
 *   matched case-insensitively.
 * @param mimeType - MIME type returned verbatim when non-empty.
 * @returns `mimeType` when provided, otherwise the known content type for
 *   `extension`, otherwise `application/octet-stream`.
 */
export function getContentTypeForExtension(extension: string, mimeType: string): string {
  if (mimeType) return mimeType;
  // A Map rather than an object literal: inherited keys such as `constructor`
  // or `toString` must never be mistaken for known extensions.
  const contentTypes = new Map<string, string>([
    ['png', 'image/png'],
    ['jpg', 'image/jpeg'],
    ['jpeg', 'image/jpeg'],
    ['gif', 'image/gif'],
    ['bmp', 'image/bmp'],
    ['tif', 'image/tiff'],
    ['tiff', 'image/tiff'],
    ['svg', 'image/svg+xml'],
    ['webp', 'image/webp'],
    ['wmf', 'image/x-wmf'],
    ['emf', 'image/x-emf'],
  ]);
  return contentTypes.get(extension.toLowerCase()) ?? 'application/octet-stream';
}
/**
 * Gather every image in `blocks` whose `src` is a `data:` URL.
 *
 * Such images were inserted in the editor and still need to be added to the
 * ZIP. Paragraph runs are inspected both directly and inside tracked-change
 * wrappers (insertion / deletion / moveFrom / moveTo); table cells are walked
 * recursively. Images are returned in document order.
 */
function collectNewImages(blocks: BlockContent[]): Image[] {
  const found: Image[] = [];

  // Push every data-URL drawing of a single run onto `found`.
  const scanRun = (run: { content: { type: string; image?: Image }[] }): void => {
    for (const piece of run.content) {
      if (piece.type !== 'drawing') continue;
      const picture = piece.image;
      if (picture?.src?.startsWith('data:')) found.push(picture);
    }
  };

  for (const block of blocks) {
    switch (block.type) {
      case 'paragraph':
        for (const item of block.content) {
          switch (item.type) {
            case 'run':
              scanRun(item);
              break;
            // A picture inserted/deleted under track changes lives inside an
            // ins/del/move wrapper — descend so its media part still gets written.
            case 'insertion':
            case 'deletion':
            case 'moveFrom':
            case 'moveTo':
              for (const inner of item.content) {
                if (inner.type === 'run') scanRun(inner);
              }
              break;
            default:
              break;
          }
        }
        break;
      case 'table':
        for (const row of block.rows) {
          for (const cell of row.cells) {
            for (const nested of collectNewImages(cell.content)) {
              found.push(nested);
            }
          }
        }
        break;
      default:
        break;
    }
  }

  return found;
}
/**
 * Map a lower-case image MIME type to the file extension used for the media
 * part. Covers every content type known to `getContentTypeForExtension`, plus
 * the non-standard but commonly seen `image/jpg` alias.
 */
const MIME_TO_EXT: Record<string, string> = {
  'image/png': 'png',
  'image/jpeg': 'jpeg',
  'image/jpg': 'jpeg',
  'image/gif': 'gif',
  'image/bmp': 'bmp',
  'image/tiff': 'tiff',
  'image/webp': 'webp',
  'image/svg+xml': 'svg',
  'image/x-wmf': 'wmf',
  'image/x-emf': 'emf',
};
/**
 * Decode a base64 `data:` URL into its raw bytes and a media file extension.
 *
 * Media-type parameters between the MIME type and the `;base64` marker are
 * accepted and ignored (e.g. `data:image/png;charset=utf-8;base64,...`), the
 * MIME type is matched case-insensitively, and the payload may contain line
 * breaks.
 *
 * @param dataUrl - A base64-encoded data URL.
 * @returns The decoded bytes and the extension for the MIME type; MIME types
 *   missing from `MIME_TO_EXT` fall back to `png`.
 * @throws Error with message `Invalid data URL` when the input is not a
 *   base64 data URL with a non-empty payload. A malformed base64 payload makes
 *   `atob` throw.
 */
function decodeDataUrl(dataUrl: string): { data: ArrayBuffer; extension: string } {
  // data:<mime>[;param]*;base64,<payload>
  const match = dataUrl.match(/^data:([^;,]+)(?:;[^;,]+)*;base64,([\s\S]+)$/i);
  if (!match) {
    throw new Error('Invalid data URL');
  }
  const mimeType = match[1].trim().toLowerCase();
  // Own-property check so inherited keys (`constructor`, ...) are treated as
  // unknown MIME types instead of yielding a non-string extension.
  const extension = Object.prototype.hasOwnProperty.call(MIME_TO_EXT, mimeType)
    ? MIME_TO_EXT[mimeType]
    : 'png';
  // `atob` yields one character per byte; copy the char codes into a byte array.
  const binary = atob(match[2]);
  const bytes = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) {
    bytes[i] = binary.charCodeAt(i);
  }
  return { data: bytes.buffer, extension };
}
/**
 * Ensure `[Content_Types].xml` has a `<Default>` entry for each of the given
 * image extensions (idempotent).
 *
 * Existing declarations are detected case-insensitively and with either quote
 * style, so an entry such as `Extension="PNG"` is not duplicated by adding
 * `png`. The file is only rewritten when at least one entry was added.
 *
 * Does nothing when `extensions` is empty or the package has no
 * `[Content_Types].xml`.
 *
 * @param zip - Package to update in place.
 * @param extensions - Extensions (without dot) of media files that were added.
 * @param compressionLevel - DEFLATE level used when rewriting the file.
 */
async function registerImageExtensions(
  zip: JSZip,
  extensions: Set<string>,
  compressionLevel: number
): Promise<void> {
  if (extensions.size === 0) return;
  const ctFile = zip.file('[Content_Types].xml');
  if (!ctFile) return;

  let ctXml = await ctFile.async('text');

  // Lower-cased extensions that already have a declaration in the file.
  const declared = new Set<string>();
  for (const m of ctXml.matchAll(/\bExtension\s*=\s*(?:"([^"]*)"|'([^']*)')/g)) {
    declared.add((m[1] ?? m[2] ?? '').toLowerCase());
  }

  let changed = false;
  for (const ext of extensions) {
    const key = ext.toLowerCase();
    if (declared.has(key)) continue;
    const contentType = getContentTypeForExtension(ext, '');
    const entry = `<Default Extension="${ext}" ContentType="${contentType}"/></Types>`;
    // Function replacer so the inserted text is taken literally (no `$&`-style
    // substitution patterns are interpreted).
    const updated = ctXml.replace('</Types>', () => entry);
    if (updated === ctXml) continue; // no closing tag to anchor on — nothing inserted
    ctXml = updated;
    declared.add(key);
    changed = true;
  }

  if (changed) {
    zip.file('[Content_Types].xml', ctXml, {
      compression: 'DEFLATE',
      compressionOptions: { level: compressionLevel },
    });
  }
}
/**
 * Return the largest `N` among `word/media/imageN.<ext>` entries in the ZIP,
 * or 0 when there are none. Media filenames share one package-wide namespace,
 * so callers use a single counter across all parts.
 */
function findMaxImageNum(zip: JSZip): number {
  const mediaImage = /^word\/media\/image(\d+)\./;
  let highest = 0;
  zip.forEach((entryPath) => {
    const hit = mediaImage.exec(entryPath);
    if (hit) {
      highest = Math.max(highest, parseInt(hit[1], 10));
    }
  });
  return highest;
}
/**
 * Write every newly inserted image (data-URL `src`) found in the given parts
 * into the package.
 *
 * For each part that contains new images, the image bytes are stored under
 * `word/media/`, one image relationship per image is appended to the part's
 * rels file, and each image's `rId` is overwritten in place with the new
 * relationship id. Media file numbers continue from the highest `imageN`
 * already in the ZIP and are shared across parts; relationship ids continue
 * from the value `findMaxRId` reports for that part's rels. Finally, the
 * extensions that were written are registered in `[Content_Types].xml`.
 *
 * Mutates each image's rId in-place.
 */
export async function processNewImages(
  parts: Part[],
  zip: JSZip,
  compressionLevel: number
): Promise<void> {
  // Fresh options object per write, mirroring one literal per `zip.file` call.
  const deflate = () => ({
    compression: 'DEFLATE' as const,
    compressionOptions: { level: compressionLevel },
  });

  let lastImageNum = findMaxImageNum(zip);
  const usedExtensions = new Set<string>();

  for (const part of parts) {
    const newImages = collectNewImages(part.blocks);
    if (newImages.length === 0) continue;

    const originalRels = await readRelsOrStub(zip, part.relsPath);
    let lastRId = findMaxRId(originalRels);
    let appended = '';

    for (const image of newImages) {
      const decoded = decodeDataUrl(image.src!);
      lastImageNum += 1;
      lastRId += 1;
      const fileName = `image${lastImageNum}.${decoded.extension}`;
      const relId = `rId${lastRId}`;

      zip.file(`word/media/${fileName}`, decoded.data, deflate());
      appended += `<Relationship Id="${relId}" Type="${RELATIONSHIP_TYPES.image}" Target="media/${fileName}"/>`;
      usedExtensions.add(decoded.extension);
      // The serializer reads this id back when emitting the image reference.
      image.rId = relId;
    }

    const rewrittenRels = originalRels.replace(
      '</Relationships>',
      appended + '</Relationships>'
    );
    zip.file(part.relsPath, rewrittenRels, deflate());
  }

  await registerImageExtensions(zip, usedExtensions, compressionLevel);
}
/**
 * Reduce a rels Target to a comparable form by dropping, in order, one
 * optional leading `.`, one optional leading `/`, and an optional `word/`
 * prefix — so `./word/media/a.png`, `/word/media/a.png` and `media/a.png` all
 * compare equal.
 */
function normalizeMediaTarget(target: string): string {
  const leadingPrefix = /^\.?\/?(?:word\/)?/;
  return target.replace(leadingPrefix, '');
}
/**
 * Look through the `<Relationship>` elements of a rels XML string for the
 * first one whose `Target`, once normalized, equals the normalized
 * `mediaTarget`.
 *
 * @returns That element's `Id` value, or `null` when no element matches or
 *   the matching element carries no `Id` attribute.
 */
function findRelIdByMediaTarget(relsXml: string, mediaTarget: string): string | null {
  const wanted = normalizeMediaTarget(mediaTarget);
  for (const [element] of relsXml.matchAll(/<Relationship\b[^>]*?>/g)) {
    const relTarget = /Target="([^"]*)"/.exec(element)?.[1];
    if (!relTarget || normalizeMediaTarget(relTarget) !== wanted) continue;
    return /Id="([^"]*)"/.exec(element)?.[1] ?? null;
  }
  return null;
}
/**
 * Register picture-watermark images and bind each header's watermark to a
 * relationship that resolves in **that header's** rels.
 *
 * A watermark is applied to several header parts (default/first/even), but a
 * `<v:imagedata r:id>` is a header-part-local reference: the same rId is not
 * valid across parts. So this runs per header and:
 *
 * - leaves a watermark alone when its `relId` already appears as an `Id` in
 *   that header's rels (parsed-from-file headers, and idempotent re-saves);
 * - otherwise resolves the image bytes (an in-editor `data:` URL written once
 *   per save, or an existing `mediaPath` from the original file) and binds the
 *   watermark to a relationship in that header's rels — reusing an existing rel
 *   to the same media when one is present, else adding a fresh one.
 *
 * Mutates each picture watermark's `relId` in place so the serializer emits a
 * valid `<v:imagedata r:id="...">`.
 *
 * @param doc - Document whose `package.headers` and `package.relationships`
 *   are scanned; nothing happens when either is missing.
 * @param zip - Package that receives new media files and updated rels.
 * @param compressionLevel - DEFLATE level for every file written.
 */
export async function processNewWatermarkImages(
  doc: Document,
  zip: JSZip,
  compressionLevel: number
): Promise<void> {
  const headers = doc.package.headers;
  const rels = doc.package.relationships;
  if (!headers || !rels) return;
  const extensionsAdded = new Set<string>();
  // dataUrl -> media filename, so an image shared across headers is written once.
  const writtenMedia = new Map<string, string>();
  let maxImageNum = findMaxImageNum(zip);

  /** Resolve (and write, if new) the media file this watermark references. */
  function resolveMediaFilename(wm: { mediaPath?: string; dataUrl?: string }): string | null {
    // Existing media already in the package (parsed from the original file).
    if (wm.mediaPath) {
      const fn = wm.mediaPath.split('/').pop();
      if (fn) return fn;
    }
    // New image inserted in the editor — write the binary once, dedup by data URL.
    if (wm.dataUrl && wm.dataUrl.startsWith('data:')) {
      const cached = writtenMedia.get(wm.dataUrl);
      if (cached) return cached;
      const { data, extension } = decodeDataUrl(wm.dataUrl);
      maxImageNum++;
      const fn = `image${maxImageNum}.${extension}`;
      zip.file(`word/media/${fn}`, data, {
        compression: 'DEFLATE',
        compressionOptions: { level: compressionLevel },
      });
      extensionsAdded.add(extension);
      writtenMedia.set(wm.dataUrl, fn);
      return fn;
    }
    return null;
  }

  for (const [rId, hf] of headers.entries()) {
    const wm = hf.watermark;
    if (!wm || wm.kind !== 'picture') continue;
    // The document-level relationship for this header gives the header part's path.
    const headerRel = rels.get(rId);
    if (!headerRel?.target) continue;
    const filename = headerFooterFilename(headerRel.target).replace(/^word\//, '');
    const relsPath = `word/_rels/${filename}.rels`;
    const relsXml = await readRelsOrStub(zip, relsPath);

    // Keep the existing relId when it already resolves in this header's rels.
    // Literal substring test rather than `new RegExp(...)`: a relationship id
    // is an arbitrary string, and regex metacharacters in it would either
    // match the wrong text or make the RegExp constructor throw.
    // NOTE(review): this only checks that the id exists in the rels, not that
    // it targets this watermark's image — confirm a relId is never carried
    // over from another header.
    if (wm.relId && relsXml.includes(`Id="${wm.relId}"`)) continue;

    const mediaFilename = resolveMediaFilename(wm);
    if (!mediaFilename) continue;
    const target = `media/${mediaFilename}`;

    // Prefer a relationship this header already has to the same media file.
    const existingRId = findRelIdByMediaTarget(relsXml, target);
    if (existingRId) {
      wm.relId = existingRId;
      continue;
    }

    const newRId = `rId${findMaxRId(relsXml) + 1}`;
    const updatedRelsXml = relsXml.replace(
      '</Relationships>',
      `<Relationship Id="${newRId}" Type="${RELATIONSHIP_TYPES.image}" ` +
        `Target="${target}"/></Relationships>`
    );
    zip.file(relsPath, updatedRelsXml, {
      compression: 'DEFLATE',
      compressionOptions: { level: compressionLevel },
    });
    wm.relId = newRId;
  }

  await registerImageExtensions(zip, extensionsAdded, compressionLevel);
}