import compareVersions from 'compare-versions';
import * as glob from 'glob';
import * as path from 'path';
import slugify from 'slugify';

import { adminClient, IAlgoliaSettings, searchIndices, settings } from './algolia_constants';
import { meta } from './algolia_meta';

// Note (piotr): can't find type definitions for these
const remark = require('remark');
const mdx = require('remark-mdx');
const slug = require('remark-slug');
const { read } = require('to-vfile');
const findAfter = require('unist-util-find-after');
const modifyChildren = require('unist-util-modify-children');
const { selectAll } = require('unist-util-select');

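// Walks a parsed MDX tree, tags its text nodes with the hash of their nearest heading,
// applies the index settings, and pushes the resulting records to the Algolia index for `indexName`.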
function processContentTree(tree: Node[], file: any, indexName: string): void {
    const modify = modifyChildren(modifier);
    // We first modify the tree so that every text node carries the slugified id of its preceding heading
    modify(tree);
    // Get all text nodes, i.e. 'heading', 'paragraph', and 'list' nodes can all have (nested) child text nodes
    const textNodes = selectAll('text', tree);

    if (textNodes) {
        const formattedTextNodes = formatTextNodes(textNodes);
        const content = getContent(file, formattedTextNodes);

        const algoliaIndex = adminClient.initIndex(searchIndices[indexName]);
        const algoliaSettings = settings[indexName];

        setIndexSettings(algoliaIndex, algoliaSettings);
        pushObjectsToAlgolia(algoliaIndex, content);
    }
}

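// Runs once for every child of the root node: when the child is a heading, every node up to
// the next heading of the same or higher level is tagged with that heading's id.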
function modifier(node: Node, index: number, parent: Node): void {
    if (node.type === 'heading') {
        const start = node;
        const isEnd = (sibling: Node) => sibling.type === 'heading' && sibling.depth <= start.depth;
        const end = findAfter(parent, start, isEnd);

        const startIndex = parent.children.indexOf(start);
        const endIndex = parent.children.indexOf(end);
        // Take the heading itself and every node before the next heading of the same or higher level
        const between = parent.children.slice(startIndex, endIndex > 0 ? endIndex : undefined);
        // We add the id of the heading as the hash part of the url to all text nodes
        for (const item of between) {
            addHashToChildren(item, start);
        }
    }
}

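// Recursively stores the heading's id as a URL hash on every descendant text node of `item`.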
function addHashToChildren(item: any, start: any): void {
    if (item.children) {
        for (const child of item.children) {
            if (child.type === 'text') {
                child.data = child.data || {};
                child.data.hash = `#${start.data.id}`;
            }
            addHashToChildren(child, start);
        }
    }
}

function setIndexSettings(algoliaIndex: any, algoliaSettings: IAlgoliaSettings): void {
    algoliaIndex.setSettings(algoliaSettings, (err: string) => {
        if (err) {
            throw Error(`Error: ${err}`);
        }
    });
}

function pushObjectsToAlgolia(algoliaIndex: any, content: Content[]): void {
    algoliaIndex
        .saveObjects(content)
        .then(({ objectIDs }: { objectIDs: string[] }) =>
            console.log(
                `✨ Pushed content to Algolia with Object IDs ${objectIDs[0]} to ${objectIDs[objectIDs.length - 1]}`,
            ),
        )
        .catch((err: string) => {
            throw Error(`Error: ${err}`);
        });
}

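// Builds one Algolia record per formatted text node, combining the page metadata from
// `algolia_meta` with the node's text, hash, and URL.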
function getContent(file: any, formattedTextNodes: FormattedNode[]): Content[] {
    const { name, url }: { name: string; url: string } = file;
    const metaData: Meta = meta[name];
    const content: Content[] = [];

    formattedTextNodes.forEach((node: FormattedNode, index: number) => {
        const titleSlug = slugify(metaData.title, { lower: true });

        content.push({
            ...metaData,
            url,
            urlWithHash: url + node.hash,
            hash: node.hash,
            textContent: node.textContent,
            id: titleSlug,
            objectID: `${titleSlug}_${index}`,
        });
    });

    return content;
}

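// Flattens text nodes into one entry per source line: values of nodes that start on the same
// line are concatenated, and the hash of the first node on that line is kept.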
function formatTextNodes(textNodes: Node[]): FormattedNode[] {
    const formattedTextNodes: FormattedNode[] = []; // array structure: [ { line: [LINE_NUMBER], textContent: [MERGED_TEXT_VALUE] } ]

    textNodes.forEach((textNode: Node) => {
        const { data, position, value } = textNode;
        // If data (hash) is not present on the node, the text node occurs before any headings, e.g. in an intro text without a heading.
        const hash = data ? data.hash : '';

        const { line } = position.start; // Line at which the text node starts (and, for paragraphs and headings, also ends).

        const nodeIndex = formattedTextNodes.findIndex((node: FormattedNode) => node.line === line);
        const isIndexPresent = nodeIndex > -1;

        if (isIndexPresent) {
            formattedTextNodes[nodeIndex].textContent += value; // Merge value with the existing text at the given line
        } else {
            formattedTextNodes.push({ line, hash, textContent: value }); // Store the text, the hash part of the url, and its start line
        }
    });

    return formattedTextNodes;
}

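// Reads and parses a single MDX file with remark, slugifies its heading ids, and hands the
// resulting tree to processContentTree for indexing.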
async function processMdxAsync(indexName: string, file: any): Promise<void> {
    const content = await read(file.path);

    await remark()
        .use(slug) // slugify heading text as ids
        .use(mdx)
        .use(() => (tree: Node[]) => processContentTree(tree, file, indexName))
        .process(content);
}

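// Collects the MDX files to index for a directory: for 'tools' only the latest version of each
// tool is kept; guides are added one per file, and 'core-concepts' / 'api-explorer' are added
// under their directory name.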
function getFiles(dirName: string): any[] {
    const dirPath = path.join(__dirname, `../../mdx/${dirName}`);
    const files = glob.sync(dirPath + '/**/*.mdx');
    const processedFiles: any[] = [];

    for (const file of files) {
        if (dirName === 'tools') {
            // For now we are looking for all mdx files (which should only be 'reference.mdx')
            // We can look for a different filename in the future, e.g. README, and do some stuff with it
            // const { name } = path.parse(file);
            // if (name === 'reference') {
            const toolName = path.basename(path.join(file, '../../'));
            const version = path.basename(path.dirname(file));
            const url = `/docs/${toolName}/${version}`; // could become `/docs/tools/${toolName}/${version}` in the future

            const fileIndex = processedFiles.findIndex((tool: any) => tool.name === toolName);
            const isIndexPresent = fileIndex > -1;

            if (isIndexPresent) {
                if (compareVersions.compare(version, processedFiles[fileIndex].version, '>')) {
                    processedFiles[fileIndex] = { name: toolName, path: file, version, url };
                }
            } else {
                processedFiles.push({ name: toolName, path: file, version, url });
            }
        }

        if (dirName === 'guides') {
            const { name } = path.parse(file);
            const url = `/docs/guides/${name}`;
            processedFiles.push({ name, path: file, url });
        }

        if (dirName === 'core-concepts' || dirName === 'api-explorer') {
            const url = `/docs/${dirName}`;
            processedFiles.push({ name: dirName, path: file, url });
        }
    }

    return processedFiles;
}

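// Entry point: gathers the MDX files for the given index and processes them one by one.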
export async function indexFilesAsync(indexName: string): Promise<void> {
    const files = getFiles(indexName);

    for (const file of files) {
        await processMdxAsync(indexName, file);
    }
}

interface Meta {
    description: string;
    title: string;
    subtitle?: string;
    difficulty?: 'Beginner' | 'Intermediate' | 'Advanced';
    isCommunity?: boolean;
    isFeatured?: boolean;
    tags?: string[];
    topics?: string[];
    type?: string;
}

interface Content extends Meta {
    url: string;
    urlWithHash: string;
    hash: string;
    textContent: string;
    id: string;
    objectID: string;
}

interface FormattedNode {
    hash: string;
    line: number;
    textContent: string;
}

// Syntactic units in unist syntax trees are called nodes.
interface Node {
    type: string;
    children?: Node[];
    data?: Data;
    depth?: number;
    lang?: string;
    ordered?: boolean;
    position?: Position;
    spread?: boolean;
    value?: string;
}

// Location of a node in a source file.
interface Position {
    start: Point; // Place of the first character of the parsed source region.
    end: Point; // Place of the first character after the parsed source region.
    indent: number[]; // Start column at each index (plus start line) in the source region.
}

// One place in a source file.
interface Point {
    line: number; // Line in a source file (1-indexed integer).
    column: number; // Column in a source file (1-indexed integer).
    offset: number; // Character in a source file (0-indexed integer).
}

// Information associated by the ecosystem with the node.
// Space is guaranteed to never be specified by unist or specifications
// implementing unist.
interface Data {
    [key: string]: any;
}