Skip to content

Usage

Basic Conversion

convert() accepts an HTML string and returns a ConversionResult.

use html_to_markdown_rs::convert;

/// Converts a small HTML fragment with default options and prints the result.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let html = "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>";
    // `content` is optional, so fall back to an empty string when it is absent.
    let markdown = convert(html, None)?.content.unwrap_or_default();
    println!("{markdown}");
    Ok(())
}
from html_to_markdown import convert

# The result is indexed by key; "content" holds the converted text.
source_html = "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>"
conversion = convert(source_html)
markdown = conversion["content"]
import { convert } from '@kreuzberg/html-to-markdown';

// Read the converted text straight from the result's `content` field.
const { content } = convert('<h1>Hello World</h1>');
const markdown: string = content;
console.log(markdown); // # Hello World
package main

import (
    "fmt"
    "log"

    "github.com/kreuzberg-dev/html-to-markdown/packages/go/v3/htmltomarkdown"
)

// main converts a short HTML document and prints the content when it is present.
func main() {
    const source = "<h1>Hello World</h1><p>This is a paragraph.</p>"

    converted, err := htmltomarkdown.Convert(source)
    if err != nil {
        log.Fatal(err)
    }

    // Content is a pointer, so check for nil before dereferencing it.
    if content := converted.Content; content != nil {
        fmt.Println(*content)
    }
}
require 'html_to_markdown'

# The result is indexed with symbol keys; :content holds the converted text.
source_html = "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>"
conversion = HtmlToMarkdown.convert(source_html)
markdown = conversion[:content]
use HtmlToMarkdown\Service\Converter;
use function HtmlToMarkdown\convert;

// Object-oriented usage: create a converter and call convert() on it.
$html = '<h1>Hello</h1><p>This is <strong>fast</strong>!</p>';
$result = Converter::create()->convert($html);
$markdown = $result['content'];

// Procedural helper: the namespaced convert() function takes the HTML directly.
$result = convert('<h1>Hello</h1>');
$markdown = $result['content'];
import dev.kreuzberg.htmltomarkdown.HtmlToMarkdown;
import dev.kreuzberg.htmltomarkdown.ConversionResult;

/** Minimal example: converts an HTML string and prints the converted content. */
public class Example {
    private static final String HTML =
        "<h1>Hello World</h1><p>This is a <strong>test</strong>.</p>";

    public static void main(String[] args) {
        final ConversionResult converted = HtmlToMarkdown.convert(HTML);
        System.out.println(converted.content());
    }
}
using HtmlToMarkdown;

// Convert a small HTML document and print the converted content.
const string source = "<h1>Hello World</h1><p>This is a paragraph.</p>";
var converted = HtmlToMarkdownConverter.Convert(source);
Console.WriteLine(converted.Content);
# Pattern-match the {:ok, result} tuple and print the converted content.
html = "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>"
{:ok, converted} = HtmlToMarkdown.convert(html)
IO.puts(converted.content)
library(htmltomarkdown)

# Convert the fragment and print the `content` element of the result.
source_html <- "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>"
converted <- convert(source_html)
markdown <- converted$content
cat(markdown)
#include "html_to_markdown.h"
#include <stdio.h>

/* Converts a fixed HTML string and prints the JSON string returned by the library. */
int main(void) {
    const char *html = "<h1>Hello</h1><p>World</p>";
    /* Returns JSON: {"content":"...","metadata":null,"tables":null} */
    char *json = html_to_markdown_convert(html, NULL);
    if (json == NULL) {
        return 0;
    }

    /* Parse JSON to extract content field */
    puts(json);
    /* The returned string is released with the library's own free function. */
    html_to_markdown_free_string(json);
    return 0;
}
import init, { convert } from '@kreuzberg/html-to-markdown-wasm';

// Initialize the WebAssembly module before calling convert().
await init();

const source = '<h1>Hello</h1><p>This is <strong>fast</strong>!</p>';
const { content: markdown } = convert(source);
console.log(markdown);

ConversionResult Fields

Every call to convert() returns a ConversionResult with the following fields:

Field Type Description
content Optional<String> The converted text (Markdown, Djot, or plain). None/null when output_format is "none".
document Optional<DocumentStructure> Structured document tree (headings, paragraphs, lists, tables). Only populated when include_document_structure is true.
metadata HtmlMetadata Extracted HTML metadata (title, description, Open Graph, Twitter Card, JSON-LD, links, images).
tables Vec<TableData> Extracted tables with full grid data (headers, rows, colspan/rowspan).
images Vec<ExtractedImage> Extracted inline images (data URIs, embedded SVGs). Only populated when extract_images is true.
warnings Vec<ProcessingWarning> Non-fatal warnings raised during conversion.

Using Options

Control output style, metadata extraction, and more via ConversionOptions.

use html_to_markdown_rs::{convert, ConversionOptions};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let options = ConversionOptions::builder()
        .heading_style(HeadingStyle::Atx)
        .skip_images(true)
        .build();
    let result = convert("<h1>Hello</h1><img src='pic.jpg'>", Some(options))?;
    let markdown = result.content.unwrap_or_default();
    println!("{markdown}");
    Ok(())
}
from html_to_markdown import ConversionOptions, convert

# Build the options first, then pass them to convert() as the second argument.
options = ConversionOptions(heading_style="atx", list_indent_width=2)
html = "<h1>Hello</h1><p>This is <strong>formatted</strong> content.</p>"
conversion = convert(html, options)
markdown = conversion["content"]
import { convert, ConversionOptions } from '@kreuzberg/html-to-markdown';

// Options are a plain object typed as ConversionOptions.
const conversionOptions: ConversionOptions = {
  headingStyle: 'atx',
  listIndentWidth: 2,
  wrap: true,
};

const { content: markdown } = convert('<h1>Title</h1><p>Content</p>', conversionOptions);
package main

import (
    "fmt"
    "log"

    "github.com/kreuzberg-dev/html-to-markdown/packages/go/v3/htmltomarkdown"
)

// main prints the library version, then converts a short HTML document.
func main() {
    // Check library version
    fmt.Printf("html-to-markdown version: %s\n", htmltomarkdown.Version())

    // NOTE(review): this example appears under "Using Options" but passes no
    // options to Convert — confirm whether an options value should be shown here.
    const source = "<h1>Hello</h1><p>Welcome</p>"

    // Convert with error handling
    converted, err := htmltomarkdown.Convert(source)
    if err != nil {
        log.Fatalf("Conversion failed: %v", err)
    }

    // Content is a pointer, so check for nil before dereferencing it.
    if content := converted.Content; content != nil {
        fmt.Println(*content)
    }
}
require 'html_to_markdown'

# Options are given as keyword arguments after the HTML string.
options = { heading_style: :atx, code_block_style: :fenced }
html = "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>"
result = HtmlToMarkdown.convert(html, **options)
markdown = result[:content]
use HtmlToMarkdown\Config\ConversionOptions;
use HtmlToMarkdown\Service\Converter;

// Named arguments select which options to set.
$options = new ConversionOptions(
    headingStyle: 'Atx',
    listIndentWidth: 2,
);

// The options object is passed as the second argument to convert().
$result = Converter::create()->convert('<h1>Hello</h1>', $options);
$markdown = $result['content'];
import dev.kreuzberg.htmltomarkdown.HtmlToMarkdown;
import dev.kreuzberg.htmltomarkdown.ConversionOptions;
import dev.kreuzberg.htmltomarkdown.ConversionResult;

/** Converts a small page with options built via the builder and prints what was extracted. */
public class MetadataExample {
    private static final String HTML = "<html><head><title>My Page</title></head>"
        + "<body><h1>Welcome</h1><a href=\"https://example.com\">Link</a></body></html>";

    public static void main(String[] args) {
        // Options are assembled with the builder and passed as the second argument.
        final ConversionOptions options = ConversionOptions.builder()
            .extractMetadata(true)
            .build();
        final ConversionResult converted = HtmlToMarkdown.convert(HTML, options);

        System.out.println("Markdown: " + converted.content());
        System.out.println("Title: " + converted.metadata().document().title());
        System.out.println("Headers: " + converted.metadata().headers().size());
        System.out.println("Links: " + converted.metadata().links().size());
    }
}
using HtmlToMarkdown;

const string source = "<h1>Hello</h1><p>This is <strong>formatted</strong> content.</p>";

// Set the desired options with an object initializer.
var conversionOptions = new ConversionOptions
{
    HeadingStyle = "atx",
    Wrap = true,
    WrapWidth = 80,
    ListIndentWidth = 4,
};

var converted = HtmlToMarkdownConverter.Convert(source, conversionOptions);
Console.WriteLine(converted.Content);
# Pass an Options struct (wrap enabled, wrap_width 40) as the second argument.
html = "<h1>Hello</h1><p>World</p>"
options = %HtmlToMarkdown.Options{wrap: true, wrap_width: 40}
{:ok, converted} = HtmlToMarkdown.convert(html, options)
IO.puts(converted.content)
library(htmltomarkdown)

# Build the options with conversion_options() and pass them as the second argument.
conversion_opts <- conversion_options(heading_style = "atx", wrap = TRUE, wrap_width = 80L)
source_html <- "<h1>Hello</h1><p>World</p>"

converted <- convert(source_html, conversion_opts)
cat(converted$content)

Metadata Extraction

Enable extract_metadata to populate the metadata field with structured data parsed from the HTML <head> and document body.

use html_to_markdown_rs::{convert, ConversionOptions};

/// Converts a small page with metadata extraction enabled and prints the
/// Markdown, the document title, and the extracted links.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let html = r#"<html><head><title>My Page</title></head>
    <body><h1>Hello</h1><a href="https://example.com">Link</a></body></html>"#;

    let options = ConversionOptions::builder()
        .extract_metadata(true)
        .build();
    let result = convert(html, Some(options))?;
    let markdown = result.content.unwrap_or_default();
    println!("Markdown: {}", markdown);
    // `metadata` is documented as a plain `HtmlMetadata` value, and the title is
    // listed under `document.title`, so both are read directly from the field.
    println!("Title: {:?}", result.metadata.document.title);
    println!("Links: {:?}", result.metadata.links);
    Ok(())
}
from html_to_markdown import ConversionOptions, convert

# Sample input, defined here so the snippet is runnable on its own.
html = (
    "<html><head><title>My Page</title></head>"
    '<body><h1>Hello</h1><a href="https://example.com">Link</a></body></html>'
)

options = ConversionOptions(
    extract_metadata=True,
    extract_headers=True,
    extract_links=True,
    extract_images=True,
    extract_structured_data=True,
    max_structured_data_size=100000,
)
result = convert(html, options)
# The converted text and the extracted metadata are separate keys of the result.
markdown = result["content"]
metadata = result["metadata"]
import { convert, ConversionOptions } from '@kreuzberg/html-to-markdown';

const options: ConversionOptions = { extractMetadata: true };
// Pull the converted text and the metadata out of the result in one step.
const { content, metadata } = convert('<h1>Title</h1><p>Content</p>', options);

console.log(content);            // Converted markdown
console.log(metadata?.document); // Document metadata (title, description, etc.)
console.log(metadata?.headers);  // Header elements (h1-h6)
console.log(metadata?.links);    // Extracted links
console.log(metadata?.images);   // Extracted images
package main

import (
    "fmt"
    "log"

    "github.com/kreuzberg-dev/html-to-markdown/packages/go/v3/htmltomarkdown"
)

// main converts a small page with ExtractMetadata enabled and prints the
// Markdown plus the metadata fields when they are present.
func main() {
    const source = `<html><head><title>My Page</title></head>
    <body><h1>Hello</h1><a href="https://example.com">Link</a></body></html>`

    converted, err := htmltomarkdown.Convert(
        source,
        htmltomarkdown.ConversionOptions{ExtractMetadata: true},
    )
    if err != nil {
        log.Fatal(err)
    }

    // Content and Metadata are both checked for nil before use.
    if content := converted.Content; content != nil {
        fmt.Println("Markdown:", *content)
    }
    if meta := converted.Metadata; meta != nil {
        // NOTE(review): the Metadata Fields table lists the title as
        // document.title — confirm that Title is exposed directly on Metadata.
        fmt.Println("Title:", meta.Title)
        fmt.Println("Links:", meta.Links)
    }
}
require 'html_to_markdown'

html = '<html lang="en"><head><title>Test</title></head><body><h1>Hello</h1></body></html>'
result = HtmlToMarkdown.convert(html, extract_metadata: true)

# Converted text and extracted metadata are separate keys of the result.
markdown = result[:content]
metadata = result[:metadata]
puts metadata[:document][:title]     # "Test"
puts metadata[:headers].first[:text] # "Hello"
use HtmlToMarkdown\Config\ConversionOptions;
use HtmlToMarkdown\Service\Converter;

$html = '<html><head><title>Example</title></head><body><h1>Welcome</h1><a href="https://example.com">Link</a></body></html>';

// Turn on metadata extraction together with the header, link, and image flags.
$options = new ConversionOptions(
    headingStyle: 'Atx',
    extractMetadata: true,
    extractHeaders: true,
    extractLinks: true,
    extractImages: true,
);

$result = Converter::create()->convert($html, $options);

echo $result['content'];

// The metadata entry is an object; its fields are read with property access.
$metadata = $result['metadata'];
echo $metadata->document->title;
foreach ($metadata->links as $link) {
    echo $link->href . ': ' . $link->text;
}
import dev.kreuzberg.htmltomarkdown.HtmlToMarkdown;
import dev.kreuzberg.htmltomarkdown.ConversionOptions;
import dev.kreuzberg.htmltomarkdown.ConversionResult;

/** Converts a small page with metadata extraction enabled and prints the title and links. */
public class MetadataExample {
    public static void main(String[] args) {
        String html = """
            <html><head><title>My Page</title></head>
            <body><h1>Hello</h1><a href="https://example.com">Link</a></body></html>
            """;

        ConversionOptions options = ConversionOptions.builder()
            .extractMetadata(true)
            .build();
        ConversionResult result = HtmlToMarkdown.convert(html, options);
        System.out.println("Markdown: " + result.content());
        // Accessors follow the same style as content() and metadata(): the title
        // is read through document(), and links() returns the extracted links.
        System.out.println("Title: " + result.metadata().document().title());
        System.out.println("Links: " + result.metadata().links());
    }
}
using HtmlToMarkdown;

var html = @"<html><head><title>My Page</title></head>
<body><h1>Hello</h1><a href=""https://example.com"">Link</a></body></html>";

var converted = HtmlToMarkdownConverter.Convert(
    html,
    new ConversionOptions { ExtractMetadata = true });

// Metadata is accessed null-conditionally; an empty list is used when no links are available.
var metadata = converted.Metadata;
var links = string.Join(", ", metadata?.Links ?? []);

Console.WriteLine($"Markdown: {converted.Content}");
// NOTE(review): the Metadata Fields table lists the title as document.title —
// confirm that Title is exposed directly on Metadata.
Console.WriteLine($"Title: {metadata?.Title}");
Console.WriteLine($"Links: {links}");

Metadata Extraction - Elixir

Extract structured metadata from HTML documents during conversion.

Basic Metadata Extraction

Use convert/2 with extract_metadata: true in options to extract document metadata alongside Markdown:

html = """
<html>
  <head>
    <title>Example</title>
    <meta name="description" content="Demo page">
  </head>
  <body>
    <h1 id="welcome">Welcome</h1>
    <a href="https://example.com" rel="nofollow external">Example link</a>
  </body>
</html>
"""

options = %HtmlToMarkdown.Options{extract_metadata: true}
{:ok, converted} = HtmlToMarkdown.convert(html, options)

# The metadata map uses string keys.
metadata = converted.metadata

get_in(metadata, ["document", "title"])             # "Example"
metadata["headers"] |> hd() |> Map.get("text")      # "Welcome"
metadata["links"] |> hd() |> Map.get("link_type")   # "external"

Extracted Metadata Structure

The metadata map includes:

  • Document: Title and meta tags from <head>
  • Headers: All headings extracted with level, text, and optional ID
  • Links: All links with href, text, rel attributes, and link_type classification
  • Images: Image sources and alt text
  • Forms: Form action and method data
  • Other: Tables, code blocks, and additional structural information
library(htmltomarkdown)

html <- '
<html>
  <head><title>Example</title></head>
  <body>
    <h1 id="welcome">Welcome</h1>
    <a href="https://example.com">Example link</a>
  </body>
</html>'

converted <- convert(html, conversion_options(extract_metadata = TRUE))

cat(converted$content)

# Metadata is a nested list; headers and links are indexed by position.
metadata <- converted$metadata
metadata$document$title
metadata$headers[[1]]$text
metadata$links[[1]]$link_type

Metadata Fields

Field Description
document.title Page title from <title> tag
document.description Content of <meta name="description">
document.language lang attribute of <html> tag
document.charset Character encoding declaration
document.open_graph Open Graph tags (og:title, og:description, etc.)
document.twitter_card Twitter Card tags
document.json_ld JSON-LD structured data blocks
headers All <h1>–<h6> elements with level, text, and id
links All <a> tags with href, text, rel, and link type
images All <img> tags with src, alt, width, height

Document Structure Extraction

Enable include_document_structure to get a parsed tree of the document's structural elements.

use html_to_markdown_rs::{convert, ConversionOptions};

let options = ConversionOptions::builder()
    .include_document_structure(true)
    .build();
let result = convert("<h1>Title</h1><p>Paragraph</p>", Some(options))?;

if let Some(doc) = &result.document {
    for node in &doc.nodes {
        println!("{:?}", node);
    }
}
from html_to_markdown import ConversionOptions, convert

# Request the document structure, then print every node under "document".
options = ConversionOptions(include_document_structure=True)
result = convert("<h1>Title</h1><p>Paragraph</p>", options)
for node in result["document"]["nodes"]:
    print(node)
import { convert, ConversionOptions } from '@kreuzberg/html-to-markdown';

const options: ConversionOptions = { includeDocumentStructure: true };
const result = convert('<h1>Title</h1><p>Paragraph</p>', options);
// Iterate over an empty list when `document` or its `nodes` are absent.
for (const node of result.document?.nodes ?? []) {
  console.log(node);
}