Skip to content

Usage

Basic Conversion

convert() accepts an HTML string and returns a ConversionResult.

use html_to_markdown_rs::convert;

/// Converts a small HTML snippet with default options and prints the text output.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let input = "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>";
    // `content` is optional; an absent value is printed as an empty string.
    let markdown = convert(input, None)?.content.unwrap_or_default();
    println!("{markdown}");
    Ok(())
}
from html_to_markdown import convert

html = "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>"
result = convert(html)
markdown = result.content
import { convert } from "@kreuzberg/html-to-markdown";

const result = convert("<h1>Hello World</h1>");
const markdown: string = result.content;
console.log(markdown); // # Hello World
package main

import (
    "fmt"
    "log"

    htmltomarkdown "github.com/kreuzberg-dev/html-to-markdown/packages/go/v3"
)

func main() {
    html := "<h1>Hello World</h1><p>This is a paragraph.</p>"

    result, err := htmltomarkdown.Convert(html, nil)
    if err != nil {
        log.Fatal(err)
    }

    if result.Content != nil {
        fmt.Println(*result.Content)
    }
}
require 'html_to_markdown'

html = "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>"
result = HtmlToMarkdown.convert(html)
markdown = result[:content]
use HtmlToMarkdown\HtmlToMarkdown;

$result = HtmlToMarkdown::convert('<h1>Hello</h1><p>This is <strong>fast</strong>!</p>');
echo $result->content;
import dev.kreuzberg.htmltomarkdown.HtmlToMarkdown;
import dev.kreuzberg.htmltomarkdown.ConversionResult;

public class Example {
    public static void main(String[] args) {
        String html = "<h1>Hello World</h1><p>This is a <strong>test</strong>.</p>";
        ConversionResult result = HtmlToMarkdown.convert(html);
        System.out.println(result.content());
    }
}
using HtmlToMarkdown;

var html = "<h1>Hello World</h1><p>This is a paragraph.</p>";
var result = HtmlToMarkdownConverter.Convert(html);
Console.WriteLine(result.Content);
{:ok, result} = HtmlToMarkdown.convert("<h1>Hello</h1><p>This is <strong>fast</strong>!</p>")
IO.puts(result.content)
library(htmltomarkdown)

html <- "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>"
result <- convert(html)
markdown <- result$content
cat(markdown)
#include "html_to_markdown.h"
#include <stdio.h>

int main(void) {
    HTMConversionResult *result = htm_convert("<h1>Hello</h1><p>World</p>", NULL);
    if (result == NULL) {
        fprintf(stderr, "convert failed (code %d): %s\n",
                htm_last_error_code(), htm_last_error_context());
        return 1;
    }

    char *content = htm_conversion_result_content(result);
    if (content != NULL) {
        printf("%s\n", content);
        htm_free_string(content);
    }

    htm_conversion_result_free(result);
    return 0;
}
import init, { convert } from "@kreuzberg/html-to-markdown-wasm";

await init();

const html = "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>";
const result = convert(html);
const markdown = result.content;
console.log(markdown);
import HtmlToMarkdown

let html = "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>"
let result = try convert(html, nil)
let markdown = result.content()?.toString() ?? ""
print(markdown)
import 'package:h2m/h2m.dart';
import 'package:h2m/src/html_to_markdown_rs_bridge_generated/frb_generated.dart'
    show RustLib;

Future<void> main() async {
  await RustLib.init();

  const html = '<h1>Hello</h1><p>This is <strong>fast</strong>!</p>';
  final result = await H2mBridge.convert(html);
  print(result.content);
}
import dev.kreuzberg.android.HtmlToMarkdownRs

val html = "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>"
val result = HtmlToMarkdownRs.convert(html)
val markdown: String? = result.content
const std = @import("std");
const html_to_markdown = @import("html_to_markdown");

pub fn main() !void {
    const html = "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>";
    const result_json = try html_to_markdown.convert(html, null);
    defer std.heap.c_allocator.free(result_json);

    // result_json is the ConversionResult serialised as JSON; parse with
    // std.json or read the `content` field directly.
    std.debug.print("{s}\n", .{result_json});
}

ConversionResult Fields

Every call to convert() returns a ConversionResult with the following fields:

Field Type Description
content Optional<String> The converted text (Markdown, Djot, or plain). None/null when output_format is "none".
document Optional<DocumentStructure> Structured document tree (headings, paragraphs, lists, tables). Only populated when include_document_structure is true.
metadata HtmlMetadata Extracted HTML metadata (title, description, Open Graph, Twitter Card, JSON-LD, links, images).
tables Vec<TableData> Extracted tables with full grid data (headers, rows, colspan/rowspan).
images Vec<ExtractedImage> Extracted inline images (data URIs, embedded SVGs). Only populated when extract_images is true.
warnings Vec<ProcessingWarning> Non-fatal warnings raised during conversion.

Using Options

Control output style, metadata extraction, and more via ConversionOptions.

use html_to_markdown_rs::{ConversionOptions, HeadingStyle, convert};

/// Converts HTML with explicitly built `ConversionOptions` and prints the text output.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let html = "<h1>Hello</h1><img src='pic.jpg'>";
    let options = ConversionOptions::builder()
        .heading_style(HeadingStyle::Atx)
        .skip_images(true)
        .build();
    // `content` is optional; fall back to an empty string when it is absent.
    let markdown = convert(html, Some(options))?.content.unwrap_or_default();
    println!("{}", markdown);
    Ok(())
}
from html_to_markdown import ConversionOptions, convert

html = "<h1>Hello</h1><p>This is <strong>formatted</strong> content.</p>"
options = ConversionOptions(
    heading_style="atx",
    list_indent_width=2,
)
result = convert(html, options)
markdown = result.content
import { convert, ConversionOptions } from "@kreuzberg/html-to-markdown";

const options: ConversionOptions = {
  headingStyle: "atx",
  listIndentWidth: 2,
  wrap: true,
};

const result = convert("<h1>Title</h1><p>Content</p>", options);
const markdown = result.content;
package main

import (
    "fmt"
    "log"

    htmltomarkdown "github.com/kreuzberg-dev/html-to-markdown/packages/go/v3"
)

func main() {
    html := "<h1>Hello</h1><p>Welcome</p>"

    width := uint(80)
    opts := htmltomarkdown.ConversionOptions{
        Wrap:      true,
        WrapWidth: &width,
    }

    result, err := htmltomarkdown.Convert(html, &opts)
    if err != nil {
        log.Fatalf("Conversion failed: %v", err)
    }

    if result.Content != nil {
        fmt.Println(*result.Content)
    }
}
require 'html_to_markdown'

html = "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>"
result = HtmlToMarkdown.convert(html, heading_style: :atx, code_block_style: :fenced)
markdown = result[:content]
use HtmlToMarkdown\HtmlToMarkdown;
use HtmlToMarkdown\ConversionOptions;

$options = ConversionOptions::builder()
    ->headingStyle('atx')
    ->listIndentWidth(2)
    ->build();

$result = HtmlToMarkdown::convert('<h1>Hello</h1>', $options);
echo $result->content;
import dev.kreuzberg.htmltomarkdown.HtmlToMarkdown;
import dev.kreuzberg.htmltomarkdown.ConversionOptions;
import dev.kreuzberg.htmltomarkdown.ConversionResult;

public class MetadataExample {
    public static void main(String[] args) {
        String html = "<html><head><title>My Page</title></head>"
            + "<body><h1>Welcome</h1><a href=\"https://example.com\">Link</a></body></html>";

        ConversionOptions options = ConversionOptions.builder()
            .extractMetadata(true)
            .build();
        ConversionResult result = HtmlToMarkdown.convert(html, options);

        System.out.println("Markdown: " + result.content());
        System.out.println("Title: " + result.metadata().document().title());
        System.out.println("Headers: " + result.metadata().headers().size());
        System.out.println("Links: " + result.metadata().links().size());
    }
}
using HtmlToMarkdown;

var options = new ConversionOptions
{
    HeadingStyle = "atx",
    Wrap = true,
    WrapWidth = 80,
    ListIndentWidth = 4,
};

var html = "<h1>Hello</h1><p>This is <strong>formatted</strong> content.</p>";
var result = HtmlToMarkdownConverter.Convert(html, options);
Console.WriteLine(result.Content);
opts = %HtmlToMarkdown.Options{wrap: true, wrap_width: 40}
{:ok, result} = HtmlToMarkdown.convert("<h1>Hello</h1><p>World</p>", opts)
IO.puts(result.content)
library(htmltomarkdown)

opts <- conversion_options(
  heading_style = "atx",
  wrap = TRUE,
  wrap_width = 80L
)

result <- convert("<h1>Hello</h1><p>World</p>", opts)
cat(result$content)
import HtmlToMarkdown

let options = try conversionOptionsFromJson(
    "{\"heading_style\":\"atx\",\"list_indent_width\":2,\"wrap\":true}"
)

let html = "<h1>Hello</h1><p>This is <strong>formatted</strong> content.</p>"
let result = try convert(html, options)
let markdown = result.content()?.toString() ?? ""
print(markdown)
import 'package:h2m/h2m.dart';
import 'package:h2m/src/html_to_markdown_rs_bridge_generated/frb_generated.dart'
    show RustLib;

Future<void> main() async {
  await RustLib.init();

  final options = await createConversionOptionsFromJson(
    json: '{"heading_style":"atx","list_indent_width":2,"wrap":true}',
  );

  const html = '<h1>Hello</h1><p>This is <strong>formatted</strong> content.</p>';
  final result = await H2mBridge.convert(html, options: options);
  print(result.content);
}
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.databind.PropertyNamingStrategies
import com.fasterxml.jackson.module.kotlin.registerKotlinModule
import dev.kreuzberg.android.ConversionOptions
import dev.kreuzberg.android.HtmlToMarkdownRs

val mapper = ObjectMapper()
    .registerKotlinModule()
    .setPropertyNamingStrategy(PropertyNamingStrategies.SNAKE_CASE)
val options = mapper.readValue(
    "{\"heading_style\":\"Atx\",\"list_indent_width\":2,\"wrap\":true}",
    ConversionOptions::class.java,
)

val html = "<h1>Hello</h1><p>This is <strong>formatted</strong> content.</p>"
val result = HtmlToMarkdownRs.convert(html, options)
val markdown: String? = result.content
const std = @import("std");
const html_to_markdown = @import("html_to_markdown");

pub fn main() !void {
    const html = "<h1>Hello</h1><p>This is <strong>formatted</strong> content.</p>";
    const options_json =
        \\{"heading_style":"atx","list_indent_width":2,"wrap":true}
    ;

    const result_json = try html_to_markdown.convert(html, options_json);
    defer std.heap.c_allocator.free(result_json);

    std.debug.print("{s}\n", .{result_json});
}

Metadata Extraction

Set extract_metadata to true (the default) to populate the metadata field with structured data parsed from the HTML <head> and document body.

use html_to_markdown_rs::{convert, ConversionOptions};

/// Converts an HTML document with metadata extraction enabled, then prints the
/// Markdown, the page title, and the extracted links.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let html = r#"<html><head><title>My Page</title></head>
    <body><h1>Hello</h1><a href="https://example.com">Link</a></body></html>"#;

    let options = ConversionOptions::builder()
        .extract_metadata(true)
        .build();
    let result = convert(html, Some(options))?;
    let markdown = result.content.unwrap_or_default();
    println!("Markdown: {}", markdown);
    // `metadata` is a plain `HtmlMetadata` (not an `Option`); the page title is
    // nested under `document`, while `links` is a top-level list.
    println!("Title: {:?}", result.metadata.document.title.as_deref());
    println!("Links: {:?}", result.metadata.links);
    Ok(())
}
from html_to_markdown import ConversionOptions, convert

# Sample input: the <title> and <a> elements give the extractor something to report.
html = (
    "<html><head><title>My Page</title></head>"
    '<body><h1>Hello</h1><a href="https://example.com">Link</a></body></html>'
)

options = ConversionOptions(
    extract_metadata=True,
    extract_images=True,
)
result = convert(html, options)
markdown = result.content
# Structured metadata extracted alongside the converted text.
metadata = result.metadata
import { convert, ConversionOptions } from "@kreuzberg/html-to-markdown";

const options: ConversionOptions = { extractMetadata: true };
const result = convert("<h1>Title</h1><p>Content</p>", options);

console.log(result.content); // Converted markdown
console.log(result.metadata?.document); // Document metadata (title, description, etc.)
console.log(result.metadata?.headers); // Header elements (h1-h6)
console.log(result.metadata?.links); // Extracted links
console.log(result.metadata?.images); // Extracted images
package main

import (
    "fmt"
    "log"

    htmltomarkdown "github.com/kreuzberg-dev/html-to-markdown/packages/go/v3"
)

func main() {
    html := `<html><head><title>My Page</title></head>
    <body><h1>Hello</h1><a href="https://example.com">Link</a></body></html>`

    // extract_metadata defaults to true; nil options is enough.
    result, err := htmltomarkdown.Convert(html, nil)
    if err != nil {
        log.Fatal(err)
    }

    if result.Content != nil {
        fmt.Println("Markdown:", *result.Content)
    }
    if result.Metadata.Document.Title != nil {
        fmt.Println("Title:", *result.Metadata.Document.Title)
    }
    for _, link := range result.Metadata.Links {
        fmt.Printf("Link: %s (%s)\n", link.Href, link.Text)
    }
}
require 'html_to_markdown'

html = '<html lang="en"><head><title>Test</title></head><body><h1>Hello</h1></body></html>'
result = HtmlToMarkdown.convert(html, extract_metadata: true)

markdown = result[:content]
puts result[:metadata][:document][:title]     # "Test"
puts result[:metadata][:headers].first[:text] # "Hello"
use HtmlToMarkdown\HtmlToMarkdown;

$html = '<html><head><title>Example</title></head><body><h1>Welcome</h1><a href="https://example.com">Link</a></body></html>';

// extract_metadata defaults to true.
$result = HtmlToMarkdown::convert($html);

echo $result->content;
echo $result->metadata->document->title;
foreach ($result->metadata->links as $link) {
    echo $link->href . ': ' . $link->text;
}
import dev.kreuzberg.htmltomarkdown.HtmlToMarkdown;
import dev.kreuzberg.htmltomarkdown.ConversionOptions;
import dev.kreuzberg.htmltomarkdown.ConversionResult;

/**
 * Converts an HTML document with metadata extraction enabled and prints the
 * Markdown, the page title, and the extracted links.
 */
public class MetadataExample {
    public static void main(String[] args) {
        String html = """
            <html><head><title>My Page</title></head>
            <body><h1>Hello</h1><a href="https://example.com">Link</a></body></html>
            """;

        ConversionOptions options = ConversionOptions.builder()
            .extractMetadata(true)
            .build();
        ConversionResult result = HtmlToMarkdown.convert(html, options);
        System.out.println("Markdown: " + result.content());
        // The title is nested under metadata().document(); links() is a top-level
        // metadata list. Accessors are record-style, matching result.content().
        System.out.println("Title: " + result.metadata().document().title());
        System.out.println("Links: " + result.metadata().links());
    }
}
using HtmlToMarkdown;

var html = @"<html><head><title>My Page</title></head>
<body><h1>Hello</h1><a href=""https://example.com"">Link</a></body></html>";

var options = new ConversionOptions { ExtractMetadata = true };
var result = HtmlToMarkdownConverter.Convert(html, options);
Console.WriteLine($"Markdown: {result.Content}");
Console.WriteLine($"Title: {result.Metadata?.Title}");
Console.WriteLine($"Links: {string.Join(", ", result.Metadata?.Links ?? [])}");

Metadata Extraction - Elixir

Extract structured metadata from HTML documents during conversion.

Basic Metadata Extraction

Use convert/2 with extract_metadata: true in options to extract document metadata alongside Markdown:

html = """
<html>
  <head>
    <title>Example</title>
    <meta name="description" content="Demo page">
  </head>
  <body>
    <h1 id="welcome">Welcome</h1>
    <a href="https://example.com" rel="nofollow external">Example link</a>
  </body>
</html>
"""

opts = %HtmlToMarkdown.Options{extract_metadata: true}
{:ok, result} = HtmlToMarkdown.convert(html, opts)

result.metadata["document"]["title"]        # "Example"
result.metadata["headers"] |> hd() |> Map.get("text") # "Welcome"
result.metadata["links"]   |> hd() |> Map.get("link_type") # "external"

Extracted Metadata Structure

The metadata map includes:

  • Document: Title and meta tags from <head>
  • Headers: All headings extracted with level, text, and optional ID
  • Links: All links with href, text, rel attributes, and link_type classification
  • Images: Image sources and alt text
  • Forms: Form action and method data
  • Other: Tables, code blocks, and additional structural information
library(htmltomarkdown)

html <- '
<html>
  <head><title>Example</title></head>
  <body>
    <h1 id="welcome">Welcome</h1>
    <a href="https://example.com">Example link</a>
  </body>
</html>'

opts <- conversion_options(extract_metadata = TRUE)
result <- convert(html, opts)

cat(result$content)
result$metadata$document$title
result$metadata$headers[[1]]$text
result$metadata$links[[1]]$link_type
import HtmlToMarkdown

let options = try conversionOptionsFromJson(
    "{\"extract_metadata\":true,\"extract_images\":true}"
)

let html = """
<html>
  <head>
    <title>My Page</title>
    <meta name="description" content="A short description.">
    <meta name="author" content="Jane Doe">
  </head>
  <body>
    <h1>Welcome</h1>
    <p>See <a href="https://example.com">example</a>.</p>
  </body>
</html>
"""

let result = try convert(html, options)
let markdown = result.content()?.toString() ?? ""

let metadata = result.metadata()
let document = metadata.document()
print("title:", document.title()?.toString() ?? "")
print("description:", document.description()?.toString() ?? "")
print("author:", document.author()?.toString() ?? "")
print("headers:", metadata.headers().count)
print("links:", metadata.links().count)
print("images:", metadata.images().count)
print(markdown)
import 'package:h2m/h2m.dart';
import 'package:h2m/src/html_to_markdown_rs_bridge_generated/frb_generated.dart'
    show RustLib;

Future<void> main() async {
  await RustLib.init();

  final options = await createConversionOptionsFromJson(
    json: '{"extract_metadata":true}',
  );
  final result = await H2mBridge.convert(
    '<html><head><title>Example</title>'
    '<meta name="description" content="A sample page">'
    '</head><body><h1>Hello</h1><a href="https://example.com">link</a></body></html>',
    options: options,
  );

  print(result.content);
  print(result.metadata.document.title); // "Example"
  print(result.metadata.document.description); // "A sample page"
  print(result.metadata.headers); // List<HeaderMetadata>
  print(result.metadata.links); // List<LinkMetadata>
  print(result.metadata.images); // List<ImageMetadata>
}
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.databind.PropertyNamingStrategies
import com.fasterxml.jackson.module.kotlin.registerKotlinModule
import dev.kreuzberg.android.ConversionOptions
import dev.kreuzberg.android.HtmlToMarkdownRs

val mapper = ObjectMapper()
    .registerKotlinModule()
    .setPropertyNamingStrategy(PropertyNamingStrategies.SNAKE_CASE)
val options = mapper.readValue(
    "{\"extract_metadata\":true}",
    ConversionOptions::class.java,
)

val html = """
    <html>
      <head>
        <title>My Page</title>
        <meta name="description" content="A short description">
      </head>
      <body><h1>Hello</h1><a href="https://example.com">Link</a></body>
    </html>
""".trimIndent()

val result = HtmlToMarkdownRs.convert(html, options)
println("Markdown: ${result.content}")
println("Title: ${result.metadata.document.title}")
println("Description: ${result.metadata.document.description}")
println("Headers: ${result.metadata.headers}")
println("Links: ${result.metadata.links}")
println("Images: ${result.metadata.images}")
const std = @import("std");
const html_to_markdown = @import("html_to_markdown");

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    const html =
        \\<html><head>
        \\<title>Example Page</title>
        \\<meta name="description" content="A short description.">
        \\<meta name="author" content="Jane Doe">
        \\<link rel="canonical" href="https://example.com/page">
        \\</head><body><h1>Hello</h1></body></html>
    ;

    // `convert` returns the ConversionResult as a JSON string. Metadata
    // extraction is controlled by the `extract_metadata` option (default true).
    const result_json = try html_to_markdown.convert(html, "{\"extract_metadata\":true}");
    defer std.heap.c_allocator.free(result_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, allocator, result_json, .{});
    defer parsed.deinit();
    const document = parsed.value.object.get("metadata").?.object.get("document").?.object;

    if (document.get("title")) |v| {
        if (v == .string) std.debug.print("Title: {s}\n", .{v.string});
    }
    if (document.get("description")) |v| {
        if (v == .string) std.debug.print("Description: {s}\n", .{v.string});
    }
    if (document.get("author")) |v| {
        if (v == .string) std.debug.print("Author: {s}\n", .{v.string});
    }
    if (document.get("canonical_url")) |v| {
        if (v == .string) std.debug.print("Canonical: {s}\n", .{v.string});
    }
}

Metadata Fields

result.metadata is an HtmlMetadata with five top-level fields: document, headers, links, images, and structured_data. Everything is populated in a single pass.

document (DocumentMetadata)

Field Type Description
title Option<String> Page title from the <title> element.
description Option<String> <meta name="description"> content.
keywords Vec<String> Parsed <meta name="keywords">, split on commas.
author Option<String> <meta name="author"> content.
canonical_url Option<String> <link rel="canonical"> href.
base_href Option<String> <base href="../…"> value.
language Option<String> lang attribute on <html>.
text_direction Option<TextDirection> dir attribute on <html>. One of left_to_right, right_to_left, auto.
open_graph BTreeMap<String, String> All og:* meta tags keyed by property (without the og: prefix).
twitter_card BTreeMap<String, String> All twitter:* meta tags keyed by name (without the prefix).
meta_tags BTreeMap<String, String> Every other <meta name> tag, keyed by name.

Other top-level fields

Field Description
headers HeaderMetadata entries for every <h1>–<h6> with level, text, and id.
links LinkMetadata entries for every <a> with href, text, rel values, and classified link_type.
images ImageMetadata entries for every <img> with src, alt, dimensions, and classified image_type.
structured_data JSON-LD, Microdata, and RDFa blocks with a data_type tag and the raw content.

links[].link_type

Value Matches
anchor href starts with # (same-page anchors).
internal relative href or href that resolves inside the document's own host.
external absolute URL on a different host.
email mailto: URI.
phone tel: URI.
other anything else (javascript:, data:, custom schemes).

images[].image_type

Value Matches
data_uri src starts with data:.
inline_svg inline <svg> element (captured when extract_images is enabled).
external absolute URL on a remote host.
relative relative path or same-host URL.

structured_data[].data_type

Value Matches
json_ld <script type="application/ld+json"> blocks.
microdata itemscope/itemprop subtrees.
rdfa typeof/property subtrees.

Document Structure Extraction

Enable include_document_structure to get a parsed tree of the document's structural elements.

=== "Rust"

    ```rust
    use html_to_markdown_rs::{convert, ConversionOptions};

    let options = ConversionOptions::builder()
        .include_document_structure(true)
        .build();
    let result = convert("<h1>Title</h1><p>Paragraph</p>", Some(options))?;

    if let Some(doc) = &result.document {
        for node in &doc.nodes {
            println!("{:?}", node);
        }
    }
    ```

=== "Python"

    ```python
    from html_to_markdown import ConversionOptions, convert

    options = ConversionOptions(include_document_structure=True)
    result = convert("<h1>Title</h1><p>Paragraph</p>", options)
    doc = result.document
    for node in doc.nodes:
        print(node)
    ```

=== "TypeScript"

    ```typescript
    import { convert, ConversionOptions } from '@kreuzberg/html-to-markdown';

    const options: ConversionOptions = { includeDocumentStructure: true };
    const result = convert('<h1>Title</h1><p>Paragraph</p>', options);
    const nodes = result.document?.nodes ?? [];
    for (const node of nodes) {
      console.log(node);
    }
    ```

---

!!! question "Found a bug or mistake on this page?"
    If something here is wrong or out of date, [open an issue](https://github.com/kreuzberg-dev/html-to-markdown/issues/new?labels=documentation) on GitHub or [contribute a fix](https://github.com/kreuzberg-dev/html-to-markdown/blob/main/CONTRIBUTING.md) via pull request.

Edit this page on GitHub