Skip to content

Quick Start

This guide walks you through your first HTML-to-Markdown conversion, then shows how to customize the output with options.


Basic Conversion

Convert an HTML string to Markdown with a single function call.

from html_to_markdown import convert

# Convert an HTML string by calling convert() with the HTML alone.
source = "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>"
markdown = convert(source)
import { convert } from '@kreuzberg/html-to-markdown';

// Convert a single-heading document and log the resulting Markdown.
const source = '<h1>Hello World</h1>';
const markdown: string = convert(source);
console.log(markdown); // # Hello World
use html_to_markdown_rs::convert;

/// Converts a small HTML snippet and prints the resulting Markdown.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // `None` is passed in the options position, so no custom options are supplied.
    let markdown = convert("<h1>Hello</h1><p>This is <strong>fast</strong>!</p>", None)?;

    // Expected output:
    // # Hello
    //
    // This is **fast**!
    println!("{markdown}");
    Ok(())
}
require 'html_to_markdown'

# Convert an HTML string by passing it to HtmlToMarkdown.convert with no options.
source = "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>"
markdown = HtmlToMarkdown.convert(source)
use HtmlToMarkdown\Service\Converter;
use function HtmlToMarkdown\convert;

// Option 1: create a Converter and call its convert() method.
$markdown = Converter::create()->convert('<h1>Hello</h1><p>This is <strong>fast</strong>!</p>');

// Option 2: call the imported convert() function directly.
$markdown = convert('<h1>Hello</h1>');
package main

import (
    "fmt"
    "log"

    "github.com/kreuzberg-dev/html-to-markdown/packages/go/v2/htmltomarkdown"
)

// main converts a small HTML document and prints the resulting Markdown.
// If Convert returns an error, the program exits through log.Fatal.
func main() {
    const input = "<h1>Hello World</h1><p>This is a paragraph.</p>"

    result, err := htmltomarkdown.Convert(input)
    if err != nil {
        log.Fatal(err)
    }
    fmt.Println(result)
}
import dev.kreuzberg.htmltomarkdown.HtmlToMarkdown;

/** Minimal example: converts an HTML string and prints the Markdown to standard output. */
public class Example {
    public static void main(String[] args) {
        final String source = "<h1>Hello World</h1><p>This is a <strong>test</strong>.</p>";
        System.out.println(HtmlToMarkdown.convert(source));
    }
}
using HtmlToMarkdown;

// Convert the HTML and write the resulting Markdown to the console.
const string source = "<h1>Hello World</h1><p>This is a paragraph.</p>";
var markdown = HtmlToMarkdownConverter.Convert(source);
Console.WriteLine(markdown);
# The pattern match expects an {:ok, markdown} tuple from convert/1.
html = "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>"
{:ok, markdown} = HtmlToMarkdown.convert(html)
IO.puts(markdown)
library(htmltomarkdown)

# Convert an HTML string and print the resulting Markdown.
input <- "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>"
cat(convert(input))
#include "html_to_markdown.h"
#include <stdio.h>

/* Converts a small HTML string, prints the Markdown, and releases the result. */
int main(void) {
    const char *html = "<h1>Hello</h1><p>World</p>";
    char *markdown = html_to_markdown_convert(html);

    /* A NULL result is skipped: there is nothing to print or free. */
    if (markdown == NULL) {
        return 0;
    }

    printf("%s\n", markdown);
    /* The returned string is released with the library's own free function. */
    html_to_markdown_free_string(markdown);
    return 0;
}

Conversion with Options

Customize the Markdown output by passing configuration options. Every binding exposes the same set of options through language-idiomatic APIs.

from html_to_markdown import ConversionOptions, convert

# Build the options first, then pass them as the second argument to convert().
options = ConversionOptions(heading_style="atx", list_indent_width=2)

html = "<h1>Hello</h1><p>This is <strong>formatted</strong> content.</p>"
markdown = convert(html, options)
import { convert, ConversionOptions } from '@kreuzberg/html-to-markdown';

// Options are a plain object annotated with the ConversionOptions type.
const options: ConversionOptions = { headingStyle: 'atx', listIndentWidth: 2, wrap: true };

const html = '<h1>Title</h1><p>Content</p>';
const markdown = convert(html, options);
use html_to_markdown_rs::{convert, ConversionOptions};

let options = ConversionOptions {
    heading_style: Some("atx".into()),
    list_indent_width: Some(2),
    ..Default::default()
};

let html = "<h1>Title</h1><p>Content</p>";
let markdown = convert(html, Some(options))?;
require 'html_to_markdown'

# Options are given as keyword arguments after the HTML string.
source = "<h1>Hello</h1><p>This is <strong>fast</strong>!</p>"
markdown = HtmlToMarkdown.convert(
  source,
  heading_style: :atx,
  code_block_style: :fenced
)
use HtmlToMarkdown\Config\ConversionOptions;
use HtmlToMarkdown\Service\Converter;

// Describe the desired output using named constructor arguments.
$options = new ConversionOptions(headingStyle: 'Atx', listIndentWidth: 2);

// Pass the options as the second argument to convert().
$markdown = Converter::create()->convert('<h1>Hello</h1>', $options);
package main

import (
    "fmt"
    "log"

    "github.com/kreuzberg-dev/html-to-markdown/packages/go/v2/htmltomarkdown"
)

// main prints the library version, converts one HTML string with explicit
// error handling, and converts a second string with MustConvert.
//
// NOTE(review): this example appears under "Conversion with Options" but
// does not pass any conversion options — confirm the Go binding's options
// API and update the example to demonstrate it.
func main() {
    // Check library version
    version := htmltomarkdown.Version()
    fmt.Printf("html-to-markdown version: %s\n", version)

    html := "<h1>Hello</h1><p>Welcome</p>"

    // Convert with error handling
    markdown, err := htmltomarkdown.Convert(html)
    if err != nil {
        log.Fatalf("Conversion failed: %v", err)
    }

    fmt.Println(markdown)

    // Alternative: Use MustConvert for panicking on error
    anotherMarkdown := htmltomarkdown.MustConvert("<p>Safe HTML</p>")
    fmt.Println(anotherMarkdown)
}
import dev.kreuzberg.htmltomarkdown.HtmlToMarkdown;
import dev.kreuzberg.htmltomarkdown.metadata.MetadataExtraction;

/**
 * Converts an HTML document with {@code convertWithMetadata} and prints the
 * Markdown together with the document title and the header and link counts.
 *
 * <p>NOTE(review): this example appears under "Conversion with Options" but
 * demonstrates metadata extraction and passes no conversion options — confirm
 * the Java binding's options API and update the example to demonstrate it.
 */
public class MetadataExample {
    public static void main(String[] args) {
        String html = "<html><head><title>My Page</title></head>"
            + "<body><h1>Welcome</h1><a href=\"https://example.com\">Link</a></body></html>";

        // A single call returns both the Markdown and the extracted metadata.
        MetadataExtraction result = HtmlToMarkdown.convertWithMetadata(html);

        System.out.println("Markdown: " + result.markdown());
        System.out.println("Title: " + result.metadata().document().title());
        System.out.println("Headers: " + result.metadata().headers().size());
        System.out.println("Links: " + result.metadata().links().size());
    }
}
using HtmlToMarkdown;

// Configure the output: "atx" heading style, wrapping enabled with a width
// of 80, and a list indent width of 4.
var options = new ConversionOptions
{
    HeadingStyle = "atx",
    Wrap = true,
    WrapWidth = 80,
    ListIndentWidth = 4,
};

var html = "<h1>Hello</h1><p>This is <strong>formatted</strong> content.</p>";
// Pass the options to Convert; without this argument the settings above are never used.
var markdown = HtmlToMarkdownConverter.Convert(html, options);
Console.WriteLine(markdown);
# Build an options handle from an Options struct, then pass it to convert_with_options/2.
opts = %HtmlToMarkdown.Options{wrap: true, wrap_width: 40}
handle = HtmlToMarkdown.options(opts)
{:ok, markdown} = HtmlToMarkdown.convert_with_options("<h1>Hello</h1><p>World</p>", handle)
IO.puts(markdown)
library(htmltomarkdown)

# Build the options object, then convert with it and print the result.
html <- "<h1>Hello</h1><p>World</p>"
opts <- conversion_options(heading_style = "atx", wrap = TRUE, wrap_width = 80L)

cat(convert_with_options(html, opts))
#include "html_to_markdown.h"
#include <stdio.h>
#include <string.h> /* strlen */

/*
 * Converts an HTML string using the explicit-length entry point, prints the
 * Markdown, and releases the returned string.
 */
int main(void) {
    const char *html = "<h1>Title</h1><p>Paragraph</p>";

    /*
     * The HTML is passed together with its byte length. NULL and 0 are given
     * for the last two parameters — presumably "no options"; confirm against
     * html_to_markdown.h.
     */
    char *markdown = html_to_markdown_convert_with_len(
        html, strlen(html), NULL, 0);
    if (markdown) {
        printf("%s\n", markdown);
        /* The returned string is released with the library's own free function. */
        html_to_markdown_free_string(markdown);
    }
    return 0;
}

What Gets Converted

html-to-markdown handles the full range of HTML elements you would encounter in web content:

| HTML | Markdown |
| --- | --- |
| `<h1>` through `<h6>` | `#` through `######` headings |
| `<p>` | Paragraphs separated by blank lines |
| `<strong>`, `<b>` | `**bold**` |
| `<em>`, `<i>` | `*italic*` |
| `<a href="...">` | `[text](url)` links |
| `<img src="...">` | `![alt](src)` images |
| `<ul>`, `<ol>` | Bulleted and numbered lists |
| `<pre><code>` | Fenced code blocks |
| `<blockquote>` | `>` block quotes |
| `<table>` | Markdown tables with alignment |
| `<hr>` | `---` thematic breaks |
| `<del>`, `<s>` | `~~strikethrough~~` |
| `<input type="checkbox">` | `- [ ]` / `- [x]` task lists |

Next Steps

Using kreuzberg?

If you are processing diverse document types (PDFs, DOCX, images) and need Markdown output, consider kreuzberg, which uses html-to-markdown internally and adds OCR, format detection, and multi-format extraction.