delete cmark_gfm

This commit is contained in:
JackLee 2025-03-07 13:56:56 +08:00
parent 81ea49de8d
commit 88d46c984a
169 changed files with 5 additions and 57409 deletions

View File

@ -1,18 +0,0 @@
# editorconfig.org
root = true
[*]
end_of_line = lf
charset = utf-8
insert_final_newline = true
[*.{c,h}]
trim_trailing_whitespace = true
indent_style = space
indent_size = 2
[Makefile]
trim_trailing_whitespace = true
indent_style = tab
indent_size = 8

View File

@ -1,83 +0,0 @@
# Continuous-integration workflow: builds and tests the project on Linux,
# macOS and Windows for every push and on manual dispatch.
name: CI tests

on: [push, workflow_dispatch]

jobs:

  # Linux: build, run the test suite and the valgrind leak check for every
  # combination of CMake options and compiler.
  linux:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        cmake_opts:
          - '-DCMARK_SHARED=ON'
          - ''
        compiler:
          - c: 'clang'
            cpp: 'clang++'
          - c: 'gcc'
            cpp: 'g++'
    env:
      CMAKE_OPTIONS: ${{ matrix.cmake_opts }}
      CC: ${{ matrix.compiler.c }}
      CXX: ${{ matrix.compiler.cpp }}
    steps:
      - uses: actions/checkout@v4
      - name: Install valgrind
        # Refresh the package index first so the install does not fail on a
        # stale runner image.
        run: |
          sudo apt-get update
          sudo apt-get install -y valgrind
      - name: Build and test
        run: |
          make
          make test
          make leakcheck

  # macOS: build and run the test suite (no leak check).
  macos:
    runs-on: macOS-latest
    strategy:
      fail-fast: false
      matrix:
        cmake_opts:
          - '-DCMARK_SHARED=ON'
          - ''
        compiler:
          - c: 'clang'
            cpp: 'clang++'
          - c: 'gcc'
            cpp: 'g++'
    env:
      CMAKE_OPTIONS: ${{ matrix.cmake_opts }}
      CC: ${{ matrix.compiler.c }}
      CXX: ${{ matrix.compiler.cpp }}
    steps:
      - uses: actions/checkout@v4
      - name: Build and test
        # NOTE(review): this step-level value takes precedence over the
        # job-level CMAKE_OPTIONS, so both cmake_opts matrix legs run with
        # -DCMARK_SHARED=OFF -- confirm that this is intended.
        env:
          CMAKE_OPTIONS: -DCMARK_SHARED=OFF
        run: |
          make
          make test

  # Windows: build and test with MSVC through the NMake makefile.
  windows:
    runs-on: windows-latest
    strategy:
      fail-fast: false
      matrix:
        cmake_opts:
          - '-DCMARK_SHARED=ON'
          - ''
    env:
      CMAKE_OPTIONS: ${{ matrix.cmake_opts }}
    steps:
      - uses: actions/checkout@v4
      - uses: ilammy/msvc-dev-cmd@v1
      - name: Build and test
        # chcp 65001 switches the console code page to UTF-8 before testing.
        run: |
          chcp 65001
          nmake.exe /nologo /f Makefile.nmake test
        shell: cmd

View File

@ -1,77 +0,0 @@
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"

on:
  push:
    branches: [ "master" ]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: [ "master" ]
  schedule:
    # Weekly scheduled scan: Wednesdays at 14:45 UTC.
    - cron: '45 14 * * 3'

jobs:
  analyze:
    name: Analyze
    # Swift analysis needs a macOS runner and a shorter timeout; every other
    # language runs on Ubuntu.
    runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
    timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
    permissions:
      actions: read
      contents: read
      security-events: write

    strategy:
      fail-fast: false
      matrix:
        language: [ 'cpp', 'javascript', 'python', 'ruby' ]
        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby', 'swift' ]
        # Use only 'java' to analyze code written in Java, Kotlin or both
        # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
        # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: ${{ matrix.language }}
          # If you wish to specify custom queries, you can do so here or in a config file.
          # By default, queries listed here will override any specified in a config file.
          # Prefix the list here with "+" to use these queries and those in the config file.

          # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
          # queries: security-extended,security-and-quality

      # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
      # If this step fails, then you should remove it and run the build manually (see below)
      - name: Autobuild
        uses: github/codeql-action/autobuild@v3

      # Command-line programs to run using the OS shell.
      # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun

      #   If the Autobuild fails above, remove it and uncomment the following three lines.
      #   modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.

      # - run: |
      #     echo "Run, Build Application using script"
      #     ./location_of_script_within_repo/buildscript.sh

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3
        with:
          category: "/language:${{matrix.language}}"

View File

@ -1,42 +0,0 @@
# Object files
*.o
*.ko
*.obj
*.elf
# Libraries
*.lib
*.a
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
*.pyc
*~
*.bak
*.diff
*#
*.zip
bstrlib.txt
build
cmark.dSYM/*
cmark
.vscode
.DS_Store
# Testing and benchmark
alltests.md
progit/
bench/benchinput.md
test/afl_results/

View File

@ -1,40 +0,0 @@
# Travis CI configuration: builds and tests with clang and gcc on Linux and
# macOS.
# Ensures that sudo is disabled, so that containerized builds are allowed
sudo: false
os:
- linux
- osx
language: c
compiler:
- clang
- gcc
# One extra job in addition to the os/compiler combinations above: gcc on
# Linux with CMARK_SHARED=OFF passed to cmake through CMAKE_OPTIONS.
matrix:
include:
- os: linux
compiler: gcc
env: CMAKE_OPTIONS="-DCMARK_SHARED=OFF"
addons:
apt:
# we need a more recent cmake than travis/linux provides (at least 2.8.9):
sources:
- kubuntu-backports
- kalakris-cmake
packages:
- cmake
- python3
- valgrind
before_install:
# On macOS this only prints a notice; nothing is installed by this step.
- |
if [ ${TRAVIS_OS_NAME:-'linux'} = 'osx' ]
then
echo "Building without python3, to make sure that works."
fi
script:
# Configure out-of-source in build/ with the job's CMAKE_OPTIONS, then run the
# test suite through the top-level Makefile.
- (mkdir -p build && cd build && cmake $CMAKE_OPTIONS ..)
- make test
# The leak check runs on Linux only.
- |
if [ ${TRAVIS_OS_NAME:-'linux'} = 'linux' ]
then
make leakcheck
fi

View File

@ -1,48 +0,0 @@
# Top-level build script for cmark-gfm: declares the project version and the
# user-facing options, then pulls in the library, extension, man-page, test
# and fuzzing subdirectories.
cmake_minimum_required(VERSION 3.19)
project(cmark-gfm)

# Version components. The full version string carries a ".gfm.<n>" suffix and
# is therefore assembled by hand instead of through project(VERSION ...).
set(PROJECT_VERSION_MAJOR 0)
set(PROJECT_VERSION_MINOR 29)
set(PROJECT_VERSION_PATCH 0)
set(PROJECT_VERSION_GFM 13)
set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM})

include("FindAsan.cmake")
include("CheckFileOffsetBits.cmake")

# Refuse in-source builds.
if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
  message(FATAL_ERROR "Do not build in-source.\nPlease remove CMakeCache.txt and the CMakeFiles/ directory.\nThen: mkdir build ; cd build ; cmake .. ; make")
endif()

# Default to a Release build when the user did not choose a build type.
# This is done before any add_subdirectory() call so that the subdirectories
# see the same CMAKE_BUILD_TYPE on the first configure as on later ones, and
# it is skipped for multi-config generators, which ignore CMAKE_BUILD_TYPE.
get_property(cmark_gfm_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT cmark_gfm_is_multi_config AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Release" CACHE STRING
    "Choose the type of build, options are: Debug Profile Release Asan Ubsan." FORCE)
endif()

# User-facing build options.
option(CMARK_TESTS "Build cmark-gfm tests and enable testing" OFF)
option(CMARK_STATIC "Build static libcmark-gfm library" ON)
option(CMARK_SHARED "Build shared libcmark-gfm library" OFF)
option(CMARK_LIB_FUZZER "Build libFuzzer fuzzing harness" OFF)
option(CMARK_FUZZ_QUADRATIC "Build quadratic fuzzing harness" OFF)

# The quadratic fuzzing harness needs every object instrumented for
# libFuzzer and AddressSanitizer, so the flags are applied globally.
if(CMARK_FUZZ_QUADRATIC)
  set(FUZZER_FLAGS "-fsanitize=fuzzer-no-link,address -g")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FUZZER_FLAGS}")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FUZZER_FLAGS}")
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FUZZER_FLAGS}")
  set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${FUZZER_FLAGS}")
endif()

add_subdirectory(src)
add_subdirectory(extensions)

# The API tests link against the library, so at least one flavour must be built.
if(CMARK_TESTS AND (CMARK_SHARED OR CMARK_STATIC))
  add_subdirectory(api_test)
endif()

add_subdirectory(man)

if(CMARK_TESTS)
  enable_testing()
  add_subdirectory(test testdir)
endif()

if(CMARK_FUZZ_QUADRATIC)
  add_subdirectory(fuzz)
endif()

View File

@ -1,170 +0,0 @@
Copyright (c) 2014, John MacFarlane
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-----
houdini.h, houdini_href_e.c, houdini_html_e.c, houdini_html_u.c
derive from https://github.com/vmg/houdini (with some modifications)
Copyright (C) 2012 Vicent Martí
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
-----
buffer.h, buffer.c, chunk.h
are derived from code (C) 2012 Github, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
-----
utf8.c and utf8.h
are derived from utf8proc
(<http://www.public-software-group.org/utf8proc>),
(C) 2009 Public Software Group e. V., Berlin, Germany.
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
-----
The normalization code in normalize.py was derived from the
markdowntest project, Copyright 2013 Karl Dubost:
The MIT License (MIT)
Copyright (c) 2013 Karl Dubost
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-----
The CommonMark spec (test/spec.txt) is
Copyright (C) 2014-15 John MacFarlane
Released under the Creative Commons CC-BY-SA 4.0 license:
<http://creativecommons.org/licenses/by-sa/4.0/>.
-----
The test software in test/ is
Copyright (c) 2014, John MacFarlane
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,14 +0,0 @@
/*
 * Compile-time probe for the width of off_t (presumably the
 * CheckFileOffsetBits.c source compiled by the CHECK_FILE_OFFSET_BITS CMake
 * macro -- confirm). The file compiles only when off_t arithmetic on values
 * up to 1 TB yields the expected remainders.
 */
#include <sys/types.h>
/* Size constants, computed in off_t so that a too-narrow off_t shows up below. */
#define KB ((off_t)1024)
#define MB ((off_t)1024 * KB)
#define GB ((off_t)1024 * MB)
#define TB ((off_t)1024 * GB)
/*
 * The array length is 1 when both remainder checks hold and -1 otherwise;
 * a negative array length is rejected by the compiler, which is how the
 * probe reports failure.
 */
int t2[(((64 * GB -1) % 671088649) == 268434537)
&& (((TB - (64 * GB -1) + 255) % 1792151290) == 305159546)? 1: -1];
/* Nothing to execute: the probe's result is whether this file compiles. */
int main()
{
;
return 0;
}

View File

@ -1,43 +0,0 @@
# - Detect whether large-file support requires the _FILE_OFFSET_BITS macro
# check_file_offset_bits()
#
# Compiles CheckFileOffsetBits.c (expected next to this module) first without
# and, only if that fails, with -D_FILE_OFFSET_BITS=64. The outcome is stored
# in the internal cache variable _FILE_OFFSET_BITS: "64" when the definition
# is needed, empty otherwise. Nothing is done when _FILE_OFFSET_BITS is
# already defined.
#
# The following variable may be set before calling this macro to modify the
# way the check is run:
#
# CMAKE_REQUIRED_DEFINITIONS = list of macros to define (-DFOO=bar)

# Copyright (c) 2009, Michihiro NAKAJIMA
#
# Redistribution and use is allowed according to the terms of the BSD license.
# For details see the accompanying COPYING-CMAKE-SCRIPTS file.

# Directory of this module, used to locate the probe source file.
get_filename_component(_selfdir_CheckFileOffsetBits
  "${CMAKE_CURRENT_LIST_FILE}" PATH)

macro(check_file_offset_bits)
  if(NOT DEFINED _FILE_OFFSET_BITS)
    message(STATUS "Checking _FILE_OFFSET_BITS for large files")

    # First attempt: plain compile, no extra definition.
    try_compile(__WITHOUT_FILE_OFFSET_BITS_64
      ${CMAKE_CURRENT_BINARY_DIR}
      ${_selfdir_CheckFileOffsetBits}/CheckFileOffsetBits.c
      COMPILE_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS})

    # Second attempt, only when the first one failed: force 64-bit offsets.
    if(NOT __WITHOUT_FILE_OFFSET_BITS_64)
      try_compile(__WITH_FILE_OFFSET_BITS_64
        ${CMAKE_CURRENT_BINARY_DIR}
        ${_selfdir_CheckFileOffsetBits}/CheckFileOffsetBits.c
        COMPILE_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS} -D_FILE_OFFSET_BITS=64)
    endif()

    # The macro is "needed" only when the plain compile failed and the
    # compile with the definition succeeded.
    if(__WITHOUT_FILE_OFFSET_BITS_64 OR NOT __WITH_FILE_OFFSET_BITS_64)
      set(_FILE_OFFSET_BITS "" CACHE INTERNAL "_FILE_OFFSET_BITS macro needed for large files")
      message(STATUS "Checking _FILE_OFFSET_BITS for large files - not needed")
    else()
      set(_FILE_OFFSET_BITS 64 CACHE INTERNAL "_FILE_OFFSET_BITS macro needed for large files")
      message(STATUS "Checking _FILE_OFFSET_BITS for large files - needed")
    endif()
  endif()
endmacro()

View File

@ -1,74 +0,0 @@
#
# The MIT License (MIT)
#
# Copyright (c) 2013 Matthew Arsenault
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# This module tests whether the address sanitizer is supported by the
# compiler, and creates an ASan build type (i.e. set CMAKE_BUILD_TYPE=ASan to
# use it). This sets the following variables:
#
# CMAKE_C_FLAGS_ASAN - Flags to use for C with asan
# CMAKE_CXX_FLAGS_ASAN - Flags to use for C++ with asan
# HAVE_ADDRESS_SANITIZER - True or false if the ASan build type is available
include(CheckCCompilerFlag)

# Preserve the caller's CMAKE_REQUIRED_FLAGS; the probes below overwrite it.
unset(find_asan_saved_required_flags)
if(DEFINED CMAKE_REQUIRED_FLAGS)
  set(find_asan_saved_required_flags "${CMAKE_REQUIRED_FLAGS}")
endif()

# Set -Werror to catch "argument unused during compilation" warnings
set(CMAKE_REQUIRED_FLAGS "-Werror -faddress-sanitizer") # Also needs to be a link flag for test to pass
check_c_compiler_flag("-faddress-sanitizer" HAVE_FLAG_ADDRESS_SANITIZER)

set(CMAKE_REQUIRED_FLAGS "-Werror -fsanitize=address") # Also needs to be a link flag for test to pass
check_c_compiler_flag("-fsanitize=address" HAVE_FLAG_SANITIZE_ADDRESS)

# Put CMAKE_REQUIRED_FLAGS back the way the caller had it.
if(DEFINED find_asan_saved_required_flags)
  set(CMAKE_REQUIRED_FLAGS "${find_asan_saved_required_flags}")
  unset(find_asan_saved_required_flags)
else()
  unset(CMAKE_REQUIRED_FLAGS)
endif()

if(HAVE_FLAG_SANITIZE_ADDRESS)
  # Clang 3.2+ use this version
  set(ADDRESS_SANITIZER_FLAG "-fsanitize=address")
elseif(HAVE_FLAG_ADDRESS_SANITIZER)
  # Older deprecated flag for ASan
  set(ADDRESS_SANITIZER_FLAG "-faddress-sanitizer")
endif()

# Without a usable flag the ASan build type is unavailable: report that
# explicitly and stop processing this module.
if(NOT ADDRESS_SANITIZER_FLAG)
  set(HAVE_ADDRESS_SANITIZER FALSE)
  return()
endif()

set(HAVE_ADDRESS_SANITIZER TRUE)

# Per-configuration flags for the "ASan" build type.
set(CMAKE_C_FLAGS_ASAN "-O1 -g ${ADDRESS_SANITIZER_FLAG} -fno-omit-frame-pointer -fno-optimize-sibling-calls"
  CACHE STRING "Flags used by the C compiler during ASan builds."
  FORCE)
set(CMAKE_CXX_FLAGS_ASAN "-O1 -g ${ADDRESS_SANITIZER_FLAG} -fno-omit-frame-pointer -fno-optimize-sibling-calls"
  CACHE STRING "Flags used by the C++ compiler during ASan builds."
  FORCE)
set(CMAKE_EXE_LINKER_FLAGS_ASAN "${ADDRESS_SANITIZER_FLAG}"
  CACHE STRING "Flags used for linking binaries during ASan builds."
  FORCE)
set(CMAKE_SHARED_LINKER_FLAGS_ASAN "${ADDRESS_SANITIZER_FLAG}"
  CACHE STRING "Flags used by the shared libraries linker during ASan builds."
  FORCE)
mark_as_advanced(CMAKE_C_FLAGS_ASAN
  CMAKE_CXX_FLAGS_ASAN
  CMAKE_EXE_LINKER_FLAGS_ASAN
  CMAKE_SHARED_LINKER_FLAGS_ASAN)

View File

@ -1,226 +0,0 @@
# Directory layout. Variables assigned with ?= can be overridden from the
# environment or the make command line.
SRCDIR=src
EXTDIR=extensions
DATADIR=data
BUILDDIR?=build
GENERATOR?=Unix Makefiles
MINGW_BUILDDIR?=build-mingw
MINGW_INSTALLDIR?=windows
SPEC=test/spec.txt
EXTENSIONS_SPEC=test/extensions.txt
SITE=_site
# Spec version, read from the "version:" line of the spec file.
SPECVERSION=$(shell perl -ne 'print $$1 if /^version: *([0-9.]+)/' $(SPEC))
# Number of random bytes fed to the program by the fuzztest target. The
# comment is kept on its own line so the value carries no trailing space.
FUZZCHARS?=2000000
BENCHDIR=bench
BENCHSAMPLES=$(wildcard $(BENCHDIR)/samples/*.md)
BENCHFILE=$(BENCHDIR)/benchinput.md
ALLTESTS=alltests.md
NUMRUNS?=20
CMARK=$(BUILDDIR)/src/cmark-gfm
CMARK_FUZZ=$(BUILDDIR)/src/cmark-fuzz
PROG?=$(CMARK)
VERSION?=$(SPECVERSION)
RELEASE?=CommonMark-$(VERSION)
INSTALL_PREFIX?=/usr/local
CLANG_CHECK?=clang-check
CLANG_FORMAT=clang-format -style llvm -sort-includes=0 -i
AFL_PATH?=/usr/local/bin

# Every target that does not produce a file of the same name.
.PHONY: all cmake_build install leakcheck clean distclean fuzztest test debug ubsan asan prof mingw archive newbench bench format format-extensions operf update-spec afl clang-check docker libFuzzer
# Default target: build the binaries and regenerate the man page.
all: cmake_build man/man3/cmark-gfm.3
# The program binary is produced as a side effect of cmake_build.
$(CMARK): cmake_build
# Build everything inside the (already configured) build directory.
cmake_build: $(BUILDDIR)
@$(MAKE) -j2 -C $(BUILDDIR)
@echo "Binaries can be found in $(BUILDDIR)/src"
# Create and configure the build directory. Fails early with a hint when
# cmake is not installed. BUILD_TYPE is not assigned in the visible part of
# this Makefile; presumably it is supplied on the command line or through the
# environment -- confirm.
$(BUILDDIR):
@cmake --version > /dev/null || (echo "You need cmake to build this program: http://www.cmake.org/download/" && exit 1)
mkdir -p $(BUILDDIR); \
cd $(BUILDDIR); \
cmake .. \
-G "$(GENERATOR)" \
-DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \
-DCMAKE_INSTALL_PREFIX=$(INSTALL_PREFIX) \
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
# Run the install step of the configured build directory.
install: $(BUILDDIR)
$(MAKE) -C $(BUILDDIR) install
# The four targets below reconfigure $(BUILDDIR) with a specific
# CMAKE_BUILD_TYPE and then build in it. The commands are chained with ';',
# so $(MAKE) runs even when the cmake step reports an error.

# Debug build.
debug:
mkdir -p $(BUILDDIR); \
cd $(BUILDDIR); \
cmake .. -DCMAKE_BUILD_TYPE=Debug; \
$(MAKE)
# Build type "Ubsan".
ubsan:
mkdir -p $(BUILDDIR); \
cd $(BUILDDIR); \
cmake .. -DCMAKE_BUILD_TYPE=Ubsan; \
$(MAKE)
# Build type "Asan".
asan:
mkdir -p $(BUILDDIR); \
cd $(BUILDDIR); \
cmake .. -DCMAKE_BUILD_TYPE=Asan; \
$(MAKE)
# Build type "Profile".
prof:
mkdir -p $(BUILDDIR); \
cd $(BUILDDIR); \
cmake .. -DCMAKE_BUILD_TYPE=Profile; \
$(MAKE)
afl:
@[ -n "$(AFL_PATH)" ] || { echo '$$AFL_PATH not set'; false; }
mkdir -p $(BUILDDIR)
cd $(BUILDDIR) && cmake .. -DCMARK_TESTS=0 -DCMAKE_C_COMPILER=$(AFL_PATH)/afl-clang
$(MAKE)
$(AFL_PATH)/afl-fuzz \
-i test/afl_test_cases \
-o test/afl_results \
-x test/fuzzing_dictionary \
$(AFL_OPTIONS) \
-t 100 \
$(CMARK) -e table -e strikethrough -e autolink -e tagfilter $(CMARK_OPTS)
libFuzzer:
@[ -n "$(LIB_FUZZER_PATH)" ] || { echo '$$LIB_FUZZER_PATH not set'; false; }
mkdir -p $(BUILDDIR)
cd $(BUILDDIR) && cmake -DCMAKE_BUILD_TYPE=Asan -DCMARK_LIB_FUZZER=ON -DCMAKE_LIB_FUZZER_PATH=$(LIB_FUZZER_PATH) ..
$(MAKE) -j2 -C $(BUILDDIR) cmark-fuzz
test/run-cmark-fuzz $(CMARK_FUZZ)
# Run clang-check's analyzer over the library sources. The compilation
# database is read from $(BUILDDIR) rather than a hard-coded "build", so the
# target keeps working when BUILDDIR is overridden.
clang-check: all
	${CLANG_CHECK} -p $(BUILDDIR) -analyze src/*.c
mingw:
mkdir -p $(MINGW_BUILDDIR); \
cd $(MINGW_BUILDDIR); \
cmake .. -DCMAKE_TOOLCHAIN_FILE=../toolchain-mingw32.cmake -DCMAKE_INSTALL_PREFIX=$(MINGW_INSTALLDIR) ;\
$(MAKE) && $(MAKE) install
# Regenerate the man page from the public header. $(CMARK) is an order-only
# prerequisite: it must exist first, but a newer binary does not by itself
# trigger regeneration. The recipe has no trailing backslash, so it cannot
# swallow whatever line follows it.
man/man3/cmark-gfm.3: src/cmark-gfm.h | $(CMARK)
	python man/make_man_page.py $< > $@
archive:
git archive --prefix=$(RELEASE)/ -o $(RELEASE).tar.gz HEAD
git archive --prefix=$(RELEASE)/ -o $(RELEASE).zip HEAD
clean:
rm -rf $(BUILDDIR) $(MINGW_BUILDDIR) $(MINGW_INSTALLDIR)
# We include case_fold_switch.inc in the repository, so this shouldn't
# normally need to be generated.
$(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding.txt
perl tools/mkcasefold.pl < $< > $@
# We include scanners.c in the repository, so this shouldn't
# normally need to be generated.
$(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re
@case "$$(re2c -v)" in \
*\ 0.13.*|*\ 0.14|*\ 0.14.1) \
echo "re2c >= 0.14.2 is required"; \
false; \
;; \
esac
re2c -W -Werror --case-insensitive -b -i --no-generation-date -8 \
--encoding-policy substitute -o $@ $<
$(CLANG_FORMAT) $@
# We include ext_scanners.c in the repository, so this shouldn't
# normally need to be generated.
$(EXTDIR)/ext_scanners.c: $(EXTDIR)/ext_scanners.re
@case "$$(re2c -v)" in \
*\ 0.13.*|*\ 0.14|*\ 0.14.1) \
echo "re2c >= 0.14.2 is required"; \
false; \
;; \
esac
re2c --case-insensitive -b -i --no-generation-date -8 \
--encoding-policy substitute -o $@ $<
clang-format -style llvm -i $@
# We include entities.inc in the repository, so normally this
# doesn't need to be regenerated:
$(SRCDIR)/entities.inc: tools/make_entities_inc.py
python3 $< > $@
update-spec:
curl 'https://raw.githubusercontent.com/jgm/CommonMark/master/spec.txt'\
> $(SPEC)
test: $(SPEC) cmake_build
$(MAKE) -C $(BUILDDIR) test || (cat $(BUILDDIR)/Testing/Temporary/LastTest.log && exit 1)
$(ALLTESTS): $(SPEC) $(EXTENSIONS_SPEC)
( \
python3 test/spec_tests.py --spec $(SPEC) --dump-tests | \
python3 -c 'import json; import sys; tests = json.loads(sys.stdin.read()); u8s = open(1, "w", encoding="utf-8", closefd=False); print("\n".join([test["markdown"] for test in tests]), file=u8s)'; \
python3 test/spec_tests.py --spec $(EXTENSIONS_SPEC) --dump-tests | \
python3 -c 'import json; import sys; tests = json.loads(sys.stdin.read()); u8s = open(1, "w", encoding="utf-8", closefd=False); print("\n".join([test["markdown"] for test in tests]), file=u8s)'; \
) > $@
leakcheck: $(ALLTESTS)
for format in html man xml latex commonmark; do \
for opts in "" "--smart"; do \
echo "cmark-gfm -t $$format -e table -e strikethrough -e autolink -e tagfilter $$opts" ; \
valgrind -q --leak-check=full --dsymutil=yes --suppressions=suppressions --error-exitcode=1 $(PROG) -t $$format -e table -e strikethrough -e autolink -e tagfilter $$opts $(ALLTESTS) >/dev/null || exit 1;\
done; \
done;
fuzztest:
{ for i in `seq 1 10`; do \
cat /dev/urandom | head -c $(FUZZCHARS) | iconv -f latin1 -t utf-8 | tee fuzz-$$i.txt | \
/usr/bin/env time -p $(PROG) >/dev/null && rm fuzz-$$i.txt ; \
done } 2>&1 | grep 'user\|abnormally'
progit:
git clone https://github.com/progit/progit.git
$(BENCHFILE): progit
echo "" > $@
for lang in ar az be ca cs de en eo es es-ni fa fi fr hi hu id it ja ko mk nl no-nb pl pt-br ro ru sr th tr uk vi zh zh-tw; do \
for i in `seq 1 10`; do \
cat progit/$$lang/*/*.markdown >> $@; \
done; \
done
# for more accurate results, run with
# sudo renice -10 $$; make bench
bench: $(BENCHFILE)
{ for x in `seq 1 $(NUMRUNS)` ; do \
/usr/bin/env time -p $(PROG) </dev/null >/dev/null ; \
/usr/bin/env time -p $(PROG) $< >/dev/null ; \
done \
} 2>&1 | grep 'real' | awk '{print $$2}' | python3 'bench/stats.py'
newbench:
for f in $(BENCHSAMPLES) ; do \
printf "%26s " `basename $$f` ; \
{ for x in `seq 1 $(NUMRUNS)` ; do \
/usr/bin/env time -p $(PROG) </dev/null >/dev/null ; \
for x in `seq 1 200` ; do cat $$f ; done | \
/usr/bin/env time -p $(PROG) > /dev/null; \
done \
} 2>&1 | grep 'real' | awk '{print $$2}' | \
python3 'bench/stats.py'; done
format:
$(CLANG_FORMAT) src/*.c src/*.h api_test/*.c api_test/*.h
format-extensions:
clang-format -style llvm -i extensions/*.c extensions/*.h
operf: $(CMARK)
operf $< < $(BENCHFILE) > /dev/null
distclean: clean
-rm -rf *.dSYM
-rm -f README.html
-rm -rf $(BENCHFILE) $(ALLTESTS) progit
docker:
docker build -t cmark-gfm $(CURDIR)/tools
docker run --privileged -t -i -v $(CURDIR):/src/cmark-gfm -w /src/cmark-gfm cmark-gfm /bin/bash

View File

@ -1,38 +0,0 @@
# NMake driver for Windows builds: configures an out-of-source CMake build
# with the "NMake Makefiles" generator and forwards to the generated makefile.
SRCDIR=src
DATADIR=data
BUILDDIR=build
INSTALLDIR=windows
SPEC=test/spec.txt
PROG=$(BUILDDIR)\src\cmark-gfm.exe
GENERATOR=NMake Makefiles

# Default target: build inside the configured build directory.
all: $(BUILDDIR)/CMakeFiles
	@cd $(BUILDDIR) && $(MAKE) /nologo && cd ..

# Create and configure the build directory (static library only).
$(BUILDDIR)/CMakeFiles:
	@-mkdir $(BUILDDIR) 2> nul
	cd $(BUILDDIR) && \
	cmake \
	    -G "$(GENERATOR)" \
	    -D CMAKE_BUILD_TYPE=$(BUILD_TYPE) \
	    -D CMAKE_INSTALL_PREFIX=$(INSTALLDIR) \
	    -D CMARK_STATIC=ON \
	    -D CMARK_SHARED=OFF \
	    .. && \
	cd ..

install: all
	@cd $(BUILDDIR) && $(MAKE) /nologo install && cd ..

# Remove the build tree and the install tree. INSTALLDIR is the install
# prefix configured above; MINGW_INSTALLDIR is not defined in this makefile.
clean:
	-rmdir /s /q $(BUILDDIR) $(INSTALLDIR) 2> nul

$(SRCDIR)\case_fold_switch.inc: $(DATADIR)\CaseFolding-3.2.0.txt
	perl mkcasefold.pl < $? > $@

# Run the test suite verbosely (ARGS="-V" is handed to the generated test target).
test: $(SPEC) all
	@cd $(BUILDDIR) && $(MAKE) /nologo test ARGS="-V" && cd ..

distclean: clean
	del /q src\scanners.c 2> nul
	del /q spec.md spec.html 2> nul

View File

@ -1,206 +0,0 @@
cmark-gfm
=========
![Actions CI](https://github.com/github/cmark-gfm/actions/workflows/ci.yml/badge.svg)
`cmark-gfm` is an extended version of the C reference implementation of
[CommonMark], a rationalized version of Markdown syntax with a spec. This
repository adds GitHub Flavored Markdown extensions to
[the upstream implementation], as defined in [the spec].
The rest of the README is preserved as-is from the upstream source. Note that
the library and binaries produced by this fork are suffixed with `-gfm` in
order to distinguish them from the upstream.
---
It provides a shared library (`libcmark`) with functions for parsing
CommonMark documents to an abstract syntax tree (AST), manipulating
the AST, and rendering the document to HTML, groff man, LaTeX,
CommonMark, or an XML representation of the AST. It also provides a
command-line program (`cmark`) for parsing and rendering CommonMark
documents.
Advantages of this library:
- **Portable.** The library and program are written in standard
C99 and have no external dependencies. They have been tested with
MSVC, gcc, tcc, and clang.
- **Fast.** cmark can render a Markdown version of *War and Peace* in
the blink of an eye (127 milliseconds on a ten-year-old laptop,
vs. 100-400 milliseconds for an eye blink). In our [benchmarks],
cmark is 10,000 times faster than the original `Markdown.pl`, and
on par with the very fastest available Markdown processors.
- **Accurate.** The library passes all CommonMark conformance tests.
- **Standardized.** The library can be expected to parse CommonMark
the same way as any other conforming parser. So, for example,
you can use `commonmark.js` on the client to preview content that
will be rendered on the server using `cmark`.
- **Robust.** The library has been extensively fuzz-tested using
[american fuzzy lop]. The test suite includes pathological cases
that bring many other Markdown parsers to a crawl (for example,
thousands-deep nested bracketed text or block quotes).
- **Flexible.** CommonMark input is parsed to an AST which can be
manipulated programmatically prior to rendering.
- **Multiple renderers.** Output in HTML, groff man, LaTeX, CommonMark,
and a custom XML format is supported. And it is easy to write new
renderers to support other formats.
- **Free.** BSD2-licensed.
It is easy to use `libcmark` in python, lua, ruby, and other dynamic
languages: see the `wrappers/` subdirectory for some simple examples.
There are also libraries that wrap `libcmark` for
[Go](https://github.com/rhinoman/go-commonmark),
[Haskell](https://hackage.haskell.org/package/cmark),
[Ruby](https://github.com/gjtorikian/commonmarker),
[Lua](https://github.com/jgm/cmark-lua),
[Perl](https://metacpan.org/release/CommonMark),
[Python](https://pypi.python.org/pypi/paka.cmark),
[R](https://cran.r-project.org/package=commonmark),
[Tcl](https://github.com/apnadkarni/tcl-cmark),
[Scala](https://github.com/sparsetech/cmark-scala) and
[Node.js](https://github.com/killa123/node-cmark).
Installing
----------
Building the C program (`cmark`) and shared library (`libcmark`)
requires [cmake]. If you modify `scanners.re`, then you will also
need [re2c] \(>= 0.14.2\), which is used to generate `scanners.c` from
`scanners.re`. We have included a pre-generated `scanners.c` in
the repository to reduce build dependencies.
If you have GNU make, you can simply `make`, `make test`, and `make
install`. This calls [cmake] to create a `Makefile` in the `build`
directory, then uses that `Makefile` to create the executable and
library. The binaries can be found in `build/src`. The default
installation prefix is `/usr/local`. To change the installation
prefix, pass the `INSTALL_PREFIX` variable when you run `make` for the
first time: `make INSTALL_PREFIX=path`.
For a more portable method, you can use [cmake] manually. [cmake] knows
how to create build environments for many build systems. For example,
on FreeBSD:
mkdir build
cd build
cmake .. # optionally: -DCMAKE_INSTALL_PREFIX=path
make # executable will be created as build/src/cmark
make test
make install
Or, to create Xcode project files on OSX:
mkdir build
cd build
cmake -G Xcode ..
open cmark.xcodeproj
The GNU Makefile also provides a few other targets for developers.
To run a benchmark:
make bench
For more detailed benchmarks:
make newbench
To run a test for memory leaks using `valgrind`:
make leakcheck
To reformat source code using `clang-format`:
make format
To run a "fuzz test" against ten long randomly generated inputs:
make fuzztest
To do a more systematic fuzz test with [american fuzzy lop]:
AFL_PATH=/path/to/afl_directory make afl
Fuzzing with [libFuzzer] is also supported but, because libFuzzer is still
under active development, may not work with your system-installed version of
clang. Assuming LLVM has been built in `$HOME/src/llvm/build` the fuzzer can be
run with:
CC="$HOME/src/llvm/build/bin/clang" LIB_FUZZER_PATH="$HOME/src/llvm/lib/Fuzzer/libFuzzer.a" make libFuzzer
To make a release tarball and zip archive:
make archive
Installing (Windows)
--------------------
To compile with MSVC and NMAKE:
nmake
You can cross-compile a Windows binary and dll on linux if you have the
`mingw32` compiler:
make mingw
The binaries will be in `build-mingw/windows/bin`.
Usage
-----
Instructions for the use of the command line program and library can
be found in the man pages in the `man` subdirectory.
Security
--------
By default, the library will scrub raw HTML and potentially
dangerous links (`javascript:`, `vbscript:`, `data:`, `file:`).
To allow these, use the option `CMARK_OPT_UNSAFE` (or
`--unsafe`) with the command line program. If doing so, we
recommend you use an HTML sanitizer specific to your needs to
protect against [XSS
attacks](http://en.wikipedia.org/wiki/Cross-site_scripting).
Contributing
------------
There is a [forum for discussing
CommonMark](http://talk.commonmark.org); you should use it instead of
github issues for questions and possibly open-ended discussions.
Use the [github issue tracker](http://github.com/commonmark/CommonMark/issues)
only for simple, clear, actionable issues.
Authors
-------
John MacFarlane wrote the original library and program.
The block parsing algorithm was worked out together with David
Greenspan. Vicent Marti optimized the C implementation for
performance, increasing its speed tenfold. Kārlis Gaņģis helped
work out a better parsing algorithm for links and emphasis,
eliminating several worst-case performance issues.
Nick Wellnhofer contributed many improvements, including
most of the C library's API and its test harness.
[benchmarks]: benchmarks.md
[the spec]: https://github.github.com/gfm/
[the upstream implementation]: https://github.com/jgm/cmark
[CommonMark]: http://commonmark.org
[cmake]: http://www.cmake.org/download/
[re2c]: http://re2c.org
[commonmark.js]: https://github.com/commonmark/commonmark.js
[Build Status]: https://img.shields.io/travis/github/cmark-gfm/master.svg?style=flat
[Windows Build Status]: https://ci.appveyor.com/api/projects/status/wv7ifhqhv5itm3d5?svg=true
[american fuzzy lop]: http://lcamtuf.coredump.cx/afl/
[libFuzzer]: http://llvm.org/docs/LibFuzzer.html

View File

@ -1,30 +0,0 @@
# api_test: executable that exercises the library API from C and C++ sources.
add_executable(api_test
  cplusplus.cpp
  harness.c
  harness.h
  main.c
)

# Header search paths are scoped to this target only.
target_include_directories(api_test PRIVATE
  ${PROJECT_SOURCE_DIR}/src
  ${PROJECT_BINARY_DIR}/src
  ${PROJECT_BINARY_DIR}/extensions
)

# Link against the shared or the static flavour of the libraries, matching
# the CMARK_SHARED setting.
if(CMARK_SHARED)
  target_link_libraries(api_test PRIVATE libcmark-gfm-extensions libcmark-gfm)
else()
  target_link_libraries(api_test PRIVATE libcmark-gfm-extensions_static libcmark-gfm_static)
endif()

# Compiler flags
if(MSVC)
  # Force to always compile with W4.
  # The test must inspect CMAKE_C_FLAGS, the same variable that is rewritten
  # below; testing CMAKE_CXX_FLAGS could either skip the replacement or append
  # a second /W option to the C flags.
  if(CMAKE_C_FLAGS MATCHES "/W[0-4]")
    string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
  else()
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4")
  endif()
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4706 /D_CRT_SECURE_NO_WARNINGS")
  # /TP makes MSVC compile the C sources as C++.
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /TP")
elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -std=c99 -pedantic")
endif()

View File

@ -1,15 +0,0 @@
#include <cstdlib>
#include "cmark-gfm.h"
#include "cplusplus.h"
#include "harness.h"
// Smoke test: render a one-paragraph document through the C API from a C++
// translation unit and compare the HTML with the expected output.
void test_cplusplus(test_batch_runner *runner) {
  static const char markdown[] = "paragraph\n";

  // sizeof - 1 drops the terminating NUL from the length passed to the parser.
  char *rendered =
      cmark_markdown_to_html(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT);

  STR_EQ(runner, rendered, "<p>paragraph</p>\n", "libcmark works with C++");

  free(rendered);
}

View File

@ -1,16 +0,0 @@
/* Declares the C++ smoke test so that it can be called from C code. */
#ifndef CMARK_API_TEST_CPLUSPLUS_H
#define CMARK_API_TEST_CPLUSPLUS_H
#include "harness.h"
/* Give the declaration C linkage when this header is compiled as C++. */
#ifdef __cplusplus
extern "C" {
#endif
/* Runs the C++ API test and records its result in `runner`. */
void test_cplusplus(test_batch_runner *runner);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,111 +0,0 @@
#define _DEFAULT_SOURCE
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "harness.h"
/*
 * Allocate a zero-initialized test_batch_runner (all counters start at 0).
 *
 * Returns the new runner, or NULL if the allocation fails.
 *
 * The parameter list is spelled `(void)` so that, in C, this is a real
 * prototype rather than an old-style declaration accepting any arguments.
 */
test_batch_runner *test_batch_runner_new(void) {
  return (test_batch_runner *)calloc(1, sizeof(test_batch_runner));
}
/*
 * Record the outcome of one test.
 *
 * Advances the test counter, then either counts a pass or prints
 * "FAILED test N: <formatted msg>" to stderr and counts a failure.
 * `msg` is a printf-style format consumed together with `ap`.
 */
static void test_result(test_batch_runner *runner, int cond, const char *msg,
                        va_list ap) {
  runner->test_num += 1;

  if (!cond) {
    fprintf(stderr, "FAILED test %d: ", runner->test_num);
    vfprintf(stderr, msg, ap);
    fprintf(stderr, "\n");
    runner->num_failed += 1;
    return;
  }

  runner->num_passed += 1;
}
/*
 * Mark `num_tests` tests as skipped: both the skipped counter and the
 * running test number advance by that amount.
 */
void SKIP(test_batch_runner *runner, int num_tests) {
  runner->num_skipped += num_tests;
  runner->test_num += num_tests;
}
/*
 * Record a test that passes when `cond` is non-zero. `msg` and the variadic
 * arguments form a printf-style description printed on failure.
 */
void OK(test_batch_runner *runner, int cond, const char *msg, ...) {
  va_list args;

  va_start(args, msg);
  test_result(runner, cond, msg, args);
  va_end(args);
}
/*
 * Record a test that passes when `got` equals `expected`. On failure the two
 * values are printed to stderr after the formatted description.
 */
void INT_EQ(test_batch_runner *runner, int got, int expected, const char *msg,
            ...) {
  const int matches = (got == expected);
  va_list args;

  va_start(args, msg);
  test_result(runner, matches, msg, args);
  va_end(args);

  if (matches) {
    return;
  }

  fprintf(stderr, " Got: %d\n", got);
  fprintf(stderr, " Expected: %d\n", expected);
}
#ifndef _WIN32
#include <unistd.h>

/*
 * Write `header` followed by `data` to a newly created temporary file.
 *
 * Returns the malloc'd file name (the caller must remove() the file and
 * free() the name), or NULL if the name could not be allocated or the file
 * could not be created/opened. Nothing is left behind on failure.
 */
static char *write_tmp(char const *header, char const *data) {
  char *name = strdup("/tmp/fileXXXXXX");
  int fd;
  FILE *f;

  if (name == NULL) {
    return NULL;
  }

  fd = mkstemp(name);
  if (fd == -1) {
    free(name);
    return NULL;
  }

  f = fdopen(fd, "w+");
  if (f == NULL) {
    /* fdopen did not take ownership of fd, so release it ourselves. */
    close(fd);
    remove(name);
    free(name);
    return NULL;
  }

  fputs(header, f);
  fwrite(data, 1, strlen(data), f);
  fclose(f);
  return name;
}
#endif

/*
 * Record a test that passes when `got` and `expected` are equal strings.
 *
 * A NULL `got` or `expected` counts as a failure instead of being passed to
 * strcmp. On failure, non-Windows builds show a `git diff --no-index` of the
 * two strings (via temporary files); if those files cannot be created, or on
 * Windows, both strings are printed to stderr instead.
 */
void STR_EQ(test_batch_runner *runner, const char *got, const char *expected,
            const char *msg, ...) {
  int cond = got != NULL && expected != NULL && strcmp(got, expected) == 0;
  /* Printable stand-ins so the failure report never dereferences NULL. */
  const char *got_text = got != NULL ? got : "(null)";
  const char *expected_text = expected != NULL ? expected : "(null)";
  va_list ap;

  va_start(ap, msg);
  test_result(runner, cond, msg, ap);
  va_end(ap);

  if (!cond) {
#ifndef _WIN32
    char *got_fn = write_tmp("actual\n", got_text);
    char *expected_fn = write_tmp("expected\n", expected_text);

    if (got_fn != NULL && expected_fn != NULL) {
      char buf[1024];
      snprintf(buf, sizeof(buf), "git diff --no-index %s %s", expected_fn, got_fn);
      system(buf);
    } else {
      /* Temporary files unavailable: fall back to plain output. */
      fprintf(stderr, " Got: \"%s\"\n", got_text);
      fprintf(stderr, " Expected: \"%s\"\n", expected_text);
    }

    if (got_fn != NULL) {
      remove(got_fn);
      free(got_fn);
    }
    if (expected_fn != NULL) {
      remove(expected_fn);
      free(expected_fn);
    }
#else
    fprintf(stderr, " Got: \"%s\"\n", got_text);
    fprintf(stderr, " Expected: \"%s\"\n", expected_text);
#endif
  }
}
/* Return 1 when no test has failed so far, 0 otherwise. */
int test_ok(test_batch_runner *runner) {
  return !runner->num_failed;
}
/*
 * Print the passed/failed/skipped totals to stderr, followed by a final
 * PASS or FAIL line depending on test_ok().
 */
void test_print_summary(test_batch_runner *runner) {
  fprintf(stderr, "%d tests passed, %d failed, %d skipped\n",
          runner->num_passed, runner->num_failed, runner->num_skipped);

  fputs(test_ok(runner) ? "PASS\n" : "FAIL\n", stderr);
}

View File

@ -1,35 +0,0 @@
/* Minimal test harness: counters plus assertion helpers that report to stderr. */
#ifndef CMARK_API_TEST_HARNESS_H
#define CMARK_API_TEST_HARNESS_H

#ifdef __cplusplus
extern "C" {
#endif

/* Running totals for one batch of tests. */
typedef struct {
  int test_num;    /* number of tests seen so far (including skipped ones) */
  int num_passed;  /* tests whose condition held */
  int num_failed;  /* tests whose condition did not hold */
  int num_skipped; /* tests skipped via SKIP() */
} test_batch_runner;

/*
 * Allocate a runner with every counter set to zero.
 * Declared with `(void)` so C callers get a real prototype.
 */
test_batch_runner *test_batch_runner_new(void);

/* Count `num_tests` tests as skipped. */
void SKIP(test_batch_runner *runner, int num_tests);

/* Pass when `cond` is non-zero; `msg` is a printf-style description. */
void OK(test_batch_runner *runner, int cond, const char *msg, ...);

/* Pass when `got == expected`; both values are printed on failure. */
void INT_EQ(test_batch_runner *runner, int got, int expected, const char *msg,
            ...);

/* Pass when the two strings compare equal; the difference is shown on failure. */
void STR_EQ(test_batch_runner *runner, const char *got, const char *expected,
            const char *msg, ...);

/* Non-zero when no test has failed. */
int test_ok(test_batch_runner *runner);

/* Print the totals and a final PASS/FAIL line to stderr. */
void test_print_summary(test_batch_runner *runner);

#ifdef __cplusplus
}
#endif

#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,21 +0,0 @@
# AppVeyor CI: builds with tools\appveyor-build.bat, publishes cmark-gfm.exe
# as an artifact and runs `nmake test`.
environment:
# Python installation that the install step below puts on PATH.
PYTHON: "C:\\Python34-x64"
PYTHON_VERSION: "3.4.3"
PYTHON_ARCH: "64"
# One job per MSVC_VERSION value.
matrix:
- MSVC_VERSION: 10
- MSVC_VERSION: 12
# set up for nmake:
install:
- "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
build_script:
- 'tools\appveyor-build.bat'
artifacts:
- path: build/src/cmark-gfm.exe
name: cmark-gfm.exe
test_script:
- 'nmake test'

View File

@ -1,16 +0,0 @@
> the simple example of a blockquote
> the simple example of a blockquote
> the simple example of a blockquote
> the simple example of a blockquote
... continuation
... continuation
... continuation
... continuation
empty blockquote:
>
>
>
>

View File

@ -1,13 +0,0 @@
>>>>>> deeply nested blockquote
>>>>> deeply nested blockquote
>>>> deeply nested blockquote
>>> deeply nested blockquote
>> deeply nested blockquote
> deeply nested blockquote
> deeply nested blockquote
>> deeply nested blockquote
>>> deeply nested blockquote
>>>> deeply nested blockquote
>>>>> deeply nested blockquote
>>>>>> deeply nested blockquote

View File

@ -1,11 +0,0 @@
an
example
of
a code
block

View File

@ -1,14 +0,0 @@
``````````text
an
example
```
of
a fenced
```
code
block
``````````

View File

@ -1,9 +0,0 @@
# heading
### heading
##### heading
# heading #
### heading ###
##### heading \#\#\#\#\######
############ not a heading

View File

@ -1,10 +0,0 @@
* * * * *
- - - - -
________
************************* text

View File

@ -1,32 +0,0 @@
<div class="this is an html block">
blah blah
</div>
<table>
<tr>
<td>
**test**
</td>
</tr>
</table>
<table>
<tr>
<td>
test
</td>
</tr>
</table>
<![CDATA[
[[[[[[[[[[[... *cdata section - this should not be parsed* ...]]]]]]]]]]]
]]>

View File

@ -1,8 +0,0 @@
heading
---
heading
===================================
not a heading
----------------------------------- text

View File

@ -1,67 +0,0 @@
- tidy
- bullet
- list
- loose
- bullet
- list
0. ordered
1. list
2. example
-
-
-
-
1.
2.
3.
- an example
of a list item
with a continuation
this part is inside the list
this part is just a paragraph
1. test
- test
1. test
- test
111111111111111111111111111111111111111111. is this a valid bullet?
- _________________________
- this
- is
a
long
- loose
- list
- with
- some
tidy
- list
- items
- in
- between
- _________________________

View File

@ -1,36 +0,0 @@
- this
- is
- a
- deeply
- nested
- bullet
- list
1. this
2. is
3. a
4. deeply
5. nested
6. unordered
7. list
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 6
- 5
- 4
- 3
- 2
- 1
- - - - - - - - - deeply-nested one-element item

View File

@ -1,15 +0,0 @@
[1] [2] [3] [1] [2] [3]
[looooooooooooooooooooooooooooooooooooooooooooooooooong label]
[1]: <http://something.example.com/foo/bar>
[2]: http://something.example.com/foo/bar 'test'
[3]:
http://foo/bar
[ looooooooooooooooooooooooooooooooooooooooooooooooooong label ]:
111
'test'
[[[[[[[[[[[[[[[[[[[[ this should not slow down anything ]]]]]]]]]]]]]]]]]]]]: q
(as long as it is not referenced anywhere)
[[[[[[[[[[[[[[[[[[[[]: this is not a valid reference

View File

@ -1,17 +0,0 @@
[[[[[[[foo]]]]]]]
[[[[[[[foo]]]]]]]: bar
[[[[[[foo]]]]]]: bar
[[[[[foo]]]]]: bar
[[[[foo]]]]: bar
[[[foo]]]: bar
[[foo]]: bar
[foo]: bar
[*[*[*[*[foo]*]*]*]*]
[*[*[*[*[foo]*]*]*]*]: bar
[*[*[*[foo]*]*]*]: bar
[*[*[foo]*]*]: bar
[*[foo]*]: bar
[foo]: bar

View File

@ -1,14 +0,0 @@
closed (valid) autolinks:
<ftp://1.2.3.4:21/path/foo>
<http://foo.bar.baz?q=hello&id=22&boolean>
<http://veeeeeeeeeeeeeeeeeeery.loooooooooooooooooooooooooooooooong.autolink/>
<teeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeest@gmail.com>
these are not autolinks:
<ftp://1.2.3.4:21/path/foo
<http://foo.bar.baz?q=hello&id=22&boolean
<http://veeeeeeeeeeeeeeeeeeery.loooooooooooooooooooooooooooooooong.autolink
<teeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeest@gmail.com
< http://foo.bar.baz?q=hello&id=22&boolean >

View File

@ -1,3 +0,0 @@
`lots`of`backticks`
``i``wonder``how``this``will``be``parsed``

View File

@ -1,5 +0,0 @@
*this* *is* *your* *basic* *boring* *emphasis*
_this_ _is_ _your_ _basic_ _boring_ _emphasis_
**this** **is** **your** **basic** **boring** **emphasis**

View File

@ -1,5 +0,0 @@
*this *is *a *bunch* of* nested* emphases*
__this __is __a __bunch__ of__ nested__ emphases__
***this ***is ***a ***bunch*** of*** nested*** emphases***

View File

@ -1,5 +0,0 @@
*this *is *a *worst *case *for *em *backtracking
__this __is __a __worst __case __for __em __backtracking
***this ***is ***a ***worst ***case ***for ***em ***backtracking

View File

@ -1,11 +0,0 @@
entities:
&nbsp; &amp; &copy; &AElig; &Dcaron; &frac34; &HilbertSpace; &DifferentialD; &ClockwiseContourIntegral;
&#35; &#1234; &#992; &#98765432;
non-entities:
&18900987654321234567890; &1234567890098765432123456789009876543212345678987654;
&qwertyuioppoiuytrewqwer; &oiuytrewqwertyuioiuytrewqwertyuioytrewqwertyuiiuytri;

View File

@ -1,15 +0,0 @@
\t\e\s\t\i\n\g \e\s\c\a\p\e \s\e\q\u\e\n\c\e\s
\!\\\"\#\$\%\&\'\(\)\*\+\,\.\/\:\;\<\=\>\?
\@ \[ \] \^ \_ \` \{ \| \} \~ \- \'
\
\\
\\\
\\\\
\\\\\
\<this\> \<is\> \<not\> \<html\>

View File

@ -1,44 +0,0 @@
Taking commonmark tests from the spec for benchmarking here:
<a><bab><c2c>
<a/><b2/>
<a /><b2
data="foo" >
<a foo="bar" bam = 'baz <em>"</em>'
_boolean zoop:33=zoop:33 />
<33> <__>
<a h*#ref="hi">
<a href="hi'> <a href=hi'>
< a><
foo><bar/ >
<a href='bar'title=title>
</a>
</foo >
</a href="foo">
foo <!-- this is a
comment - with hyphen -->
foo <!-- not a comment -- two hyphens -->
foo <?php echo $a; ?>
foo <!ELEMENT br EMPTY>
foo <![CDATA[>&<]]>
<a href="&ouml;">
<a href="\*">
<a href="\"">

View File

@ -1,23 +0,0 @@
Valid links:
[this is a link]()
[this is a link](<http://something.example.com/foo/bar>)
[this is a link](http://something.example.com/foo/bar 'test')
![this is an image]()
![this is an image](<http://something.example.com/foo/bar>)
![this is an image](http://something.example.com/foo/bar 'test')
[escape test](<\>\>\>\>\>\>\>\>\>\>\>\>\>\>> '\'\'\'\'\'\'\'\'\'\'\'\'\'\'')
[escape test \]\]\]\]\]\]\]\]\]\]\]\]\]\]\]\]](\)\)\)\)\)\)\)\)\)\)\)\)\)\))
Invalid links:
[this is not a link
[this is not a link](
[this is not a link](http://something.example.com/foo/bar 'test'
[this is not a link](((((((((((((((((((((((((((((((((((((((((((((((
[this is not a link]((((((((((()))))))))) (((((((((()))))))))))

View File

@ -1,13 +0,0 @@
Valid links:
[[[[[[[[](test)](test)](test)](test)](test)](test)](test)]
[ [[[[[[[[[[[[[[[[[[ [](test) ]]]]]]]]]]]]]]]]]] ](test)
Invalid links:
[[[[[[[[[
[ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [ [
![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![![

View File

@ -1,24 +0,0 @@
this\
should\
be\
separated\
by\
newlines
this
should
be
separated
by
newlines
too
this
should
not
be
separated
by
newlines

View File

@ -1,13 +0,0 @@
Lorem ipsum dolor sit amet, __consectetur__ adipiscing elit. Cras imperdiet nec erat ac condimentum. Nulla vel rutrum ligula. Sed hendrerit interdum orci a posuere. Vivamus ut velit aliquet, mollis purus eget, iaculis nisl. Proin posuere malesuada ante. Proin auctor orci eros, ac molestie lorem dictum nec. Vestibulum sit amet erat est. Morbi luctus sed elit ac luctus. Proin blandit, enim vitae egestas posuere, neque elit ultricies dui, vel mattis nibh enim ac lorem. Maecenas molestie nisl sit amet velit dictum lobortis. Aliquam erat volutpat.
Vivamus sagittis, diam in [vehicula](https://github.com/markdown-it/markdown-it) lobortis, sapien arcu mattis erat, vel aliquet sem urna et risus. Ut feugiat sapien vitae mi elementum laoreet. Suspendisse potenti. Aliquam erat nisl, aliquam pretium libero aliquet, sagittis eleifend nunc. In hac habitasse platea dictumst. Integer turpis augue, tincidunt dignissim mauris id, rhoncus dapibus purus. Maecenas et enim odio. Nullam massa metus, varius quis vehicula sed, pharetra mollis erat. In quis viverra velit. Vivamus placerat, est nec hendrerit varius, enim dui hendrerit magna, ut pulvinar nibh lorem vel lacus. Mauris a orci iaculis, hendrerit eros sed, gravida leo. In dictum mauris vel augue varius, ac ullamcorper nisl ornare. In eu posuere velit, ac fermentum arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nullam sed malesuada leo, at interdum elit.
Nullam ut tincidunt nunc. [Pellentesque][1] metus lacus, commodo eget justo ut, rutrum varius nunc. Sed non rhoncus risus. Morbi sodales gravida pulvinar. Duis malesuada, odio volutpat elementum vulputate, massa magna scelerisque ante, et accumsan tellus nunc in sem. Donec mattis arcu et velit aliquet, non sagittis justo vestibulum. Suspendisse volutpat felis lectus, nec consequat ipsum mattis id. Donec dapibus vehicula facilisis. In tincidunt mi nisi, nec faucibus tortor euismod nec. Suspendisse ante ligula, aliquet vitae libero eu, vulputate dapibus libero. Sed bibendum, sapien at posuere interdum, libero est sollicitudin magna, ac gravida tellus purus eu ipsum. Proin ut quam arcu.
Suspendisse potenti. Donec ante velit, ornare at augue quis, tristique laoreet sem. Etiam in ipsum elit. Nullam cursus dolor sit amet nulla feugiat tristique. Phasellus ac tellus tincidunt, imperdiet purus eget, ullamcorper ipsum. Cras eu tincidunt sem. Nullam sed dapibus magna. Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id venenatis tortor. In consectetur sollicitudin pharetra. Etiam convallis nisi nunc, et aliquam turpis viverra sit amet. Maecenas faucibus sodales tortor. Suspendisse lobortis mi eu leo viverra volutpat. Pellentesque velit ante, vehicula sodales congue ut, elementum a urna. Cras tempor, ipsum eget luctus rhoncus, arcu ligula fermentum urna, vulputate pharetra enim enim non libero.
Proin diam quam, elementum in eleifend id, elementum et metus. Cras in justo consequat justo semper ultrices. Sed dignissim lectus a ante mollis, nec vulputate ante molestie. Proin in porta nunc. Etiam pulvinar turpis sed velit porttitor, vel adipiscing velit fringilla. Cras ac tellus vitae purus pharetra tincidunt. Sed cursus aliquet aliquet. Cras eleifend commodo malesuada. In turpis turpis, ullamcorper ut tincidunt a, ullamcorper a nunc. Etiam luctus tellus ac dapibus gravida. Ut nec lacus laoreet neque ullamcorper volutpat.
Nunc et leo erat. Aenean mattis ultrices lorem, eget adipiscing dolor ultricies eu. In hac habitasse platea dictumst. Vivamus cursus feugiat sapien quis aliquam. Mauris quam libero, porta vel volutpat ut, blandit a purus. Vivamus vestibulum dui vel tortor molestie, sit amet feugiat sem commodo. Nulla facilisi. Sed molestie arcu eget tellus vestibulum tristique.
[1]: https://github.com/markdown-it

View File

@ -1,18 +0,0 @@
this is a test for tab expansion, be careful not to replace them with spaces
1 4444
22 333
333 22
4444 1
tab-indented line
space-indented line
tab-indented line
a lot of spaces in between here
a lot of tabs in between here

View File

@ -1,595 +0,0 @@
## Module statistics.py
##
## Copyright (c) 2013 Steven D'Aprano <steve+python@pearwood.info>.
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
"""
Basic statistics module.
This module provides functions for calculating statistics of data, including
averages, variance, and standard deviation.
Calculating averages
--------------------
================== =============================================
Function Description
================== =============================================
mean Arithmetic mean (average) of data.
median Median (middle value) of data.
median_low Low median of data.
median_high High median of data.
median_grouped Median, or 50th percentile, of grouped data.
mode Mode (most common value) of data.
================== =============================================
Calculate the arithmetic mean ("the average") of data:
>>> mean([-1.0, 2.5, 3.25, 5.75])
2.625
Calculate the standard median of discrete data:
>>> median([2, 3, 4, 5])
3.5
Calculate the median, or 50th percentile, of data grouped into class intervals
centred on the data values provided. E.g. if your data points are rounded to
the nearest whole number:
>>> median_grouped([2, 2, 3, 3, 3, 4]) #doctest: +ELLIPSIS
2.8333333333...
This should be interpreted in this way: you have two data points in the class
interval 1.5-2.5, three data points in the class interval 2.5-3.5, and one in
the class interval 3.5-4.5. The median of these data points is 2.8333...
Calculating variability or spread
---------------------------------
================== =============================================
Function Description
================== =============================================
pvariance Population variance of data.
variance Sample variance of data.
pstdev Population standard deviation of data.
stdev Sample standard deviation of data.
================== =============================================
Calculate the standard deviation of sample data:
>>> stdev([2.5, 3.25, 5.5, 11.25, 11.75]) #doctest: +ELLIPSIS
4.38961843444...
If you have previously calculated the mean, you can pass it as the optional
second argument to the four "spread" functions to avoid recalculating it:
>>> data = [1, 2, 2, 4, 4, 4, 5, 6]
>>> mu = mean(data)
>>> pvariance(data, mu)
2.5
Exceptions
----------
A single exception is defined: StatisticsError is a subclass of ValueError.
"""
__all__ = [ 'StatisticsError',
'pstdev', 'pvariance', 'stdev', 'variance',
'median', 'median_low', 'median_high', 'median_grouped',
'mean', 'mode',
]
import collections
import math
from fractions import Fraction
from decimal import Decimal
# === Exceptions ===
# Raised by the functions in this module when the data cannot be summarized
# (for example, empty input). Subclasses ValueError.
class StatisticsError(ValueError):
pass
# === Private utilities ===
def _sum(data, start=0):
"""_sum(data [, start]) -> value
Return a high-precision sum of the given numeric data. If optional
argument ``start`` is given, it is added to the total. If ``data`` is
empty, ``start`` (defaulting to 0) is returned.
Examples
--------
>>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75)
11.0
Some sources of round-off error will be avoided:
>>> _sum([1e50, 1, -1e50] * 1000) # Built-in sum returns zero.
1000.0
Fractions and Decimals are also supported:
>>> from fractions import Fraction as F
>>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)])
Fraction(63, 20)
>>> from decimal import Decimal as D
>>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")]
>>> _sum(data)
Decimal('0.6963')
Mixed types are currently treated as an error, except that int is
allowed.
"""
# We fail as soon as we reach a value that is not an int or the type of
# the first value which is not an int. E.g. _sum([int, int, float, int])
# is okay, but _sum([int, int, float, Fraction]) is not.
allowed_types = set([int, type(start)])
n, d = _exact_ratio(start)
partials = {d: n} # map {denominator: sum of numerators}
# Micro-optimizations.
exact_ratio = _exact_ratio
partials_get = partials.get
# Add numerators for each denominator.
for x in data:
_check_type(type(x), allowed_types)
n, d = exact_ratio(x)
partials[d] = partials_get(d, 0) + n
# Find the expected result type. If allowed_types has only one item, it
# will be int; if it has two, use the one which isn't int.
assert len(allowed_types) in (1, 2)
if len(allowed_types) == 1:
assert allowed_types.pop() is int
T = int
else:
T = (allowed_types - set([int])).pop()
# A None denominator is how _exact_ratio reports a non-finite value (INF or
# NAN); in that case the accumulated non-finite value is the result.
if None in partials:
assert issubclass(T, (float, Decimal))
assert not math.isfinite(partials[None])
return T(partials[None])
# Combine the per-denominator partial sums exactly as a single Fraction.
total = Fraction()
for d, n in sorted(partials.items()):
total += Fraction(n, d)
if issubclass(T, int):
assert total.denominator == 1
return T(total.numerator)
# NOTE(review): Decimal results are built by dividing the converted numerator
# by the denominator, presumably because T(total) is not supported for
# Decimal — confirm.
if issubclass(T, Decimal):
return T(total.numerator)/total.denominator
return T(total)
def _check_type(T, allowed):
if T not in allowed:
if len(allowed) == 1:
allowed.add(T)
else:
types = ', '.join([t.__name__ for t in allowed] + [T.__name__])
raise TypeError("unsupported mixed types: %s" % types)
def _exact_ratio(x):
"""Convert Real number x exactly to (numerator, denominator) pair.
>>> _exact_ratio(0.25)
(1, 4)
x is expected to be an int, Fraction, Decimal or float.
If the conversion raises OverflowError or ValueError (INF or NAN), the pair
(x, None) is returned instead. A value offering none of the conversions
tried below raises TypeError.
"""
# The conversions are tried in order; each AttributeError falls through to
# the next candidate, so the nesting order matters.
try:
try:
# int, Fraction
return (x.numerator, x.denominator)
except AttributeError:
# float
try:
return x.as_integer_ratio()
except AttributeError:
# Decimal
try:
return _decimal_to_ratio(x)
except AttributeError:
msg = "can't convert type '{}' to numerator/denominator"
raise TypeError(msg.format(type(x).__name__)) from None
except (OverflowError, ValueError):
# INF or NAN
if __debug__:
# Decimal signalling NANs cannot be converted to float :-(
if isinstance(x, Decimal):
assert not x.is_finite()
else:
assert not math.isfinite(x)
return (x, None)
# FIXME This is faster than Fraction.from_decimal, but still too slow.
def _decimal_to_ratio(d):
"""Convert Decimal d to exact integer ratio (numerator, denominator).
>>> from decimal import Decimal
>>> _decimal_to_ratio(Decimal("2.6"))
(26, 10)
"""
sign, digits, exp = d.as_tuple()
if exp in ('F', 'n', 'N'): # INF, NAN, sNAN
assert not d.is_finite()
raise ValueError
num = 0
for digit in digits:
num = num*10 + digit
if exp < 0:
den = 10**-exp
else:
num *= 10**exp
den = 1
if sign:
num = -num
return (num, den)
def _counts(data):
# Generate a table of sorted (value, frequency) pairs.
table = collections.Counter(iter(data)).most_common()
if not table:
return table
# Extract the values with the highest frequency.
maxfreq = table[0][1]
for i in range(1, len(table)):
if table[i][1] != maxfreq:
table = table[:i]
break
return table
# === Measures of central tendency (averages) ===
def mean(data):
    """Return the sample arithmetic mean of ``data``.

    >>> mean([1, 2, 3, 4, 4])
    2.8

    >>> from fractions import Fraction as F
    >>> mean([F(3, 7), F(1, 21), F(5, 3), F(1, 3)])
    Fraction(13, 21)

    >>> from decimal import Decimal as D
    >>> mean([D("0.5"), D("0.75"), D("0.625"), D("0.375")])
    Decimal('0.5625')

    Raises StatisticsError when ``data`` is empty.
    """
    # An iterator can be consumed only once, so materialize it before taking
    # its length.
    values = list(data) if iter(data) is data else data
    count = len(values)
    if count < 1:
        raise StatisticsError('mean requires at least one data point')
    return _sum(values)/count
# FIXME: investigate ways to calculate medians without sorting? Quickselect?
def median(data):
    """Return the median (middle value) of numeric data.

    With an odd number of data points the middle point is returned; with
    an even number the two middle points are averaged:

    >>> median([1, 3, 5])
    3
    >>> median([1, 3, 5, 7])
    4.0

    Raises StatisticsError when ``data`` is empty.
    """
    ordered = sorted(data)
    count = len(ordered)
    if count == 0:
        raise StatisticsError("no median for empty data")
    half, is_odd = divmod(count, 2)
    if is_odd:
        return ordered[half]
    return (ordered[half - 1] + ordered[half])/2
def median_low(data):
    """Return the low median of numeric data.

    With an odd number of data points the middle value is returned; with
    an even number, the smaller of the two middle values:

    >>> median_low([1, 3, 5])
    3
    >>> median_low([1, 3, 5, 7])
    3

    Raises StatisticsError when ``data`` is empty.
    """
    ordered = sorted(data)
    count = len(ordered)
    if count == 0:
        raise StatisticsError("no median for empty data")
    # (count - 1)//2 is the middle index for odd counts and the lower of the
    # two middle indices for even counts.
    return ordered[(count - 1)//2]
def median_high(data):
    """Return the high median of data.

    With an odd number of data points the middle value is returned; with
    an even number, the larger of the two middle values:

    >>> median_high([1, 3, 5])
    3
    >>> median_high([1, 3, 5, 7])
    5

    Raises StatisticsError when ``data`` is empty.
    """
    ordered = sorted(data)
    if not ordered:
        raise StatisticsError("no median for empty data")
    return ordered[len(ordered)//2]
def median_grouped(data, interval=1):
    """Return the 50th percentile (median) of grouped continuous data.

    >>> median_grouped([1, 2, 2, 3, 4, 4, 4, 4, 4, 5])
    3.7
    >>> median_grouped([52, 52, 53, 54])
    52.5

    Each value is treated as the midpoint of a class ``interval`` wide
    (1, 2, 3, ... stand for 0.5-1.5, 1.5-2.5, 2.5-3.5, ...), and the median
    is interpolated inside the class containing the middle value.

    ``interval`` defaults to 1; changing it changes the interpolated
    result:

    >>> median_grouped([1, 3, 3, 5, 7], interval=1)
    3.25
    >>> median_grouped([1, 3, 3, 5, 7], interval=2)
    3.5

    No check is made that the data points are at least ``interval`` apart.
    Raises StatisticsError when ``data`` is empty, and TypeError when the
    middle value or ``interval`` is a str or bytes object.
    """
    ordered = sorted(data)
    count = len(ordered)
    if count == 0:
        raise StatisticsError("no median for empty data")
    if count == 1:
        return ordered[0]
    # The middle value is the centre of the class holding the median.
    centre = ordered[count//2]
    for candidate in (centre, interval):
        if isinstance(candidate, (str, bytes)):
            raise TypeError('expected number but got %r' % candidate)
    try:
        lower_limit = centre - interval/2
    except TypeError:
        # Mixed types: fall back to float arithmetic.
        lower_limit = float(centre) - float(interval)/2
    below = ordered.index(centre)   # values below the median class
    within = ordered.count(centre)  # values inside the median class
    return lower_limit + interval*(count/2 - below)/within
def mode(data):
    """Return the single most common data point of discrete or nominal data.

    ``mode`` assumes discrete data and returns exactly one value, which is
    the usual treatment of the mode as taught in schools:

    >>> mode([1, 1, 2, 3, 3, 3, 3, 4])
    3

    Nominal (non-numeric) data works as well:

    >>> mode(["red", "blue", "blue", "red", "green", "red", "red"])
    'red'

    StatisticsError is raised when the data is empty or when there is not
    exactly one most common value.
    """
    # _counts() supplies the table of (value, frequency) pairs to inspect.
    candidates = _counts(data)
    if len(candidates) == 1:
        return candidates[0][0]
    if not candidates:
        raise StatisticsError('no mode for empty data')
    raise StatisticsError(
        'no unique mode; found %d equally common values' % len(candidates)
    )
# === Measures of spread ===
# See http://mathworld.wolfram.com/Variance.html
# http://mathworld.wolfram.com/SampleVariance.html
# http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
#
# Under no circumstances use the so-called "computational formula for
# variance", as that is only suitable for hand calculations with a small
# amount of low-precision data. It has terrible numeric properties.
#
# See a comparison of three computational methods here:
# http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/
def _ss(data, c=None):
    """Return the sum of squared deviations of the sequence ``data``.

    When ``c`` is None the mean is computed in a first pass and the
    deviations from it in a second pass. Otherwise deviations are taken
    from ``c`` exactly as given; use that form with care, since a poor
    choice of ``c`` can produce garbage results.
    """
    centre = mean(data) if c is None else c
    total = _sum((x-centre)**2 for x in data)
    # Mathematically the sum of deviations below is zero; subtracting its
    # square compensates for rounding error when it is not.
    residual = _sum((x-centre) for x in data)
    total -= residual**2/len(data)
    assert not total < 0, 'negative sum of square deviations: %f' % total
    return total
def variance(data, xbar=None):
    """Return the sample variance of ``data``.

    ``data`` is an iterable of Real-valued numbers containing at least two
    values. ``xbar``, when given, should be the mean of the data; when it
    is missing or None the mean is calculated automatically.

    Use this function for a sample drawn from a population. For the
    variance of an entire population see ``pvariance``.

    Examples:

    >>> data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5]
    >>> variance(data)
    1.3720238095238095

    A mean that has already been computed can be passed as ``xbar`` to
    avoid recalculating it:

    >>> m = mean(data)
    >>> variance(data, m)
    1.3720238095238095

    No check is made that ``xbar`` really is the mean of ``data``;
    arbitrary values may lead to invalid or impossible results.

    Decimals and Fractions are supported:

    >>> from decimal import Decimal as D
    >>> variance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")])
    Decimal('31.01875')

    >>> from fractions import Fraction as F
    >>> variance([F(1, 6), F(1, 2), F(5, 3)])
    Fraction(67, 108)

    Raises StatisticsError when fewer than two data points are supplied.
    """
    # An iterator can only be walked once, so materialise it first.
    values = list(data) if iter(data) is data else data
    count = len(values)
    if count < 2:
        raise StatisticsError('variance requires at least two data points')
    return _ss(values, xbar)/(count - 1)
def pvariance(data, mu=None):
    """Return the population variance of ``data``.

    ``data`` is an iterable of Real-valued numbers containing at least one
    value. ``mu``, when given, should be the mean of the data; when it is
    missing or None the mean is calculated automatically.

    Use this function for the variance of an entire population. To
    estimate the variance from a sample, ``variance`` is usually the
    better choice.

    Examples:

    >>> data = [0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25]
    >>> pvariance(data)
    1.25

    A mean that has already been computed can be passed as the second
    argument to avoid recalculating it:

    >>> mu = mean(data)
    >>> pvariance(data, mu)
    1.25

    No check is made that ``mu`` really is the mean of ``data``; arbitrary
    values may lead to invalid or impossible results.

    Decimals and Fractions are supported:

    >>> from decimal import Decimal as D
    >>> pvariance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")])
    Decimal('24.815')

    >>> from fractions import Fraction as F
    >>> pvariance([F(1, 4), F(5, 4), F(1, 2)])
    Fraction(13, 72)

    Raises StatisticsError when no data points are supplied.
    """
    # An iterator can only be walked once, so materialise it first.
    values = list(data) if iter(data) is data else data
    count = len(values)
    if count < 1:
        raise StatisticsError('pvariance requires at least one data point')
    return _ss(values, mu)/count
def stdev(data, xbar=None):
    """Return the square root of the sample variance.

    Arguments and other details are as for ``variance``.

    >>> stdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
    1.0810874155219827
    """
    sample_var = variance(data, xbar)
    try:
        # Prefer the value's own sqrt() method when it has one.
        root = sample_var.sqrt()
    except AttributeError:
        root = math.sqrt(sample_var)
    return root
def pstdev(data, mu=None):
    """Return the square root of the population variance.

    Arguments and other details are as for ``pvariance``.

    >>> pstdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
    0.986893273527251
    """
    population_var = pvariance(data, mu)
    try:
        # Prefer the value's own sqrt() method when it has one.
        root = population_var.sqrt()
    except AttributeError:
        root = math.sqrt(population_var)
    return root

View File

@ -1,19 +0,0 @@
#!/usr/bin/env python3
import sys
import statistics
def pairs(l, n):
    """Group the sequence ``l`` into consecutive tuples of ``n`` items.

    Returns a zip iterator; a trailing group with fewer than ``n`` items
    is dropped.
    """
    # Slice k holds every n-th item starting at position k.
    columns = [l[first::n] for first in range(n)]
    return zip(*columns)
# Input lines on stdin come in pairs:
#   n - time for running the program with no input
#   m - time for running it with the benchmark input
# The quantity of interest is the difference (m - n).
values = [float(loaded) - float(baseline)
          for (baseline, loaded) in pairs(sys.stdin.readlines(), 2)]
print("mean = %.4f, median = %.4f, stdev = %.4f" % (
    statistics.mean(values),
    statistics.median(values),
    statistics.stdev(values),
))

View File

@ -1,33 +0,0 @@
# Benchmarks
Here are some benchmarks, run on an ancient Thinkpad with an Intel
Core 2 Duo at 2GHz. The input text is an 11MB Markdown file built by
concatenating the Markdown sources of all the localizations of the
first edition of
[*Pro Git*](https://github.com/progit/progit/tree/master/en) by Scott
Chacon.
|Implementation | Time (sec)|
|-------------------|-----------:|
| Markdown.pl | 2921.24 |
| Python markdown | 291.25 |
| PHP markdown | 20.82 |
| kramdown | 17.32 |
| cheapskate | 8.24 |
| peg-markdown | 5.45 |
| parsedown | 5.06 |
| **commonmark.js** | 2.09 |
| marked | 1.99 |
| discount | 1.85 |
| **cmark** | 0.29 |
| hoedown | 0.21 |
To run these benchmarks, use `make bench PROG=/path/to/program`.
`time` is used to measure execution speed. The reported
time is the *difference* between the time to run the program
with the benchmark input and the time to run it with no input.
(This procedure ensures that implementations in dynamic languages are
not penalized by startup time.) The median of ten runs is taken. The
process is reniced to a high priority so that the system doesn't
interrupt runs.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,125 +0,0 @@
# Target names for the shared and static builds of the GFM extensions library.
set(LIBRARY "libcmark-gfm-extensions")
set(STATICLIBRARY "libcmark-gfm-extensions_static")
# Sources compiled into both library flavours.
# NOTE(review): ext_scanners.re is listed next to ext_scanners.c, whose header
# says "Generated by re2c"; presumably the .re file is the generator input and
# is only listed for IDE visibility - confirm.
set(LIBRARY_SOURCES
core-extensions.c
table.c
strikethrough.c
autolink.c
tagfilter.c
ext_scanners.c
ext_scanners.re
ext_scanners.h
tasklist.c
)
# Directory-scoped include paths: the core library's source and build
# directories, then this directory and its own build directory.
include_directories(
${PROJECT_SOURCE_DIR}/src
${PROJECT_BINARY_DIR}/src
)
include_directories(. ${CMAKE_CURRENT_BINARY_DIR})
# C flags for a custom "Profile" configuration: the Release flags plus -pg.
set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg")
# NOTE(review): CMAKE_LINKER_PROFILE and CMAKE_LINKER_FLAGS_RELEASE are not
# among CMake's documented linker-flag variables (those are
# CMAKE_<EXE|SHARED|MODULE>_LINKER_FLAGS_<CONFIG>), so this line probably does
# not add -pg at link time - confirm before relying on Profile builds.
set(CMAKE_LINKER_PROFILE "${CMAKE_LINKER_FLAGS_RELEASE} -pg")
# Shared build of the extensions library, enabled by CMARK_SHARED.
if (CMARK_SHARED)
add_library(${LIBRARY} SHARED ${LIBRARY_SOURCES})
# OUTPUT_NAME sets the file's base name to "cmark-gfm-extensions";
# DEFINE_SYMBOL names the macro defined only while compiling this shared
# library's own sources.
set_target_properties(${LIBRARY} PROPERTIES
OUTPUT_NAME "cmark-gfm-extensions"
DEFINE_SYMBOL "cmark-gfm"
SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM}
VERSION ${PROJECT_VERSION})
# NOTE(review): APPEND on the boolean MACOSX_RPATH property yields a list
# value if the property was already initialised - confirm this is intended.
set_property(TARGET ${LIBRARY}
APPEND PROPERTY MACOSX_RPATH true)
# Avoid name clash between PROGRAM and LIBRARY pdb files.
set_target_properties(${LIBRARY} PROPERTIES PDB_NAME cmark-gfm-extensions_dll)
# Queue the target for the install(TARGETS ...) rule further down.
list(APPEND CMARK_INSTALL ${LIBRARY})
# The extensions link against the core library target.
target_link_libraries(${LIBRARY} libcmark-gfm)
endif()
# Static build of the extensions library, enabled by CMARK_STATIC.
if(CMARK_STATIC)
  add_library(${STATICLIBRARY} STATIC ${LIBRARY_SOURCES})

  # Properties common to every platform: Qt auto-tools off, static-define
  # macros on the compile line, position-independent code, and the version.
  set_target_properties(${STATICLIBRARY} PROPERTIES
    AUTOMOC OFF
    AUTOUIC OFF
    AUTORCC OFF
    COMPILE_FLAGS "-DCMARK_GFM_STATIC_DEFINE -DCMARK_GFM_EXTENSIONS_STATIC_DEFINE"
    DEFINE_SYMBOL "cmark-gfm"
    POSITION_INDEPENDENT_CODE ON
    VERSION ${PROJECT_VERSION})

  # Under MSVC the static archive gets a distinct "_static" base name;
  # elsewhere it uses the same base name as the shared library.
  if(MSVC)
    set_target_properties(${STATICLIBRARY} PROPERTIES
      OUTPUT_NAME "cmark-gfm-extensions_static")
  else()
    set_target_properties(${STATICLIBRARY} PROPERTIES
      OUTPUT_NAME "cmark-gfm-extensions")
  endif()

  # Queue the target for the install(TARGETS ...) rule further down.
  list(APPEND CMARK_INSTALL ${STATICLIBRARY})
endif()
# Pull in the platform's runtime libraries for installation, with the
# module's "not found" warnings suppressed.
set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON)
include (InstallRequiredSystemLibraries)
# Install every library target queued in CMARK_INSTALL and record it in the
# "cmark-gfm-extensions" export set.
# NOTE(review): this rule sits outside the CMARK_SHARED/CMARK_STATIC guard
# used below, so CMARK_INSTALL may be empty here when both options are off -
# confirm that configuration is handled.
install(TARGETS ${CMARK_INSTALL}
EXPORT cmark-gfm-extensions
RUNTIME DESTINATION bin
LIBRARY DESTINATION lib${LIB_SUFFIX}
ARCHIVE DESTINATION lib${LIB_SUFFIX}
)
# The public header and the export file are installed only when at least one
# library flavour is built.
if (CMARK_SHARED OR CMARK_STATIC)
install(FILES
cmark-gfm-core-extensions.h
DESTINATION include
)
# NOTE(review): the destination directory is spelled "cmake-gfm-extensions"
# (cmake-, not cmark-); existing consumers may depend on that path, so it is
# left unchanged - confirm.
install(EXPORT cmark-gfm-extensions DESTINATION lib${LIB_SUFFIX}/cmake-gfm-extensions)
endif()
# Feature tests: probe the toolchain and store the results in HAVE_* variables.
include(CheckIncludeFile)
include(CheckCSourceCompiles)
include(CheckCSourceRuns)
include(CheckSymbolExists)

# Is <stdbool.h> available?
check_include_file(stdbool.h HAVE_STDBOOL_H)

# Does the compiler accept __builtin_expect?
check_c_source_compiles(
  "int main() { __builtin_expect(0,0); return 0; }"
  HAVE___BUILTIN_EXPECT)

# Does the compiler accept the __attribute__ syntax?
check_c_source_compiles("
int f(void) __attribute__ (());
int main() { return 0; }
" HAVE___ATTRIBUTE__)
# Always compile with warnings.
#
# Every flag below is appended to CMAKE_C_FLAGS, so the checks that decide
# what to append inspect the C flags and the C compiler. (They previously
# looked at CMAKE_CXX_FLAGS / CMAKE_CXX_COMPILER_ID, which describe a
# different language and may be empty when only C is enabled.)
if(MSVC)
  # Force warning level 4: upgrade an existing /W<n> in place so the command
  # line never carries two conflicting levels, otherwise append /W4.
  if(CMAKE_C_FLAGS MATCHES "/W[0-4]")
    string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
  else()
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4")
  endif()
  # Warnings are errors, apart from a few warnings that are disabled outright.
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /WX /wd4706 /wd4204 /wd4221 /wd4100 /D_CRT_SECURE_NO_WARNINGS")
elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wno-unused-parameter -std=c99 -pedantic")
endif()

# Compile as C++ under MSVC older than 12.0
if(MSVC AND MSVC_VERSION LESS 1800)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /TP")
endif()

# The custom "Ubsan" build type enables the undefined-behaviour sanitizer.
# Quoting keeps the comparison well-formed when CMAKE_BUILD_TYPE is empty.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Ubsan")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined")
endif()

View File

@ -1,508 +0,0 @@
#include "autolink.h"
#include <parser.h>
#include <string.h>
#include <utf8.h>
#include <stddef.h>
#if defined(_WIN32)
#define strncasecmp _strnicmp
#else
#include <strings.h>
#endif
/* Decodes the first UTF-8 code point of `link` (at most `link_len` bytes) and
 * reports whether it may appear in a host name: returns 1 when it is neither
 * whitespace nor punctuation, and 0 otherwise or when decoding fails. */
static int is_valid_hostchar(const uint8_t *link, size_t link_len) {
  int32_t codepoint;

  if (cmark_utf8proc_iterate(link, (bufsize_t)link_len, &codepoint) < 0)
    return 0;
  if (cmark_utf8proc_is_space(codepoint))
    return 0;
  return !cmark_utf8proc_is_punctuation(codepoint);
}
/* Returns 1 when `link` begins (case-insensitively) with "http://",
 * "https://" or "ftp://" and the character right after the scheme is a valid
 * host character; returns 0 otherwise. */
static int sd_autolink_issafe(const uint8_t *link, size_t link_len) {
  static const char *const schemes[] = {"http://", "https://", "ftp://"};
  const size_t scheme_count = sizeof(schemes) / sizeof(schemes[0]);
  size_t idx;

  for (idx = 0; idx < scheme_count; ++idx) {
    const char *scheme = schemes[idx];
    size_t scheme_len = strlen(scheme);

    /* The link must be strictly longer than the scheme itself. */
    if (link_len <= scheme_len)
      continue;
    if (strncasecmp((char *)link, scheme, scheme_len) != 0)
      continue;
    if (is_valid_hostchar(link + scheme_len, link_len - scheme_len))
      return 1;
  }

  return 0;
}
/* Trims trailing delimiters from a candidate autolink.
 *
 * `data` points at the candidate and `link_end` is its tentative length. The
 * candidate is first cut at the first '<'. Then trailing punctuation,
 * unbalanced closing parentheses and a trailing HTML-entity-like "&name;" are
 * removed from the end. Returns the trimmed length, which may be 0. */
static size_t autolink_delim(uint8_t *data, size_t link_end) {
  size_t i;
  size_t closing = 0;
  size_t opening = 0;

  /* Cut at the first '<' and count parentheses in what remains. */
  for (i = 0; i < link_end; ++i) {
    const uint8_t c = data[i];

    if (c == '<') {
      link_end = i;
      break;
    } else if (c == '(') {
      opening++;
    } else if (c == ')') {
      closing++;
    }
  }

  while (link_end > 0) {
    switch (data[link_end - 1]) {
    case ')':
      /* Allow any number of matching brackets (as counted in opening/closing
       * above) at the end of the URL. If there is a greater number of closing
       * brackets than opening ones, we remove one character from the end of
       * the link.
       *
       * Examples (input text => output linked portion):
       *
       *        http://www.pokemon.com/Pikachu_(Electric)
       *     => http://www.pokemon.com/Pikachu_(Electric)
       *
       *        http://www.pokemon.com/Pikachu_((Electric)
       *     => http://www.pokemon.com/Pikachu_((Electric)
       *
       *        http://www.pokemon.com/Pikachu_(Electric))
       *     => http://www.pokemon.com/Pikachu_(Electric)
       *
       *        http://www.pokemon.com/Pikachu_((Electric))
       *     => http://www.pokemon.com/Pikachu_((Electric))
       */
      if (closing <= opening) {
        return link_end;
      }
      closing--;
      link_end--;
      break;

    case '?':
    case '!':
    case '.':
    case ',':
    case ':':
    case '*':
    case '_':
    case '~':
    case '\'':
    case '"':
      link_end--;
      break;

    case ';': {
      size_t new_end;

      /* A lone ';' cannot end an entity. Handling it here also keeps
       * `link_end - 2` below from wrapping around (size_t is unsigned),
       * which would index far outside `data`. */
      if (link_end < 2) {
        link_end--;
        break;
      }

      /* Walk back over letters; if they are preceded by '&', the whole
       * "&name;" is dropped, otherwise only the ';'. */
      new_end = link_end - 2;
      while (new_end > 0 && cmark_isalpha(data[new_end]))
        new_end--;

      if (new_end < link_end - 2 && data[new_end] == '&')
        link_end = new_end;
      else
        link_end--;
      break;
    }

    default:
      return link_end;
    }
  }

  return link_end;
}
/* Scans `data` (of `size` bytes) for a run of domain characters, starting at
 * index 1 (the byte at index 0 is not examined here).
 *
 * Returns the index at which scanning stopped, or 0 when the candidate is
 * rejected: either an underscore appears in one of the last two segments and
 * there are at most 10 dots, or `allow_short` is zero and no dot was seen.
 *
 * NOTE(review): the loop bound is `size - 1` on an unsigned value, so the
 * function presumably relies on callers passing size >= 1 - confirm. */
static size_t check_domain(uint8_t *data, size_t size, int allow_short) {
/* np counts dots; uscore2 counts underscores in the current segment and
 * uscore1 holds the count for the segment before it. */
size_t i, np = 0, uscore1 = 0, uscore2 = 0;
/* The purpose of this code is to reject urls that contain an underscore
* in one of the last two segments. Examples:
*
* www.xxx.yyy.zzz autolinked
* www.xxx.yyy._zzz not autolinked
* www.xxx._yyy.zzz not autolinked
* www._xxx.yyy.zzz autolinked
*
* The reason is that domain names are allowed to include underscores,
* but host names are not. See: https://stackoverflow.com/a/2183140
*/
for (i = 1; i < size - 1; i++) {
/* A backslash skips ahead so the following byte is the one classified. */
if (data[i] == '\\' && i < size - 2)
i++;
if (data[i] == '_')
uscore2++;
else if (data[i] == '.') {
uscore1 = uscore2;
uscore2 = 0;
np++;
} else if (!is_valid_hostchar(data + i, size - i) && data[i] != '-')
break;
}
if (uscore1 > 0 || uscore2 > 0) {
/* If the url is very long then accept it despite the underscores,
* to avoid quadratic behavior causing a denial of service. See:
* https://github.com/github/cmark-gfm/security/advisories/GHSA-29g3-96g3-jg6c
* Reasonable urls are unlikely to have more than 10 segments, so
* this extra condition shouldn't have any impact on normal usage.
*/
if (np <= 10) {
return 0;
}
}
if (allow_short) {
/* We don't need a valid domain in the strict sense (with at
* least one dot); so just make sure it's composed of valid
* domain characters and return the length of the valid
* sequence. */
return i;
} else {
/* a valid domain needs to have at least a dot.
* that's as far as we get */
return np ? i : 0;
}
}
/* Tries to autolink a bare "www." host at the inline parser's current offset.
 *
 * On success the parser offset is advanced past the link and a new LINK node
 * is returned whose URL is "http://" followed by the matched text and whose
 * only child is a TEXT node holding the matched text. Returns NULL when
 * nothing matches. `parent` is not used. */
static cmark_node *www_match(cmark_parser *parser, cmark_node *parent,
                             cmark_inline_parser *inline_parser) {
  cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser);
  size_t offset = cmark_inline_parser_get_offset(inline_parser);
  uint8_t *data = chunk->data + offset;
  size_t size = chunk->len - offset;
  int start_col = cmark_inline_parser_get_column(inline_parser);
  size_t link_end;

  /* Unless at the very start of the chunk, the preceding byte must be
   * whitespace or one of the characters accepted by the strchr() test. */
  if (offset > 0) {
    uint8_t prev = data[-1];
    if (strchr("*_~(", prev) == NULL && !cmark_isspace(prev))
      return NULL;
  }

  if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
    return NULL;

  link_end = check_domain(data, size, 0);
  if (link_end == 0)
    return NULL;

  /* Extend the link up to the next whitespace or '<'. */
  for (; link_end < size; link_end++) {
    if (cmark_isspace(data[link_end]) || data[link_end] == '<')
      break;
  }

  link_end = autolink_delim(data, link_end);
  if (link_end == 0)
    return NULL;

  cmark_inline_parser_set_offset(inline_parser, (int)(offset + link_end));

  cmark_node *link = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);

  /* The URL gets an explicit scheme; the visible text does not. */
  cmark_strbuf url;
  cmark_strbuf_init(parser->mem, &url, 10);
  cmark_strbuf_puts(&url, "http://");
  cmark_strbuf_put(&url, data, (bufsize_t)link_end);
  link->as.link.url = cmark_chunk_buf_detach(&url);

  cmark_node *label = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
  label->as.literal =
      cmark_chunk_dup(chunk, (bufsize_t)offset, (bufsize_t)link_end);
  cmark_node_append_child(link, label);

  /* Both nodes share the same source position. */
  link->start_line = label->start_line =
      link->end_line = label->end_line =
          cmark_inline_parser_get_line(inline_parser);
  link->start_column = label->start_column = start_col - 1;
  link->end_column = label->end_column =
      cmark_inline_parser_get_column(inline_parser) - 1;

  return link;
}
/* Tries to autolink a "scheme://host..." URL when the inline parser sits on
 * the ':' of the scheme separator.
 *
 * The scheme letters before the ':' have already been emitted into `parent`,
 * so on success they are removed again with cmark_node_unput(), the parser
 * offset is advanced past the link, and a new LINK node is returned whose URL
 * and TEXT child both cover the full matched text. Returns NULL when nothing
 * matches. */
static cmark_node *url_match(cmark_parser *parser, cmark_node *parent,
                             cmark_inline_parser *inline_parser) {
  size_t link_end, domain_len;
  int rewind;

  cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser);
  int max_rewind = cmark_inline_parser_get_offset(inline_parser);
  uint8_t *data = chunk->data + max_rewind;
  size_t size = chunk->len - max_rewind;

  if (size < 4 || data[1] != '/' || data[2] != '/')
    return NULL;

  /* Count the alphabetic characters immediately before the ':'. */
  for (rewind = 0; rewind < max_rewind; rewind++) {
    if (!cmark_isalpha(data[-rewind - 1]))
      break;
  }

  if (!sd_autolink_issafe(data - rewind, size + rewind))
    return NULL;

  link_end = strlen("://");

  domain_len = check_domain(data + link_end, size - link_end, 1);
  if (domain_len == 0)
    return NULL;
  link_end += domain_len;

  /* Extend the link up to the next whitespace or '<'. */
  for (; link_end < size; link_end++) {
    if (cmark_isspace(data[link_end]) || data[link_end] == '<')
      break;
  }

  link_end = autolink_delim(data, link_end);
  if (link_end == 0)
    return NULL;

  cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end));
  cmark_node_unput(parent, rewind);

  cmark_node *link = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);

  /* URL and visible text are the same span of the chunk. */
  cmark_chunk url = cmark_chunk_dup(chunk, max_rewind - rewind,
                                    (bufsize_t)(link_end + rewind));
  link->as.link.url = url;

  cmark_node *label = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
  label->as.literal = url;
  cmark_node_append_child(link, label);

  /* Both nodes share the same source position. */
  link->start_line = label->start_line =
      link->end_line = label->end_line =
          cmark_inline_parser_get_line(inline_parser);
  link->start_column = label->start_column = max_rewind - rewind;
  link->end_column = label->end_column =
      cmark_inline_parser_get_column(inline_parser) - 1;

  return link;
}
/* Inline match hook of the autolink extension. Dispatches on the trigger
 * character `c`: ':' attempts a URL autolink and 'w' a "www." autolink.
 * Returns NULL when the parser reports being inside a bracket (queried with
 * both flag values), for any other character, or when no link matches.
 *
 * Note that we could end up re-consuming something already a part of an
 * inline, because we don't track when the last inline was finished in
 * inlines.c. */
static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser,
                         cmark_node *parent, unsigned char c,
                         cmark_inline_parser *inline_parser) {
  if (cmark_inline_parser_in_bracket(inline_parser, false) ||
      cmark_inline_parser_in_bracket(inline_parser, true))
    return NULL;

  switch (c) {
  case ':':
    return url_match(parser, parent, inline_parser);
  case 'w':
    return www_match(parser, parent, inline_parser);
  default:
    return NULL;
  }
}
/* Checks whether the text ending `rewind` bytes before `data` is exactly
 * `protocol` (for example "mailto:").
 *
 * `max_rewind` is the number of bytes available before `data`. Returns true
 * when the protocol text is present there and is either at the very start of
 * the available text or preceded by a non-alphanumeric character. */
static bool validate_protocol(const char protocol[], uint8_t *data, size_t rewind, size_t max_rewind) {
  size_t len = strlen(protocol);
  size_t available = max_rewind - rewind;
  const uint8_t *candidate;

  if (len > available) {
    return false;
  }

  /* Start of the would-be protocol text. Plain pointer subtraction is used
   * instead of a negative index built from mixed signed/unsigned operands,
   * which only worked through unsigned wraparound. */
  candidate = data - rewind - len;

  // Check that the protocol matches
  if (memcmp(candidate, protocol, len) != 0) {
    return false;
  }

  /* Nothing precedes the protocol, so there is no boundary to check. */
  if (len == available) {
    return true;
  }

  // Make sure the character before the protocol is non-alphanumeric
  return !cmark_isalnum((char)candidate[-1]);
}
// Scans the literal of TEXT node `text` for e-mail-like addresses around each
// '@' and splits the node: text before the address stays in the current node,
// the address becomes a LINK node (with a TEXT child) inserted after it, and
// the remainder goes into a new TEXT node that is then scanned in turn.
// Addresses get a "mailto:" URL prefix unless they already carry a "mailto:"
// or "xmpp:" prefix.
static void postprocess_text(cmark_parser *parser, cmark_node *text) {
// `start` is where the current node's text begins inside the detached buffer;
// `offset` is how far into that text scanning has progressed.
size_t start = 0;
size_t offset = 0;
// `text` is going to be split into a list of nodes containing shorter segments
// of text, so we detach the memory buffer from text and use `cmark_chunk_dup` to
// create references to it. Later, `cmark_chunk_to_cstr` is used to convert
// the references into allocated buffers. The detached buffer is freed before we
// return.
cmark_chunk detached_chunk = text->as.literal;
text->as.literal = cmark_chunk_dup(&detached_chunk, 0, detached_chunk.len);
uint8_t *data = text->as.literal.data;
size_t remaining = text->as.literal.len;
while (true) {
size_t link_end;
uint8_t *at;
bool auto_mailto = true;
bool is_xmpp = false;
size_t rewind;
size_t max_rewind;
// Number of dots followed by an alphanumeric seen after the '@'.
size_t np = 0;
if (offset >= remaining)
break;
// Locate the next '@'; without one there is nothing left to link.
at = (uint8_t *)memchr(data + start + offset, '@', remaining - offset);
if (!at)
break;
// Distance from the scan position to the '@': the most we may rewind.
max_rewind = at - (data + start + offset);
found_at:
// Walk backwards from the '@' over characters accepted in the local part.
for (rewind = 0; rewind < max_rewind; ++rewind) {
uint8_t c = data[start + offset + max_rewind - rewind - 1];
if (cmark_isalnum(c))
continue;
if (strchr(".+-_", c) != NULL)
continue;
// A ':' is accepted only as the end of a "mailto:" or "xmpp:" prefix.
if (strchr(":", c) != NULL) {
if (validate_protocol("mailto:", data + start + offset + max_rewind, rewind, max_rewind)) {
auto_mailto = false;
continue;
}
if (validate_protocol("xmpp:", data + start + offset + max_rewind, rewind, max_rewind)) {
auto_mailto = false;
is_xmpp = true;
continue;
}
}
break;
}
// Nothing usable before the '@': skip past it and search again.
if (rewind == 0) {
offset += max_rewind + 1;
continue;
}
assert(data[start + offset + max_rewind] == '@');
// Walk forwards from the '@' over characters accepted in the domain part.
for (link_end = 1; link_end < remaining - offset - max_rewind; ++link_end) {
uint8_t c = data[start + offset + max_rewind + link_end];
if (cmark_isalnum(c))
continue;
if (c == '@') {
// Found another '@', so go back and try again with an updated offset and max_rewind.
offset += max_rewind + 1;
max_rewind = link_end - 1;
goto found_at;
} else if (c == '.' && link_end < remaining - offset - max_rewind - 1 &&
cmark_isalnum(data[start + offset + max_rewind + link_end + 1]))
np++;
else if (c == '/' && is_xmpp)
continue;
else if (c != '-' && c != '_')
break;
}
// Reject when the domain part is too short, has no counted dot, or ends in
// something other than a letter or '.'.
if (link_end < 2 || np == 0 ||
(!cmark_isalpha(data[start + offset + max_rewind + link_end - 1]) &&
data[start + offset + max_rewind + link_end - 1] != '.')) {
offset += max_rewind + link_end;
continue;
}
// Trim trailing delimiters from the domain part.
link_end = autolink_delim(data + start + offset + max_rewind, link_end);
if (link_end == 0) {
offset += max_rewind + 1;
continue;
}
// Build the LINK node; its URL gets "mailto:" unless a protocol was present.
cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem);
cmark_strbuf buf;
cmark_strbuf_init(parser->mem, &buf, 10);
if (auto_mailto)
cmark_strbuf_puts(&buf, "mailto:");
cmark_strbuf_put(&buf, data + start + offset + max_rewind - rewind, (bufsize_t)(link_end + rewind));
link_node->as.link.url = cmark_chunk_buf_detach(&buf);
// The link's visible text is the matched address itself.
cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
cmark_chunk email = cmark_chunk_dup(
&detached_chunk,
(bufsize_t)(start + offset + max_rewind - rewind),
(bufsize_t)(link_end + rewind));
cmark_chunk_to_cstr(parser->mem, &email);
link_text->as.literal = email;
cmark_node_append_child(link_node, link_text);
cmark_node_insert_after(text, link_node);
// Everything after the address goes into a new TEXT node after the link.
cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
post->as.literal = cmark_chunk_dup(&detached_chunk,
(bufsize_t)(start + offset + max_rewind + link_end),
(bufsize_t)(remaining - offset - max_rewind - link_end));
cmark_node_insert_after(link_node, post);
// The current node keeps only the text that preceded the address.
text->as.literal = cmark_chunk_dup(&detached_chunk, (bufsize_t)start, (bufsize_t)(offset + max_rewind - rewind));
cmark_chunk_to_cstr(parser->mem, &text->as.literal);
// Continue scanning in the trailing node.
text = post;
start += offset + max_rewind + link_end;
remaining -= offset + max_rewind + link_end;
offset = 0;
}
// Convert the reference to allocated memory.
assert(!text->as.literal.alloc);
cmark_chunk_to_cstr(parser->mem, &text->as.literal);
// Free the detached buffer.
cmark_chunk_free(parser->mem, &detached_chunk);
}
/* Postprocess hook of the autolink extension. Consolidates adjacent text
 * nodes under `root`, then walks the tree and runs postprocess_text() on
 * every TEXT node that is not inside a LINK node. Returns `root`. */
static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser, cmark_node *root) {
  cmark_iter *iter;
  cmark_event_type ev;
  bool in_link = false;

  cmark_consolidate_text_nodes(root);
  iter = cmark_iter_new(root);

  for (ev = cmark_iter_next(iter); ev != CMARK_EVENT_DONE;
       ev = cmark_iter_next(iter)) {
    cmark_node *node = cmark_iter_get_node(iter);

    if (in_link) {
      /* Ignore everything until the link is exited. */
      if (ev == CMARK_EVENT_EXIT && node->type == CMARK_NODE_LINK)
        in_link = false;
    } else if (ev == CMARK_EVENT_ENTER) {
      if (node->type == CMARK_NODE_LINK)
        in_link = true;
      else if (node->type == CMARK_NODE_TEXT)
        postprocess_text(parser, node);
    }
  }

  cmark_iter_free(iter);

  return root;
}
/* Creates the syntax extension named "autolink", installs its inline-match
 * and postprocess hooks, and registers ':' and 'w' as its special inline
 * characters. Returns the new extension. */
cmark_syntax_extension *create_autolink_extension(void) {
  cmark_syntax_extension *extension = cmark_syntax_extension_new("autolink");
  cmark_llist *triggers = NULL;
  cmark_mem *allocator;

  cmark_syntax_extension_set_match_inline_func(extension, match);
  cmark_syntax_extension_set_postprocess_func(extension, postprocess);

  /* The trigger characters are stored directly in the list's pointer slots. */
  allocator = cmark_get_default_mem_allocator();
  triggers = cmark_llist_append(allocator, triggers, (void *)':');
  triggers = cmark_llist_append(allocator, triggers, (void *)'w');
  cmark_syntax_extension_set_special_inline_chars(extension, triggers);

  return extension;
}

View File

@ -1,8 +0,0 @@
#ifndef CMARK_GFM_AUTOLINK_H
#define CMARK_GFM_AUTOLINK_H
#include "cmark-gfm-core-extensions.h"
/* Creates and returns the "autolink" syntax extension. */
cmark_syntax_extension *create_autolink_extension(void);
#endif

View File

@ -1,54 +0,0 @@
#ifndef CMARK_GFM_CORE_EXTENSIONS_H
#define CMARK_GFM_CORE_EXTENSIONS_H
#ifdef __cplusplus
extern "C" {
#endif
#include "cmark-gfm-extension_api.h"
#include "cmark-gfm_export.h"
#include <stdbool.h>
#include <stdint.h>
/** Registers the core GFM extensions plugin (table, strikethrough, autolink,
 * tagfilter and tasklist) the first time it is called; later calls do nothing.
 */
CMARK_GFM_EXPORT
void cmark_gfm_core_extensions_ensure_registered(void);
/** Gets the number of columns for the table.
 * NOTE(review): behaviour for non-table nodes is not visible from this
 * header - confirm against the implementation.
 */
CMARK_GFM_EXPORT
uint16_t cmark_gfm_extensions_get_table_columns(cmark_node *node);
/** Sets the number of columns for the table, returning 1 on success and 0 on error.
*/
CMARK_GFM_EXPORT
int cmark_gfm_extensions_set_table_columns(cmark_node *node, uint16_t n_columns);
/** Gets the alignments for the table.
 * NOTE(review): ownership and length of the returned array are not visible
 * from this header - confirm against the implementation.
 */
CMARK_GFM_EXPORT
uint8_t *cmark_gfm_extensions_get_table_alignments(cmark_node *node);
/** Sets the alignments for the table, returning 1 on success and 0 on error.
*/
CMARK_GFM_EXPORT
int cmark_gfm_extensions_set_table_alignments(cmark_node *node, uint16_t ncols, uint8_t *alignments);
/** Gets whether the node is a table header row.
 */
CMARK_GFM_EXPORT
int cmark_gfm_extensions_get_table_row_is_header(cmark_node *node);
/** Sets whether the node is a table header row, returning 1 on success and 0 on error.
*/
CMARK_GFM_EXPORT
int cmark_gfm_extensions_set_table_row_is_header(cmark_node *node, int is_header);
/** Gets whether a tasklist item is "checked" (completed).
 */
CMARK_GFM_EXPORT
bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node);
/* For backwards compatibility */
#define cmark_gfm_extensions_tasklist_is_checked cmark_gfm_extensions_get_tasklist_item_checked
/** Sets whether a tasklist item is "checked" (completed), returning 1 on success and 0 on error.
*/
CMARK_GFM_EXPORT
int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,27 +0,0 @@
#include "cmark-gfm-core-extensions.h"
#include "autolink.h"
#include "strikethrough.h"
#include "table.h"
#include "tagfilter.h"
#include "tasklist.h"
#include "registry.h"
#include "plugin.h"
/* Plugin callback: creates each core GFM extension and registers it with
 * `plugin`, in the order table, strikethrough, autolink, tagfilter, tasklist.
 * Always returns 1. */
static int core_extensions_registration(cmark_plugin *plugin) {
  cmark_syntax_extension *(*const factories[])(void) = {
      create_table_extension,
      create_strikethrough_extension,
      create_autolink_extension,
      create_tagfilter_extension,
      create_tasklist_extension,
  };
  unsigned i;

  for (i = 0; i < sizeof(factories) / sizeof(factories[0]); ++i)
    cmark_plugin_register_syntax_extension(plugin, factories[i]());

  return 1;
}
/* Registers the core extensions plugin on the first call; a function-local
 * static flag turns every later call into a no-op. */
void cmark_gfm_core_extensions_ensure_registered(void) {
  static int registered = 0;

  if (registered)
    return;

  cmark_register_plugin(core_extensions_registration);
  registered = 1;
}

View File

@ -1,879 +0,0 @@
/* Generated by re2c 1.3 */
#include "ext_scanners.h"
#include <stdlib.h>
/* Runs `scanner` on the buffer `ptr` starting at `offset`.
 *
 * The byte at ptr[len] is temporarily replaced with '\0' while the scanner
 * runs and is restored afterwards. Returns the scanner's result, or 0 when
 * `ptr` is NULL or `offset` is not below `len`. */
bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *),
                       unsigned char *ptr, int len, bufsize_t offset) {
  unsigned char saved;
  bufsize_t matched;

  if (ptr == NULL || offset >= len)
    return 0;

  saved = ptr[len];
  ptr[len] = '\0';
  matched = scanner(ptr + offset);
  ptr[len] = saved;

  return matched;
}
/* Scanner state machine for a table start line, beginning at `p`.
 *
 * Returns the number of bytes matched (the match ends just after a '\n',
 * optionally preceded by '\r'), or 0 when the input does not match.
 *
 * This function is part of a file marked "Generated by re2c"; presumably it
 * should be regenerated from ext_scanners.re rather than edited by hand. */
bufsize_t _scan_table_start(const unsigned char *p) {
const unsigned char *marker = NULL;
const unsigned char *start = p;
{
unsigned char yych;
/* Character-class bitmap indexed by byte value: bit 64 is set for tab,
 * vertical tab, form feed and space; bit 128 is set for '-'. */
static const unsigned char yybm[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
yych = *p;
if (yych <= ' ') {
if (yych <= '\n') {
if (yych == '\t')
goto yy4;
} else {
if (yych <= '\f')
goto yy4;
if (yych >= ' ')
goto yy4;
}
} else {
if (yych <= '9') {
if (yych == '-')
goto yy5;
} else {
if (yych <= ':')
goto yy6;
if (yych == '|')
goto yy4;
}
}
++p;
/* yy3: no match. */
yy3 : { return 0; }
yy4:
yych = *(marker = ++p);
if (yybm[0 + yych] & 64) {
goto yy7;
}
if (yych == '-')
goto yy10;
if (yych == ':')
goto yy12;
goto yy3;
yy5:
yych = *(marker = ++p);
if (yybm[0 + yych] & 128) {
goto yy10;
}
if (yych <= ' ') {
if (yych <= 0x08)
goto yy3;
if (yych <= '\r')
goto yy14;
if (yych <= 0x1F)
goto yy3;
goto yy14;
} else {
if (yych <= ':') {
if (yych <= '9')
goto yy3;
goto yy13;
} else {
if (yych == '|')
goto yy14;
goto yy3;
}
}
yy6:
yych = *(marker = ++p);
if (yybm[0 + yych] & 128) {
goto yy10;
}
goto yy3;
yy7:
yych = *++p;
if (yybm[0 + yych] & 64) {
goto yy7;
}
if (yych == '-')
goto yy10;
if (yych == ':')
goto yy12;
/* yy9: backtrack to the saved marker, then fail. */
yy9:
p = marker;
goto yy3;
yy10:
yych = *++p;
if (yybm[0 + yych] & 128) {
goto yy10;
}
if (yych <= 0x1F) {
if (yych <= '\n') {
if (yych <= 0x08)
goto yy9;
if (yych <= '\t')
goto yy13;
goto yy15;
} else {
if (yych <= '\f')
goto yy13;
if (yych <= '\r')
goto yy17;
goto yy9;
}
} else {
if (yych <= ':') {
if (yych <= ' ')
goto yy13;
if (yych <= '9')
goto yy9;
goto yy13;
} else {
if (yych == '|')
goto yy18;
goto yy9;
}
}
yy12:
yych = *++p;
if (yybm[0 + yych] & 128) {
goto yy10;
}
goto yy9;
yy13:
yych = *++p;
yy14:
if (yych <= '\r') {
if (yych <= '\t') {
if (yych <= 0x08)
goto yy9;
goto yy13;
} else {
if (yych <= '\n')
goto yy15;
if (yych <= '\f')
goto yy13;
goto yy17;
}
} else {
if (yych <= ' ') {
if (yych <= 0x1F)
goto yy9;
goto yy13;
} else {
if (yych == '|')
goto yy18;
goto yy9;
}
}
/* yy15: accepting state, reached after consuming the line's '\n'. */
yy15:
++p;
{ return (bufsize_t)(p - start); }
yy17:
yych = *++p;
if (yych == '\n')
goto yy15;
goto yy9;
yy18:
yych = *++p;
if (yybm[0 + yych] & 128) {
goto yy10;
}
if (yych <= '\r') {
if (yych <= '\t') {
if (yych <= 0x08)
goto yy9;
goto yy18;
} else {
if (yych <= '\n')
goto yy15;
if (yych <= '\f')
goto yy18;
goto yy17;
}
} else {
if (yych <= ' ') {
if (yych <= 0x1F)
goto yy9;
goto yy18;
} else {
if (yych == ':')
goto yy12;
goto yy9;
}
}
}
}
/*
 * Scans one non-empty table cell starting at `p` and returns the number of
 * bytes matched, or 0 when no cell text starts at `p`.
 *
 * This looks like re2c output (state labels yyNN, bitmap yybm); prefer
 * regenerating it from the scanner specification over editing it by hand.
 *
 * yybm classifies each byte: bit 64 is set for every ASCII byte except
 * LF, CR, backslash and '|'; bit 128 is set for backslash only; all bytes
 * >= 0x80 are 0 and are validated as UTF-8 sequences by the states below.
 *
 * NOTE(review): byte 0x00 carries bit 64, so a NUL terminator does NOT stop
 * the scan -- only '|', CR, LF or an invalid UTF-8 sequence does. Callers
 * presumably guarantee a line ending before the terminator; confirm.
 */
bufsize_t _scan_table_cell(const unsigned char *p) {
const unsigned char *marker = NULL;
const unsigned char *start = p;
{
unsigned char yych;
unsigned int yyaccept = 0;
static const unsigned char yybm[] = {
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 0, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 128, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64,
64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0,
};
/* Dispatch on the first byte: ordinary ASCII, backslash, or a UTF-8 lead byte. */
yych = *p;
if (yybm[0 + yych] & 64) {
goto yy22;
}
if (yych <= 0xEC) {
if (yych <= 0xC1) {
if (yych <= '\r')
goto yy25;
if (yych <= '\\')
goto yy27;
goto yy25;
} else {
if (yych <= 0xDF)
goto yy29;
if (yych <= 0xE0)
goto yy30;
goto yy31;
}
} else {
if (yych <= 0xF0) {
if (yych <= 0xED)
goto yy32;
if (yych <= 0xEF)
goto yy31;
goto yy33;
} else {
if (yych <= 0xF3)
goto yy34;
if (yych <= 0xF4)
goto yy35;
goto yy25;
}
}
/* yy22: at least one cell byte consumed; remember this position in `marker`
 * and keep consuming while the input still looks like cell content. */
yy22:
yyaccept = 0;
yych = *(marker = ++p);
if (yybm[0 + yych] & 64) {
goto yy22;
}
if (yych <= 0xEC) {
if (yych <= 0xC1) {
if (yych <= '\r')
goto yy24;
if (yych <= '\\')
goto yy27;
} else {
if (yych <= 0xDF)
goto yy36;
if (yych <= 0xE0)
goto yy38;
goto yy39;
}
} else {
if (yych <= 0xF0) {
if (yych <= 0xED)
goto yy40;
if (yych <= 0xEF)
goto yy39;
goto yy41;
} else {
if (yych <= 0xF3)
goto yy42;
if (yych <= 0xF4)
goto yy43;
}
}
/* yy24: successful match -- report the length consumed so far. */
yy24 : { return (bufsize_t)(p - start); }
yy25:
++p;
/* yy26: no match. */
yy26 : { return 0; }
/* yy27: just consumed a backslash; the byte after it is taken as cell
 * content unless it is CR, LF or not valid UTF-8. */
yy27:
yyaccept = 0;
yych = *(marker = ++p);
if (yybm[0 + yych] & 128) {
goto yy27;
}
if (yych <= 0xDF) {
if (yych <= '\f') {
if (yych == '\n')
goto yy24;
goto yy22;
} else {
if (yych <= '\r')
goto yy24;
if (yych <= 0x7F)
goto yy22;
if (yych <= 0xC1)
goto yy24;
goto yy36;
}
} else {
if (yych <= 0xEF) {
if (yych <= 0xE0)
goto yy38;
if (yych == 0xED)
goto yy40;
goto yy39;
} else {
if (yych <= 0xF0)
goto yy41;
if (yych <= 0xF3)
goto yy42;
if (yych <= 0xF4)
goto yy43;
goto yy24;
}
}
/* yy29..yy35: the very first byte was a UTF-8 lead byte; check the first
 * continuation byte. Failure here means no match at all (yy26). */
yy29:
yych = *++p;
if (yych <= 0x7F)
goto yy26;
if (yych <= 0xBF)
goto yy22;
goto yy26;
yy30:
yyaccept = 1;
yych = *(marker = ++p);
if (yych <= 0x9F)
goto yy26;
if (yych <= 0xBF)
goto yy36;
goto yy26;
yy31:
yyaccept = 1;
yych = *(marker = ++p);
if (yych <= 0x7F)
goto yy26;
if (yych <= 0xBF)
goto yy36;
goto yy26;
yy32:
yyaccept = 1;
yych = *(marker = ++p);
if (yych <= 0x7F)
goto yy26;
if (yych <= 0x9F)
goto yy36;
goto yy26;
yy33:
yyaccept = 1;
yych = *(marker = ++p);
if (yych <= 0x8F)
goto yy26;
if (yych <= 0xBF)
goto yy39;
goto yy26;
yy34:
yyaccept = 1;
yych = *(marker = ++p);
if (yych <= 0x7F)
goto yy26;
if (yych <= 0xBF)
goto yy39;
goto yy26;
yy35:
yyaccept = 1;
yych = *(marker = ++p);
if (yych <= 0x7F)
goto yy26;
if (yych <= 0x8F)
goto yy39;
goto yy26;
/* yy36..yy43: continuation bytes of a multi-byte sequence. */
yy36:
yych = *++p;
if (yych <= 0x7F)
goto yy37;
if (yych <= 0xBF)
goto yy22;
/* yy37: malformed sequence -- rewind to the last saved position and finish
 * with whichever outcome yyaccept recorded (0: match, otherwise: no match). */
yy37:
p = marker;
if (yyaccept == 0) {
goto yy24;
} else {
goto yy26;
}
yy38:
yych = *++p;
if (yych <= 0x9F)
goto yy37;
if (yych <= 0xBF)
goto yy36;
goto yy37;
yy39:
yych = *++p;
if (yych <= 0x7F)
goto yy37;
if (yych <= 0xBF)
goto yy36;
goto yy37;
yy40:
yych = *++p;
if (yych <= 0x7F)
goto yy37;
if (yych <= 0x9F)
goto yy36;
goto yy37;
yy41:
yych = *++p;
if (yych <= 0x8F)
goto yy37;
if (yych <= 0xBF)
goto yy39;
goto yy37;
yy42:
yych = *++p;
if (yych <= 0x7F)
goto yy37;
if (yych <= 0xBF)
goto yy39;
goto yy37;
yy43:
yych = *++p;
if (yych <= 0x7F)
goto yy37;
if (yych <= 0x8F)
goto yy39;
goto yy37;
}
}
/*
 * Matches a cell delimiter at `p`: one '|' followed by any number of
 * space, tab, vertical-tab or form-feed bytes.
 *
 * Returns the number of bytes matched (delimiter plus trailing blanks), or 0
 * when `p` does not point at '|'.
 */
bufsize_t _scan_table_cell_end(const unsigned char *p) {
  const unsigned char *cursor = p;

  if (*cursor != '|')
    return 0;

  /* Step over the pipe, then over every blank that follows it. */
  do {
    ++cursor;
  } while (*cursor == ' ' || *cursor == '\t' || *cursor == '\v' ||
           *cursor == '\f');

  return (bufsize_t)(cursor - p);
}
/*
 * Matches the end of a table row at `p`: optional blanks (space, tab,
 * vertical tab, form feed), an optional CR, and a mandatory LF.
 *
 * Returns the number of bytes matched including the line ending, or 0 when
 * the input at `p` is not a row end.
 */
bufsize_t _scan_table_row_end(const unsigned char *p) {
  const unsigned char *cursor = p;

  /* Trailing blanks before the line ending. */
  while (*cursor == ' ' || *cursor == '\t' || *cursor == '\v' ||
         *cursor == '\f')
    ++cursor;

  /* A CR is only accepted when it is immediately followed by LF. */
  if (*cursor == '\r')
    ++cursor;

  if (*cursor != '\n')
    return 0;

  return (bufsize_t)(cursor + 1 - p);
}
/*
 * Scans a task-list item prefix starting at `p` and returns the number of
 * bytes matched, or 0 when the input is not a task-list prefix.
 *
 * This looks like re2c output (state labels yyNN, bitmap yybm); prefer
 * regenerating it from the scanner specification over editing it by hand.
 *
 * yybm classifies each byte: bit 64 is set for tab, vertical tab, form feed
 * and space; bit 128 is set for the digits '0'..'9'.
 *
 * A successful match always ends with: '[', one of ' ', 'x' or 'X', ']',
 * and one or more blank bytes (states yy86..yy89).
 *
 * NOTE(review): this scanner accepts an upper-case 'X' in the checkbox;
 * confirm that the specification it is generated from agrees.
 */
bufsize_t _scan_tasklist(const unsigned char *p) {
const unsigned char *marker = NULL;
const unsigned char *start = p;
{
unsigned char yych;
static const unsigned char yybm[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 64, 64, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/* Dispatch on the first byte: blank (yy67), '*', '+' or '-' (yy68),
 * digit (yy69); anything else is no match. */
yych = *p;
if (yych <= ' ') {
if (yych <= '\n') {
if (yych == '\t')
goto yy67;
} else {
if (yych <= '\f')
goto yy67;
if (yych >= ' ')
goto yy67;
}
} else {
if (yych <= ',') {
if (yych <= ')')
goto yy65;
if (yych <= '+')
goto yy68;
} else {
if (yych <= '-')
goto yy68;
if (yych <= '/')
goto yy65;
if (yych <= '9')
goto yy69;
}
}
yy65:
++p;
/* yy66: no match. */
yy66 : { return 0; }
/* yy67: first byte was a blank; `marker` records the rewind position. */
yy67:
yych = *(marker = ++p);
if (yybm[0 + yych] & 64) {
goto yy70;
}
if (yych <= ',') {
if (yych <= ')')
goto yy66;
if (yych <= '+')
goto yy73;
goto yy66;
} else {
if (yych <= '-')
goto yy73;
if (yych <= '/')
goto yy66;
if (yych <= '9')
goto yy74;
goto yy66;
}
/* yy68: first byte was '*', '+' or '-'; a blank must follow. */
yy68:
yych = *(marker = ++p);
if (yych <= '\n') {
if (yych == '\t')
goto yy75;
goto yy66;
} else {
if (yych <= '\f')
goto yy75;
if (yych == ' ')
goto yy75;
goto yy66;
}
/* yy69: first byte was a digit. */
yy69:
yych = *(marker = ++p);
if (yych <= 0x1F) {
if (yych <= '\t') {
if (yych <= 0x08)
goto yy78;
goto yy73;
} else {
if (yych <= '\n')
goto yy66;
if (yych <= '\f')
goto yy73;
goto yy78;
}
} else {
if (yych <= 0x7F) {
if (yych <= ' ')
goto yy73;
goto yy78;
} else {
if (yych <= 0xC1)
goto yy66;
if (yych <= 0xF4)
goto yy78;
goto yy66;
}
}
/* yy70: skipping further leading blanks. */
yy70:
yych = *++p;
if (yybm[0 + yych] & 64) {
goto yy70;
}
if (yych <= ',') {
if (yych <= ')')
goto yy72;
if (yych <= '+')
goto yy73;
} else {
if (yych <= '-')
goto yy73;
if (yych <= '/')
goto yy72;
if (yych <= '9')
goto yy74;
}
/* yy72: failure after lookahead -- rewind to `marker` and report no match. */
yy72:
p = marker;
goto yy66;
yy73:
yych = *++p;
if (yych == '[')
goto yy72;
goto yy76;
yy74:
yych = *++p;
if (yych <= '\n') {
if (yych == '\t')
goto yy73;
goto yy78;
} else {
if (yych <= '\f')
goto yy73;
if (yych == ' ')
goto yy73;
goto yy78;
}
/* yy75/yy76: blanks between the list marker and the opening '['. */
yy75:
yych = *++p;
yy76:
if (yych <= '\f') {
if (yych == '\t')
goto yy75;
if (yych <= '\n')
goto yy72;
goto yy75;
} else {
if (yych <= ' ') {
if (yych <= 0x1F)
goto yy72;
goto yy75;
} else {
if (yych == '[')
goto yy86;
goto yy72;
}
}
/* yy77/yy78: inside a run of digits; bytes >= 0xC2 start a UTF-8 sequence
 * that is validated by yy79..yy85. */
yy77:
yych = *++p;
yy78:
if (yybm[0 + yych] & 128) {
goto yy77;
}
if (yych <= 0xC1) {
if (yych <= '\f') {
if (yych <= 0x08)
goto yy73;
if (yych == '\n')
goto yy72;
goto yy75;
} else {
if (yych == ' ')
goto yy75;
if (yych <= 0x7F)
goto yy73;
goto yy72;
}
} else {
if (yych <= 0xED) {
if (yych <= 0xDF)
goto yy79;
if (yych <= 0xE0)
goto yy80;
if (yych <= 0xEC)
goto yy81;
goto yy82;
} else {
if (yych <= 0xF0) {
if (yych <= 0xEF)
goto yy81;
goto yy83;
} else {
if (yych <= 0xF3)
goto yy84;
if (yych <= 0xF4)
goto yy85;
goto yy72;
}
}
}
yy79:
yych = *++p;
if (yych <= 0x7F)
goto yy72;
if (yych <= 0xBF)
goto yy73;
goto yy72;
yy80:
yych = *++p;
if (yych <= 0x9F)
goto yy72;
if (yych <= 0xBF)
goto yy79;
goto yy72;
yy81:
yych = *++p;
if (yych <= 0x7F)
goto yy72;
if (yych <= 0xBF)
goto yy79;
goto yy72;
yy82:
yych = *++p;
if (yych <= 0x7F)
goto yy72;
if (yych <= 0x9F)
goto yy79;
goto yy72;
yy83:
yych = *++p;
if (yych <= 0x8F)
goto yy72;
if (yych <= 0xBF)
goto yy81;
goto yy72;
yy84:
yych = *++p;
if (yych <= 0x7F)
goto yy72;
if (yych <= 0xBF)
goto yy81;
goto yy72;
yy85:
yych = *++p;
if (yych <= 0x7F)
goto yy72;
if (yych <= 0x8F)
goto yy81;
goto yy72;
/* yy86: just consumed '['; the checkbox content must be ' ', 'X' or 'x'. */
yy86:
yych = *++p;
if (yych <= 'W') {
if (yych != ' ')
goto yy72;
} else {
if (yych <= 'X')
goto yy87;
if (yych != 'x')
goto yy72;
}
/* yy87: expect the closing ']' followed by at least one blank. */
yy87:
yych = *++p;
if (yych != ']')
goto yy72;
yych = *++p;
if (yych <= '\n') {
if (yych != '\t')
goto yy72;
} else {
if (yych <= '\f')
goto yy89;
if (yych != ' ')
goto yy72;
}
/* yy89: consume any further blanks, then report the matched length. */
yy89:
yych = *++p;
if (yych <= '\n') {
if (yych == '\t')
goto yy89;
} else {
if (yych <= '\f')
goto yy89;
if (yych == ' ')
goto yy89;
}
{ return (bufsize_t)(p - start); }
}
}

View File

@ -1,24 +0,0 @@
/*
 * Scanner entry points used by the table and task-list extensions.
 *
 * Each `_scan_*` function inspects the bytes at `p` and returns the number
 * of bytes matched, or 0 when nothing matches. The `scan_*` macros wrap them
 * with `_ext_scan_at`, which takes a buffer `c`, its length `l` and a start
 * offset `n`.
 */
#ifndef CMARK_GFM_EXT_SCANNERS_H
#define CMARK_GFM_EXT_SCANNERS_H

#include "chunk.h"
#include "cmark-gfm.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Runs `scanner` on `ptr + offset`; returns 0 when `ptr` is NULL or
 * `offset >= len`. */
bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *),
                       unsigned char *ptr, int len, bufsize_t offset);

bufsize_t _scan_table_start(const unsigned char *p);
bufsize_t _scan_table_cell(const unsigned char *p);
bufsize_t _scan_table_cell_end(const unsigned char *p);
bufsize_t _scan_table_row_end(const unsigned char *p);
bufsize_t _scan_tasklist(const unsigned char *p);

#define scan_table_start(c, l, n) _ext_scan_at(&_scan_table_start, c, l, n)
#define scan_table_cell(c, l, n) _ext_scan_at(&_scan_table_cell, c, l, n)
#define scan_table_cell_end(c, l, n) _ext_scan_at(&_scan_table_cell_end, c, l, n)
#define scan_table_row_end(c, l, n) _ext_scan_at(&_scan_table_row_end, c, l, n)
#define scan_tasklist(c, l, n) _ext_scan_at(&_scan_tasklist, c, l, n)

#ifdef __cplusplus
}
#endif

#endif /* CMARK_GFM_EXT_SCANNERS_H */

View File

@ -1,92 +0,0 @@
/*!re2c re2c:flags:no-debug-info = 1; */
/*!re2c re2c:indent:string = ' '; */
#include <stdlib.h>
#include "ext_scanners.h"
/*
 * Runs `scanner` on the bytes of `ptr` beginning at `offset`.
 *
 * While the scanner runs, the byte at `ptr[len]` is overwritten with NUL and
 * restored afterwards, so `ptr[len]` must be a writable byte of the buffer.
 *
 * Returns whatever the scanner returns, or 0 without calling it when `ptr`
 * is NULL or `offset` is not below `len`.
 */
bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned char *ptr, int len, bufsize_t offset)
{
  unsigned char saved;
  bufsize_t matched;

  if (!ptr || len <= offset)
    return 0;

  saved = ptr[len];
  ptr[len] = '\0';
  matched = scanner(ptr + offset);
  ptr[len] = saved;

  return matched;
}
/*
 * Shared re2c configuration and named patterns for the scanners below.
 *
 * Input refilling is disabled (yyfill:enable = 0), so the scanners rely on
 * the NUL byte that _ext_scan_at writes at the end of the input to stop.
 * A negated character class therefore has to exclude \x00 explicitly,
 * otherwise it matches the terminator and the scanner keeps reading past
 * the end of the buffer. `table_cell` excludes it for that reason.
 *
 * NOTE(review): in `tasklist` the `.` after `[0-9]+` is an unescaped re2c
 * "any character" pattern rather than a literal dot -- confirm that this is
 * intended.
 */
/*!re2c
re2c:define:YYCTYPE = "unsigned char";
re2c:define:YYCURSOR = p;
re2c:define:YYMARKER = marker;
re2c:yyfill:enable = 0;
spacechar = [ \t\v\f];
newline = [\r]?[\n];
escaped_char = [\\][|!"#$%&'()*+,./:;<=>?@[\\\]^_`{}~-];
table_marker = (spacechar*[:]?[-]+[:]?spacechar*);
table_cell = (escaped_char|[^\x00|\r\n])+;
tasklist = spacechar*("-"|"+"|"*"|[0-9]+.)spacechar+("[ ]"|"[x]")spacechar+;
*/
/*
 * Matches a table delimiter row at `p`: an optional leading '|', one or more
 * `table_marker` columns separated by '|', an optional trailing '|',
 * optional blanks and a newline.
 *
 * Returns the number of bytes matched, or 0 when the input is not a
 * delimiter row. The body is a re2c rule block, not plain C.
 */
bufsize_t _scan_table_start(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[|]? table_marker ([|] table_marker)* [|]? spacechar* newline {
return (bufsize_t)(p - start);
}
* { return 0; }
*/
}
/*
 * Matches the text of one non-empty table cell at `p` (the `table_cell`
 * pattern).
 *
 * Returns the number of bytes matched; 0 means no cell text starts at `p`,
 * which callers also use to represent an empty cell. The body is a re2c rule
 * block, not plain C.
 */
bufsize_t _scan_table_cell(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
// In fact, `table_cell` matches non-empty table cells only. The empty
// string is also a valid table cell, but is handled by the default rule.
// This approach prevents re2c's match-empty-string warning.
table_cell { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
/*
 * Matches a cell delimiter at `p`: a '|' followed by any number of blanks.
 *
 * Returns the number of bytes matched, or 0 when `p` does not point at '|'.
 * The body is a re2c rule block, not plain C.
 */
bufsize_t _scan_table_cell_end(const unsigned char *p)
{
const unsigned char *start = p;
/*!re2c
[|] spacechar* { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
/*
 * Matches the end of a table row at `p`: optional blanks followed by a
 * newline (LF or CR LF).
 *
 * Returns the number of bytes matched, or 0 when the input is not a row end.
 * The body is a re2c rule block, not plain C.
 */
bufsize_t _scan_table_row_end(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
spacechar* newline { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
/*
 * Matches a task-list item prefix at `p` (the `tasklist` pattern): a list
 * marker, blanks, a `[ ]` or `[x]` checkbox and trailing blanks.
 *
 * Returns the number of bytes matched, or 0 when the input is not a
 * task-list prefix. The body is a re2c rule block, not plain C.
 */
bufsize_t _scan_tasklist(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
tasklist { return (bufsize_t)(p - start); }
* { return 0; }
*/
}

View File

@ -1,167 +0,0 @@
#include "strikethrough.h"
#include <parser.h>
#include <render.h>
cmark_node_type CMARK_NODE_STRIKETHROUGH;
/*
 * Inline matcher for the strikethrough extension.
 *
 * For a run of '~' characters (at most 100 are scanned) this creates a text
 * node holding the run. When the run can open or close emphasis and is
 * exactly two tildes long -- or one tilde, unless
 * CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE is set -- the node is also pushed as
 * a delimiter so that `insert` can pair it up later.
 *
 * Returns the new text node, or NULL when `character` is not '~'.
 */
static cmark_node *match(cmark_syntax_extension *self, cmark_parser *parser,
                         cmark_node *parent, unsigned char character,
                         cmark_inline_parser *inline_parser) {
  char tildes[101];
  int can_open, can_close, punct_before, punct_after;
  int run_length;
  int line;
  cmark_node *text;

  if (character != '~')
    return NULL;

  run_length = cmark_inline_parser_scan_delimiters(
      inline_parser, sizeof(tildes) - 1, '~', &can_open, &can_close,
      &punct_before, &punct_after);

  /* Build the literal "~~~..." string for the text node. */
  memset(tildes, '~', run_length);
  tildes[run_length] = 0;

  text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
  cmark_node_set_literal(text, tildes);

  line = cmark_inline_parser_get_line(inline_parser);
  text->end_line = line;
  text->start_line = line;
  text->start_column = cmark_inline_parser_get_column(inline_parser) - run_length;

  if (can_open || can_close) {
    int single_allowed = !(parser->options & CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE);

    if (run_length == 2 || (run_length == 1 && single_allowed)) {
      cmark_inline_parser_push_delimiter(inline_parser, character, can_open,
                                         can_close, text);
    }
  }

  return text;
}
/*
 * Turns a matching opener/closer delimiter pair into a strikethrough node.
 *
 * When both runs have the same length and the opener's text node can be
 * retyped, every sibling between the two text nodes is moved under the
 * opener's node and the closer's text node is freed. In all cases the
 * delimiters from `closer` back to `opener` are removed from the stack.
 *
 * Returns the delimiter that followed `closer` on entry.
 */
static delimiter *insert(cmark_syntax_extension *self, cmark_parser *parser,
                         cmark_inline_parser *inline_parser, delimiter *opener,
                         delimiter *closer) {
  delimiter *after_closer = closer->next;
  cmark_node *strike = opener->inl_text;
  cmark_node *child, *following;
  delimiter *cur, *prev;

  if (opener->inl_text->as.literal.len == closer->inl_text->as.literal.len &&
      cmark_node_set_type(strike, CMARK_NODE_STRIKETHROUGH)) {
    cmark_node_set_syntax_extension(strike, self);

    /* Re-parent everything between the opener and closer text nodes. */
    for (child = cmark_node_next(opener->inl_text);
         child && child != closer->inl_text; child = following) {
      following = cmark_node_next(child);
      cmark_node_append_child(strike, child);
    }

    strike->end_column = closer->inl_text->start_column +
                         closer->inl_text->as.literal.len - 1;
    cmark_node_free(closer->inl_text);
  }

  /* Drop the closer and every delimiter stacked between it and the opener. */
  for (cur = closer; cur != NULL && cur != opener; cur = prev) {
    prev = cur->previous;
    cmark_inline_parser_remove_delimiter(inline_parser, cur);
  }

  cmark_inline_parser_remove_delimiter(inline_parser, opener);

  return after_closer;
}
/* Returns the display name for `node`: "strikethrough" for strikethrough
 * nodes and "<unknown>" for every other type. */
static const char *get_type_string(cmark_syntax_extension *extension,
                                   cmark_node *node) {
  if (node->type == CMARK_NODE_STRIKETHROUGH)
    return "strikethrough";
  return "<unknown>";
}
/* Containment rule: a strikethrough node accepts children for which
 * CMARK_NODE_TYPE_INLINE_P holds; any other node type accepts none. */
static int can_contain(cmark_syntax_extension *extension, cmark_node *node,
                       cmark_node_type child_type) {
  return node->type == CMARK_NODE_STRIKETHROUGH
             ? CMARK_NODE_TYPE_INLINE_P(child_type)
             : false;
}
/* CommonMark output: emits the literal "~~" for every event on the node,
 * so the same marker is written on enter and on exit. */
static void commonmark_render(cmark_syntax_extension *extension,
                              cmark_renderer *renderer, cmark_node *node,
                              cmark_event_type ev_type, int options) {
  const char *marker = "~~";

  renderer->out(renderer, node, marker, false, LITERAL);
}
/* LaTeX output: wraps the content in \sout{...}, which requires
 * \usepackage{ulem} in the document. */
static void latex_render(cmark_syntax_extension *extension,
                         cmark_renderer *renderer, cmark_node *node,
                         cmark_event_type ev_type, int options) {
  const char *piece = (ev_type == CMARK_EVENT_ENTER) ? "\\sout{" : "}";

  renderer->out(renderer, node, piece, false, LITERAL);
}
/* man output: puts the content on its own line as the quoted argument of
 * an `.ST` request. */
static void man_render(cmark_syntax_extension *extension,
                       cmark_renderer *renderer, cmark_node *node,
                       cmark_event_type ev_type, int options) {
  if (ev_type != CMARK_EVENT_ENTER) {
    /* Leaving the node: close the quote and end the line. */
    renderer->out(renderer, node, "\"", false, LITERAL);
    renderer->cr(renderer);
    return;
  }

  /* Entering the node: start a fresh line and open the request. */
  renderer->cr(renderer);
  renderer->out(renderer, node, ".ST \"", false, LITERAL);
}
/* HTML output: appends <del> when entering the node and </del> when
 * leaving it. */
static void html_render(cmark_syntax_extension *extension,
                        cmark_html_renderer *renderer, cmark_node *node,
                        cmark_event_type ev_type, int options) {
  const char *tag = (ev_type == CMARK_EVENT_ENTER) ? "<del>" : "</del>";

  cmark_strbuf_puts(renderer->html, tag);
}
/* Plain-text output: emits a single "~" for every event on the node, so
 * the same marker is written on enter and on exit. */
static void plaintext_render(cmark_syntax_extension *extension,
                             cmark_renderer *renderer, cmark_node *node,
                             cmark_event_type ev_type, int options) {
  const char *marker = "~";

  renderer->out(renderer, node, marker, false, LITERAL);
}
/*
 * Builds the "strikethrough" syntax extension.
 *
 * Registers the type-name, containment and renderer callbacks, allocates the
 * CMARK_NODE_STRIKETHROUGH node type, installs the inline matcher and
 * delimiter handler, declares '~' as a special inline character and marks
 * the extension as an emphasis extension.
 *
 * Returns the newly created extension.
 */
cmark_syntax_extension *create_strikethrough_extension(void) {
  cmark_syntax_extension *extension = cmark_syntax_extension_new("strikethrough");
  cmark_llist *tilde_list = NULL;
  cmark_mem *allocator;

  /* Introspection and rendering callbacks. */
  cmark_syntax_extension_set_get_type_string_func(extension, get_type_string);
  cmark_syntax_extension_set_can_contain_func(extension, can_contain);
  cmark_syntax_extension_set_commonmark_render_func(extension, commonmark_render);
  cmark_syntax_extension_set_latex_render_func(extension, latex_render);
  cmark_syntax_extension_set_man_render_func(extension, man_render);
  cmark_syntax_extension_set_html_render_func(extension, html_render);
  cmark_syntax_extension_set_plaintext_render_func(extension, plaintext_render);

  CMARK_NODE_STRIKETHROUGH = cmark_syntax_extension_add_node(1);

  /* Inline parsing hooks. */
  cmark_syntax_extension_set_match_inline_func(extension, match);
  cmark_syntax_extension_set_inline_from_delim_func(extension, insert);

  /* The list stores the character itself in the pointer slot. */
  allocator = cmark_get_default_mem_allocator();
  tilde_list = cmark_llist_append(allocator, tilde_list, (void *)'~');
  cmark_syntax_extension_set_special_inline_chars(extension, tilde_list);

  cmark_syntax_extension_set_emphasis(extension, 1);

  return extension;
}

View File

@ -1,9 +0,0 @@
/* Public declarations for the strikethrough syntax extension. */
#ifndef CMARK_GFM_STRIKETHROUGH_H
#define CMARK_GFM_STRIKETHROUGH_H
#include "cmark-gfm-core-extensions.h"
/* Node type used for strikethrough nodes; assigned when the extension is
 * created. */
extern cmark_node_type CMARK_NODE_STRIKETHROUGH;
/* Creates and returns the strikethrough syntax extension. */
cmark_syntax_extension *create_strikethrough_extension(void);
#endif

View File

@ -1,917 +0,0 @@
#include <cmark-gfm-extension_api.h>
#include <html.h>
#include <inlines.h>
#include <parser.h>
#include <references.h>
#include <string.h>
#include <render.h>
#include "ext_scanners.h"
#include "strikethrough.h"
#include "table.h"
#include "cmark-gfm-core-extensions.h"
// Limit to prevent a malicious input from causing a denial of service.
// Compared against the number of autocompleted cells before a row is added.
#define MAX_AUTOCOMPLETED_CELLS 0x80000
// Custom node flag, initialized in `create_table_extension`.
// Set on a container once it has been rejected as a table header.
static cmark_node_internal_flags CMARK_NODE__TABLE_VISITED;
// Node types used for tables, their rows and their cells.
cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW,
CMARK_NODE_TABLE_CELL;
// One parsed cell of a row being scanned.
typedef struct {
// Cell text with `\|` unescaped and surrounding whitespace trimmed.
cmark_strbuf *buf;
// start_offset/end_offset: position of the cell within the scanned string;
// internal_offset: how far start_offset was moved back towards the
// preceding pipe.
int start_offset, end_offset, internal_offset;
} node_cell;
// One parsed row: a growable array of cells.
typedef struct {
// Number of entries in `cells`.
uint16_t n_columns;
// Offset at which the row starts when the scanned string begins with
// lines that are not part of the row; 0 otherwise.
int paragraph_offset;
node_cell *cells;
} table_row;
// Per-table data stored in the table node's `as.opaque`.
typedef struct {
uint16_t n_columns;
// One entry per column: 'l', 'c', 'r', or 0 when no alignment was given.
uint8_t *alignments;
// Rows added so far, including the header row.
int n_rows;
// Cells that came from the input, as opposed to autocompleted ones.
int n_nonempty_cells;
} node_table;
// Per-row data stored in the row node's `as.opaque`.
typedef struct {
bool is_header;
} node_table_row;
// Releases the string buffer owned by `cell`: first its contents, then the
// cmark_strbuf structure itself. The cell struct is not freed.
static void free_table_cell(cmark_mem *mem, node_cell *cell) {
  cmark_strbuf *text = cell->buf;

  cmark_strbuf_free(text);
  mem->free(text);
}
// Frees every cell of `row`, last to first, then the cell array itself.
// Afterwards the row is empty (n_columns == 0, cells == NULL) but still
// allocated.
static void free_row_cells(cmark_mem *mem, table_row *row) {
  for (; row->n_columns > 0; --row->n_columns) {
    free_table_cell(mem, &row->cells[row->n_columns - 1]);
  }

  mem->free(row->cells);
  row->cells = NULL;
}
// Frees `row` together with all of its cells. A NULL row is ignored.
static void free_table_row(cmark_mem *mem, table_row *row) {
  if (row) {
    free_row_cells(mem, row);
    mem->free(row);
  }
}
// Frees a node_table (passed as an untyped pointer): the alignments array
// first, then the structure itself.
static void free_node_table(cmark_mem *mem, void *ptr) {
  node_table *table = (node_table *)ptr;
  uint8_t *alignments = table->alignments;

  mem->free(alignments);
  mem->free(table);
}
// Frees the memory at `ptr` with the given allocator; the name indicates it
// is meant for node_table_row data, which has no nested allocations.
static void free_node_table_row(cmark_mem *mem, void *ptr) {
mem->free(ptr);
}
// Returns the column count of a table node, or -1 when `node` is NULL or
// not a table.
static int get_n_table_columns(cmark_node *node) {
  if (node && node->type == CMARK_NODE_TABLE) {
    const node_table *table = (node_table *)node->as.opaque;
    return (int)table->n_columns;
  }
  return -1;
}
// Stores the column count on a table node. Returns 1 on success and 0 when
// `node` is NULL or not a table.
static int set_n_table_columns(cmark_node *node, uint16_t n_columns) {
  if (node && node->type == CMARK_NODE_TABLE) {
    node_table *table = (node_table *)node->as.opaque;
    table->n_columns = n_columns;
    return 1;
  }
  return 0;
}
// Records one more row on the table and adds `i` -- the number of cells of
// that row that were actually parsed from the input -- to n_nonempty_cells.
//
// Rows that are too short get their trailing cells autocompleted, and those
// cells are deliberately not counted here. Keeping the two numbers apart
// lets the parser notice when a malicious input is producing a very large
// number of autocompleted cells, which would otherwise be a denial of
// service vulnerability.
//
// Returns 1 on success and 0 when `node` is NULL or not a table.
static int incr_table_row_count(cmark_node *node, int i) {
  node_table *table;

  if (!node || node->type != CMARK_NODE_TABLE)
    return 0;

  table = (node_table *)node->as.opaque;
  table->n_rows += 1;
  table->n_nonempty_cells += i;
  return 1;
}
// Number of autocompleted cells in the table: the full grid size
// (columns * rows) minus the cells that were parsed from the input.
// Returns 0 when `node` is NULL or not a table.
static int get_n_autocompleted_cells(cmark_node *node) {
  if (node && node->type == CMARK_NODE_TABLE) {
    const node_table *table = (node_table *)node->as.opaque;
    const int grid_cells = table->n_columns * table->n_rows;
    return grid_cells - table->n_nonempty_cells;
  }
  return 0;
}
// Returns the per-column alignment array of a table node, or NULL when
// `node` is NULL or not a table.
static uint8_t *get_table_alignments(cmark_node *node) {
  if (node && node->type == CMARK_NODE_TABLE) {
    node_table *table = (node_table *)node->as.opaque;
    return table->alignments;
  }
  return NULL;
}
// Stores the alignment array pointer on a table node (the array is not
// copied). Returns 1 on success and 0 when `node` is NULL or not a table.
static int set_table_alignments(cmark_node *node, uint8_t *alignments) {
  if (node && node->type == CMARK_NODE_TABLE) {
    node_table *table = (node_table *)node->as.opaque;
    table->alignments = alignments;
    return 1;
  }
  return 0;
}
// Returns the alignment of the column a table cell belongs to, looked up in
// the alignments array of the enclosing table (the cell's grandparent) at
// the cell's index.
//
// Returns 0 (no alignment) when `node` is NULL or not a table cell, when the
// cell is not attached to a row inside a table, or when the table has no
// alignments array. These checks keep a detached cell from dereferencing a
// NULL parent or a NULL array.
static uint8_t get_cell_alignment(cmark_node *node) {
  const uint8_t *alignments;

  if (!node || node->type != CMARK_NODE_TABLE_CELL)
    return 0;

  // A cell that has been unlinked from its row has no table to consult.
  if (!node->parent)
    return 0;

  // get_table_alignments already returns NULL for a missing or non-table
  // grandparent.
  alignments = get_table_alignments(node->parent->parent);
  if (!alignments)
    return 0;

  return alignments[node->as.cell_index];
}
// Stores the column index `i` on a table cell node. Returns 1 on success and
// 0 when `node` is NULL or not a table cell.
static int set_cell_index(cmark_node *node, int i) {
  if (node && node->type == CMARK_NODE_TABLE_CELL) {
    node->as.cell_index = i;
    return 1;
  }
  return 0;
}
// Copies the first `len` bytes of `string` into a newly allocated strbuf and
// replaces every `\|` sequence with a plain `|`.
//
// The caller owns the result and must release both the buffer contents and
// the cmark_strbuf structure.
static cmark_strbuf *unescape_pipes(cmark_mem *mem, unsigned char *string, bufsize_t len)
{
  cmark_strbuf *out = (cmark_strbuf *)mem->calloc(1, sizeof(cmark_strbuf));
  bufsize_t src = 0, dst = 0;

  cmark_strbuf_init(mem, out, len + 1);
  cmark_strbuf_put(out, string, len);
  // The extra NUL makes the one-byte lookahead below safe at the last byte.
  cmark_strbuf_putc(out, '\0');

  // Compact in place: drop the backslash of each `\|` pair.
  while (src < len) {
    if (out->ptr[src] == '\\' && out->ptr[src + 1] == '|')
      ++src;
    out->ptr[dst] = out->ptr[src];
    ++dst;
    ++src;
  }

  cmark_strbuf_truncate(out, dst);
  return out;
}
// Appends one uninitialised cell to `row` and returns a pointer to it so the
// caller can fill it in.
//
// The cell array is regrown whenever the new cell count is a power of two,
// to room for (2 * count - 1) cells, which is enough until the next power of
// two. Returns NULL, leaving the row untouched, when the count would no
// longer fit in the 16-bit n_columns field.
static node_cell* append_row_cell(cmark_mem *mem, table_row *row) {
  const uint32_t new_count = row->n_columns + 1;
  const int at_power_of_two = (new_count & (new_count - 1)) == 0;

  if (at_power_of_two) {
    // UINT16_MAX + 1 is itself a power of two, so this is the only place an
    // overflow of row->n_columns can first show up. The caller sees that the
    // offset does not reach the end of the input and cleans up.
    if (new_count > UINT16_MAX)
      return NULL;

    row->cells = (node_cell *)mem->realloc(
        row->cells, (2 * new_count - 1) * sizeof(node_cell));
  }

  row->n_columns = (uint16_t)new_count;
  return &row->cells[new_count - 1];
}
// Parses the first `len` bytes of `string` as a table row.
//
// Returns a newly allocated table_row (release it with free_table_row), or
// NULL when the input could not be consumed completely, when no cell was
// found, or when the row has too many columns for the 16-bit counter.
//
// If a row end is found before the end of the input, the cells gathered so
// far are discarded, the position after that line is recorded in
// `paragraph_offset`, and parsing restarts from there; the returned row then
// describes only the final line.
static table_row *row_from_string(cmark_syntax_extension *self,
cmark_parser *parser, unsigned char *string,
int len) {
// Parses a single table row. It has the following form:
// `delim? table_cell (delim table_cell)* delim? newline`
// Note that cells are allowed to be empty.
//
// From the GitHub-flavored Markdown specification:
//
// > Each row consists of cells containing arbitrary text, in which inlines
// > are parsed, separated by pipes (|). A leading and trailing pipe is also
// > recommended for clarity of reading, and if there's otherwise parsing
// > ambiguity.
table_row *row = NULL;
bufsize_t cell_matched = 1, pipe_matched = 1, offset;
int expect_more_cells = 1;
int row_end_offset = 0;
int int_overflow_abort = 0;
// calloc zero-initialises the row, so paragraph_offset starts at 0.
row = (table_row *)parser->mem->calloc(1, sizeof(table_row));
row->n_columns = 0;
row->cells = NULL;
// Scan past the (optional) leading pipe.
offset = scan_table_cell_end(string, len, 0);
// Parse the cells of the row. Stop if we reach the end of the input, or if we
// cannot detect any more cells.
while (offset < len && expect_more_cells) {
cell_matched = scan_table_cell(string, len, offset);
pipe_matched = scan_table_cell_end(string, len, offset + cell_matched);
if (cell_matched || pipe_matched) {
// We are guaranteed to have a cell, since (1) either we found some
// content and cell_matched, or (2) we found an empty cell followed by a
// pipe.
cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset,
cell_matched);
cmark_strbuf_trim(cell_buf);
node_cell *cell = append_row_cell(parser->mem, row);
if (!cell) {
// Too many columns: release the buffer that has no cell to own it.
int_overflow_abort = 1;
cmark_strbuf_free(cell_buf);
parser->mem->free(cell_buf);
break;
}
cell->buf = cell_buf;
cell->start_offset = offset;
cell->end_offset = offset + cell_matched - 1;
cell->internal_offset = 0;
// Move the start back to just after the preceding pipe (or to the start
// of the row), counting the distance in internal_offset.
while (cell->start_offset > row->paragraph_offset && string[cell->start_offset - 1] != '|') {
--cell->start_offset;
++cell->internal_offset;
}
}
offset += cell_matched + pipe_matched;
if (pipe_matched) {
expect_more_cells = 1;
} else {
// We've scanned the last cell. Check if we have reached the end of the row
row_end_offset = scan_table_row_end(string, len, offset);
offset += row_end_offset;
// If the end of the row is not the end of the input,
// the row is not a real row but potentially part of the paragraph
// preceding the table.
if (row_end_offset && offset != len) {
row->paragraph_offset = offset;
free_row_cells(parser->mem, row);
// Scan past the (optional) leading pipe.
offset += scan_table_cell_end(string, len, offset);
expect_more_cells = 1;
} else {
expect_more_cells = 0;
}
}
}
// Reject the row unless the whole input was consumed and it has cells.
if (offset != len || row->n_columns == 0 || int_overflow_abort) {
free_table_row(parser->mem, row);
row = NULL;
}
return row;
}
// Inserts a paragraph node in front of `parent_container` holding the first
// `paragraph_offset` bytes of `parent_string`, i.e. the text that preceded
// the table header inside the same container.
//
// The text is passed through unescape_pipes and trimmed before it becomes
// the paragraph's string content. If the paragraph cannot be inserted it is
// destroyed again.
static void try_inserting_table_header_paragraph(cmark_parser *parser,
                                                 cmark_node *parent_container,
                                                 unsigned char *parent_string,
                                                 int paragraph_offset) {
  cmark_node *paragraph;
  cmark_strbuf *paragraph_content;

  paragraph = cmark_node_new_with_mem(CMARK_NODE_PARAGRAPH, parser->mem);

  paragraph_content = unescape_pipes(parser->mem, parent_string, paragraph_offset);
  cmark_strbuf_trim(paragraph_content);
  cmark_node_set_string_content(paragraph, (char *) paragraph_content->ptr);
  // The node has its own copy now; release the temporary buffer.
  cmark_strbuf_free(paragraph_content);
  parser->mem->free(paragraph_content);

  if (!cmark_node_insert_before(parent_container, paragraph)) {
    // Use cmark_node_free rather than a bare mem->free so that the string
    // content stored on the node above is released along with the node.
    cmark_node_free(paragraph);
  }
}
// Tries to turn `parent_container` into a table, treating the current input
// line as the delimiter row and the container's existing string content as
// the header row.
//
// On success the container becomes a CMARK_NODE_TABLE with column count and
// alignments set, a header row node with one cell node per column is added,
// and the parser is advanced to the end of the line. Text found before the
// header line inside the container is split off into a paragraph inserted
// ahead of the table.
//
// `parent_container` is returned in every case; callers can tell success
// only by the container's type.
static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
cmark_parser *parser,
cmark_node *parent_container,
unsigned char *input, int len) {
cmark_node *table_header;
table_row *header_row = NULL;
table_row *delimiter_row = NULL;
node_table_row *ntr;
const char *parent_string;
uint16_t i;
// This container has already been rejected as a table header once.
if (parent_container->flags & CMARK_NODE__TABLE_VISITED) {
return parent_container;
}
if (!scan_table_start(input, len, cmark_parser_get_first_nonspace(parser))) {
return parent_container;
}
// Since scan_table_start was successful, we must have a delimiter row.
delimiter_row = row_from_string(
self, parser, input + cmark_parser_get_first_nonspace(parser),
len - cmark_parser_get_first_nonspace(parser));
// assert may be optimized out, don't rely on it for security boundaries
if (!delimiter_row) {
return parent_container;
}
assert(delimiter_row);
cmark_arena_push();
// Check for a matching header row. We call `row_from_string` with the entire
// (potentially long) parent container as input, but this should be safe since
// `row_from_string` bails out early if it does not find a row.
parent_string = cmark_node_get_string_content(parent_container);
header_row = row_from_string(self, parser, (unsigned char *)parent_string,
(int)strlen(parent_string));
if (!header_row || header_row->n_columns != delimiter_row->n_columns) {
free_table_row(parser->mem, delimiter_row);
free_table_row(parser->mem, header_row);
cmark_arena_pop();
// Remember the failure so later lines do not retry this container.
parent_container->flags |= CMARK_NODE__TABLE_VISITED;
return parent_container;
}
// NOTE(review): a non-zero result from cmark_arena_pop() presumably means
// the rows parsed above were released with the arena, hence the re-parse --
// confirm against the arena implementation.
if (cmark_arena_pop()) {
delimiter_row = row_from_string(
self, parser, input + cmark_parser_get_first_nonspace(parser),
len - cmark_parser_get_first_nonspace(parser));
header_row = row_from_string(self, parser, (unsigned char *)parent_string,
(int)strlen(parent_string));
// row_from_string can return NULL, add additional check to ensure n_columns match
if (!delimiter_row || !header_row || header_row->n_columns != delimiter_row->n_columns) {
free_table_row(parser->mem, delimiter_row);
free_table_row(parser->mem, header_row);
return parent_container;
}
}
if (!cmark_node_set_type(parent_container, CMARK_NODE_TABLE)) {
free_table_row(parser->mem, header_row);
free_table_row(parser->mem, delimiter_row);
return parent_container;
}
// Text before the header line stays behind as a separate paragraph.
if (header_row->paragraph_offset) {
try_inserting_table_header_paragraph(parser, parent_container, (unsigned char *)parent_string,
header_row->paragraph_offset);
}
cmark_node_set_syntax_extension(parent_container, self);
parent_container->as.opaque = parser->mem->calloc(1, sizeof(node_table));
set_n_table_columns(parent_container, header_row->n_columns);
// allocate alignments based on delimiter_row->n_columns
// since we populate the alignments array based on delimiter_row->cells
uint8_t *alignments =
(uint8_t *)parser->mem->calloc(delimiter_row->n_columns, sizeof(uint8_t));
// A leading and/or trailing ':' in a delimiter cell selects the alignment;
// entries without either stay 0 from calloc.
for (i = 0; i < delimiter_row->n_columns; ++i) {
node_cell *node = &delimiter_row->cells[i];
bool left = node->buf->ptr[0] == ':', right = node->buf->ptr[node->buf->size - 1] == ':';
if (left && right)
alignments[i] = 'c';
else if (left)
alignments[i] = 'l';
else if (right)
alignments[i] = 'r';
}
set_table_alignments(parent_container, alignments);
table_header =
cmark_parser_add_child(parser, parent_container, CMARK_NODE_TABLE_ROW,
parent_container->start_column);
cmark_node_set_syntax_extension(table_header, self);
table_header->end_column = parent_container->start_column + (int)strlen(parent_string) - 2;
table_header->start_line = table_header->end_line = parent_container->start_line;
table_header->as.opaque = ntr = (node_table_row *)parser->mem->calloc(1, sizeof(node_table_row));
ntr->is_header = true;
// One cell node per header column, positioned relative to the container.
for (i = 0; i < header_row->n_columns; ++i) {
node_cell *cell = &header_row->cells[i];
cmark_node *header_cell = cmark_parser_add_child(parser, table_header,
CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset);
header_cell->start_line = header_cell->end_line = parent_container->start_line;
header_cell->internal_offset = cell->internal_offset;
header_cell->end_column = parent_container->start_column + cell->end_offset;
cmark_node_set_string_content(header_cell, (char *) cell->buf->ptr);
cmark_node_set_syntax_extension(header_cell, self);
set_cell_index(header_cell, i);
}
// `i` now equals the number of header cells, all parsed from the input.
incr_table_row_count(parent_container, i);
// Advance the parser over the remainder of the delimiter line.
cmark_parser_advance_offset(
parser, (char *)input,
(int)strlen((char *)input) - 1 - cmark_parser_get_offset(parser), false);
free_table_row(parser->mem, header_row);
free_table_row(parser->mem, delimiter_row);
return parent_container;
}
// Tries to append one body row to an existing table.
//
// Returns NULL without touching the tree when the parser reports a blank
// line, or when the table's autocompleted-cell count already exceeds
// MAX_AUTOCOMPLETED_CELLS. Otherwise a CMARK_NODE_TABLE_ROW child is added to
// parent_container and the line, starting at the parser's first non-space
// offset, is parsed with row_from_string(). If parsing fails the freshly
// added row node is freed and NULL is returned; on success the row node is
// returned.
static cmark_node *try_opening_table_row(cmark_syntax_extension *self,
cmark_parser *parser,
cmark_node *parent_container,
unsigned char *input, int len) {
  cmark_node *table_row_block;
  table_row *row;
  if (cmark_parser_is_blank(parser))
    return NULL;
  // Stop opening rows once too many cells have been autocompleted.
  if (get_n_autocompleted_cells(parent_container) > MAX_AUTOCOMPLETED_CELLS) {
    return NULL;
  }
  // The row node is attached before the line is parsed; the failure path
  // below has to remove it again.
  table_row_block =
  cmark_parser_add_child(parser, parent_container, CMARK_NODE_TABLE_ROW,
  parent_container->start_column);
  cmark_node_set_syntax_extension(table_row_block, self);
  table_row_block->end_column = parent_container->end_column;
  table_row_block->as.opaque = parser->mem->calloc(1, sizeof(node_table_row));
  row = row_from_string(self, parser, input + cmark_parser_get_first_nonspace(parser),
  len - cmark_parser_get_first_nonspace(parser));
  if (!row) {
    // clean up the dangling node
    cmark_node_free(table_row_block);
    return NULL;
  }
  {
    int i, table_columns = get_n_table_columns(parent_container);
    // One cell node per parsed cell, but never more than the table has columns.
    for (i = 0; i < row->n_columns && i < table_columns; ++i) {
      node_cell *cell = &row->cells[i];
      cmark_node *node = cmark_parser_add_child(parser, table_row_block,
      CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset);
      node->internal_offset = cell->internal_offset;
      node->end_column = parent_container->start_column + cell->end_offset;
      cmark_node_set_string_content(node, (char *) cell->buf->ptr);
      cmark_node_set_syntax_extension(node, self);
      set_cell_index(node, i);
    }
    // i is the number of cells taken from the parsed row at this point.
    // NOTE(review): incr_table_row_count() is defined outside this view;
    // presumably it feeds the autocompleted-cell limit checked above - confirm.
    incr_table_row_count(parent_container, i);
    // Pad short rows with empty cells so every row has table_columns cells.
    for (; i < table_columns; ++i) {
      cmark_node *node = cmark_parser_add_child(
      parser, table_row_block, CMARK_NODE_TABLE_CELL, 0);
      cmark_node_set_syntax_extension(node, self);
      set_cell_index(node, i);
    }
  }
  free_table_row(parser->mem, row);
  // Advance the parser offset to len - 1, i.e. consume all but the last byte
  // of the line (presumably the line terminator - confirm).
  cmark_parser_advance_offset(parser, (char *)input,
  len - 1 - cmark_parser_get_offset(parser), false);
  return table_row_block;
}
// Open-block hook for the table extension. A non-indented line inside a
// paragraph may start a table (header + delimiter row); a non-indented line
// inside a table may add a body row. Anything else yields NULL.
static cmark_node *try_opening_table_block(cmark_syntax_extension *self,
                                           int indented, cmark_parser *parser,
                                           cmark_node *parent_container,
                                           unsigned char *input, int len) {
  const cmark_node_type container_kind = cmark_node_get_type(parent_container);

  // Indented lines never open table blocks.
  if (indented)
    return NULL;

  if (container_kind == CMARK_NODE_PARAGRAPH)
    return try_opening_table_header(self, parser, parent_container, input, len);

  if (container_kind == CMARK_NODE_TABLE)
    return try_opening_table_row(self, parser, parent_container, input, len);

  return NULL;
}
// Match-block hook: reports (1/0) whether the current line continues an open
// table, i.e. whether it parses as a row with at least one column. The trial
// parse happens between cmark_arena_push() and cmark_arena_pop().
static int matches(cmark_syntax_extension *self, cmark_parser *parser,
                   unsigned char *input, int len,
                   cmark_node *parent_container) {
  table_row *candidate;
  int continues_table;

  if (cmark_node_get_type(parent_container) != CMARK_NODE_TABLE)
    return 0;

  cmark_arena_push();
  candidate = row_from_string(
      self, parser, input + cmark_parser_get_first_nonspace(parser),
      len - cmark_parser_get_first_nonspace(parser));
  continues_table = (candidate && candidate->n_columns) ? 1 : 0;
  free_table_row(parser->mem, candidate);
  cmark_arena_pop();

  return continues_table;
}
// Returns the type name of a table-extension node: "table", "table_header" /
// "table_row" (depending on the row's is_header flag), "table_cell", or
// "<unknown>" for any other node type.
static const char *get_type_string(cmark_syntax_extension *self,
                                   cmark_node *node) {
  if (node->type == CMARK_NODE_TABLE)
    return "table";

  if (node->type == CMARK_NODE_TABLE_ROW) {
    const node_table_row *row_data = (node_table_row *)node->as.opaque;
    return row_data->is_header ? "table_header" : "table_row";
  }

  if (node->type == CMARK_NODE_TABLE_CELL)
    return "table_cell";

  return "<unknown>";
}
// Containment rules for table nodes: a table holds rows, a row holds cells,
// and a cell holds only the inline node types listed below. Every other
// parent type yields 0.
static int can_contain(cmark_syntax_extension *extension, cmark_node *node,
                       cmark_node_type child_type) {
  if (node->type == CMARK_NODE_TABLE)
    return child_type == CMARK_NODE_TABLE_ROW;

  if (node->type == CMARK_NODE_TABLE_ROW)
    return child_type == CMARK_NODE_TABLE_CELL;

  if (node->type != CMARK_NODE_TABLE_CELL)
    return false;

  // Inline content permitted inside a cell.
  return child_type == CMARK_NODE_TEXT ||
         child_type == CMARK_NODE_CODE ||
         child_type == CMARK_NODE_EMPH ||
         child_type == CMARK_NODE_STRONG ||
         child_type == CMARK_NODE_LINK ||
         child_type == CMARK_NODE_IMAGE ||
         child_type == CMARK_NODE_STRIKETHROUGH ||
         child_type == CMARK_NODE_HTML_INLINE ||
         child_type == CMARK_NODE_FOOTNOTE_REFERENCE;
}
// Returns non-zero only for table cells: of the node types this extension
// defines, cells are the only ones reported as holding inline content.
static int contains_inlines(cmark_syntax_extension *extension,
cmark_node *node) {
  return node->type == CMARK_NODE_TABLE_CELL;
}
// Renders table nodes as pipe-table markup (also installed as the plaintext
// renderer). After the last cell of the header row it emits the delimiter
// row, built from the table's alignment array.
static void commonmark_render(cmark_syntax_extension *extension,
                              cmark_renderer *renderer, cmark_node *node,
                              cmark_event_type ev_type, int options) {
  const bool on_enter = (ev_type == CMARK_EVENT_ENTER);

  if (node->type == CMARK_NODE_TABLE) {
    // Blank line on both enter and exit.
    renderer->blankline(renderer);
    return;
  }

  if (node->type == CMARK_NODE_TABLE_ROW) {
    if (on_enter) {
      renderer->cr(renderer);
      renderer->out(renderer, node, "|", false, LITERAL);
    }
    return;
  }

  if (node->type != CMARK_NODE_TABLE_CELL) {
    assert(false);
    return;
  }

  if (on_enter) {
    renderer->out(renderer, node, " ", false, LITERAL);
    return;
  }

  renderer->out(renderer, node, " |", false, LITERAL);

  // Leaving the last cell of the header row: write the delimiter row.
  if (((node_table_row *)node->parent->as.opaque)->is_header && !node->next) {
    cmark_node *table = node->parent->parent;
    uint8_t *col_align = get_table_alignments(table);
    uint16_t col_count = ((node_table *)table->as.opaque)->n_columns;
    int col;

    renderer->cr(renderer);
    renderer->out(renderer, node, "|", false, LITERAL);
    for (col = 0; col < col_count; col++) {
      const char *delimiter = NULL;

      switch (col_align[col]) {
      case 0:   delimiter = " --- |"; break;
      case 'l': delimiter = " :-- |"; break;
      case 'c': delimiter = " :-: |"; break;
      case 'r': delimiter = " --: |"; break;
      }
      // Unrecognised alignment bytes produce no output for that column.
      if (delimiter)
        renderer->out(renderer, node, delimiter, false, LITERAL);
    }
    renderer->cr(renderer);
  }
}
// Renders table nodes as a LaTeX table/tabular environment. The column
// specification comes from the table's alignment array; unset (0) alignment
// is rendered as left-aligned.
static void latex_render(cmark_syntax_extension *extension,
                         cmark_renderer *renderer, cmark_node *node,
                         cmark_event_type ev_type, int options) {
  const bool on_enter = (ev_type == CMARK_EVENT_ENTER);

  if (node->type == CMARK_NODE_TABLE) {
    if (on_enter) {
      uint8_t *col_align = get_table_alignments(node);
      uint16_t col_count;
      int col;

      renderer->cr(renderer);
      renderer->out(renderer, node, "\\begin{table}", false, LITERAL);
      renderer->cr(renderer);
      renderer->out(renderer, node, "\\begin{tabular}{", false, LITERAL);

      col_count = ((node_table *)node->as.opaque)->n_columns;
      for (col = 0; col < col_count; col++) {
        const char *spec = NULL;

        switch (col_align[col]) {
        case 0:
        case 'l': spec = "l"; break;
        case 'c': spec = "c"; break;
        case 'r': spec = "r"; break;
        }
        if (spec)
          renderer->out(renderer, node, spec, false, LITERAL);
      }

      renderer->out(renderer, node, "}", false, LITERAL);
      renderer->cr(renderer);
    } else {
      renderer->out(renderer, node, "\\end{tabular}", false, LITERAL);
      renderer->cr(renderer);
      renderer->out(renderer, node, "\\end{table}", false, LITERAL);
      renderer->cr(renderer);
    }
    return;
  }

  if (node->type == CMARK_NODE_TABLE_ROW) {
    if (!on_enter)
      renderer->cr(renderer);
    return;
  }

  if (node->type == CMARK_NODE_TABLE_CELL) {
    // Cells are separated by " & "; the last cell ends the row with " \\".
    if (!on_enter)
      renderer->out(renderer, node, node->next ? " & " : " \\\\", false,
                    LITERAL);
    return;
  }

  assert(false);
}
// XML attribute hook: yields an align="..." attribute for cells that sit in
// the header row and have an explicit alignment; NULL in every other case.
static const char *xml_attr(cmark_syntax_extension *extension,
                            cmark_node *node) {
  if (node->type != CMARK_NODE_TABLE_CELL)
    return NULL;

  if (!cmark_gfm_extensions_get_table_row_is_header(node->parent))
    return NULL;

  switch (get_cell_alignment(node)) {
  case 'l':
    return " align=\"left\"";
  case 'c':
    return " align=\"center\"";
  case 'r':
    return " align=\"right\"";
  }

  return NULL;
}
// Renders table nodes as a tbl(1) block (.TS ... .TE) with '@' as the cell
// separator. Unset (0) alignment is rendered as centred.
static void man_render(cmark_syntax_extension *extension,
                       cmark_renderer *renderer, cmark_node *node,
                       cmark_event_type ev_type, int options) {
  const bool on_enter = (ev_type == CMARK_EVENT_ENTER);

  if (node->type == CMARK_NODE_TABLE) {
    if (!on_enter) {
      renderer->out(renderer, node, ".TE", false, LITERAL);
      renderer->cr(renderer);
      return;
    }

    {
      uint8_t *col_align = get_table_alignments(node);
      uint16_t col_count;
      int col;

      renderer->cr(renderer);
      renderer->out(renderer, node, ".TS", false, LITERAL);
      renderer->cr(renderer);
      renderer->out(renderer, node, "tab(@);", false, LITERAL);
      renderer->cr(renderer);

      col_count = ((node_table *)node->as.opaque)->n_columns;
      for (col = 0; col < col_count; col++) {
        const char *spec = NULL;

        switch (col_align[col]) {
        case 'l': spec = "l"; break;
        case 0:
        case 'c': spec = "c"; break;
        case 'r': spec = "r"; break;
        }
        if (spec)
          renderer->out(renderer, node, spec, false, LITERAL);
      }

      // The format line is terminated only when there is at least one column.
      if (col_count) {
        renderer->out(renderer, node, ".", false, LITERAL);
        renderer->cr(renderer);
      }
    }
    return;
  }

  if (node->type == CMARK_NODE_TABLE_ROW) {
    if (!on_enter)
      renderer->cr(renderer);
    return;
  }

  if (node->type == CMARK_NODE_TABLE_CELL) {
    if (!on_enter && node->next)
      renderer->out(renderer, node, "@", false, LITERAL);
    return;
  }

  assert(false);
}
// Appends an alignment attribute for a table cell to `html`: an inline
// text-align style when CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES is set in
// `options`, otherwise the legacy align="..." attribute.
static void html_table_add_align(cmark_strbuf* html, const char* align, int options) {
  const char *attr_open =
      (options & CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES)
          ? " style=\"text-align: "
          : " align=\"";

  cmark_strbuf_puts(html, attr_open);
  cmark_strbuf_puts(html, align);
  cmark_strbuf_puts(html, "\"");
}
// Per-render state for the HTML table renderer. html_render() overlays this
// struct on the storage of renderer->opaque rather than allocating it.
struct html_table_state {
  // Set once a <tbody> has been opened; cleared when the table is entered or left.
  unsigned need_closing_table_body : 1;
  // Set while the header row is being rendered; selects <th> over <td>.
  unsigned in_table_header : 1;
};
// Renders table, row and cell nodes as HTML into renderer->html.
//
// The header row is wrapped in <thead>; the first non-header row opens a
// <tbody>, which is closed when the table node is left. Cells are written as
// <th> while the header row is being rendered and as <td> otherwise, with an
// alignment attribute when get_cell_alignment() reports 'l', 'c' or 'r'.
// Any other node type trips the assert.
static void html_render(cmark_syntax_extension *extension,
cmark_html_renderer *renderer, cmark_node *node,
cmark_event_type ev_type, int options) {
  bool entering = (ev_type == CMARK_EVENT_ENTER);
  cmark_strbuf *html = renderer->html;
  // XXX: we just monopolise renderer->opaque.
  // The state struct is overlaid on the opaque field's own storage.
  struct html_table_state *table_state =
  (struct html_table_state *)&renderer->opaque;
  if (node->type == CMARK_NODE_TABLE) {
    if (entering) {
      cmark_html_render_cr(html);
      cmark_strbuf_puts(html, "<table");
      cmark_html_render_sourcepos(node, html, options);
      cmark_strbuf_putc(html, '>');
      table_state->need_closing_table_body = false;
    } else {
      // Close the <tbody> only if a body row actually opened one.
      if (table_state->need_closing_table_body) {
        cmark_html_render_cr(html);
        cmark_strbuf_puts(html, "</tbody>");
        cmark_html_render_cr(html);
      }
      table_state->need_closing_table_body = false;
      cmark_html_render_cr(html);
      cmark_strbuf_puts(html, "</table>");
      cmark_html_render_cr(html);
    }
  } else if (node->type == CMARK_NODE_TABLE_ROW) {
    if (entering) {
      cmark_html_render_cr(html);
      if (((node_table_row *)node->as.opaque)->is_header) {
        table_state->in_table_header = 1;
        cmark_strbuf_puts(html, "<thead>");
        cmark_html_render_cr(html);
      } else if (!table_state->need_closing_table_body) {
        // First body row of this table: open the <tbody>.
        cmark_strbuf_puts(html, "<tbody>");
        cmark_html_render_cr(html);
        table_state->need_closing_table_body = 1;
      }
      cmark_strbuf_puts(html, "<tr");
      cmark_html_render_sourcepos(node, html, options);
      cmark_strbuf_putc(html, '>');
    } else {
      cmark_html_render_cr(html);
      cmark_strbuf_puts(html, "</tr>");
      if (((node_table_row *)node->as.opaque)->is_header) {
        cmark_html_render_cr(html);
        cmark_strbuf_puts(html, "</thead>");
        table_state->in_table_header = false;
      }
    }
  } else if (node->type == CMARK_NODE_TABLE_CELL) {
    if (entering) {
      cmark_html_render_cr(html);
      if (table_state->in_table_header) {
        cmark_strbuf_puts(html, "<th");
      } else {
        cmark_strbuf_puts(html, "<td");
      }
      // No attribute is written for any other alignment value.
      switch (get_cell_alignment(node)) {
      case 'l': html_table_add_align(html, "left", options); break;
      case 'c': html_table_add_align(html, "center", options); break;
      case 'r': html_table_add_align(html, "right", options); break;
      }
      cmark_html_render_sourcepos(node, html, options);
      cmark_strbuf_putc(html, '>');
    } else {
      if (table_state->in_table_header) {
        cmark_strbuf_puts(html, "</th>");
      } else {
        cmark_strbuf_puts(html, "</td>");
      }
    }
  } else {
    assert(false);
  }
}
// Allocates the zero-initialised extension payload for a table, row or cell
// node and stores it in node->as.opaque. Nodes of any other type are left
// untouched.
static void opaque_alloc(cmark_syntax_extension *self, cmark_mem *mem, cmark_node *node) {
  size_t payload_size;

  if (node->type == CMARK_NODE_TABLE)
    payload_size = sizeof(node_table);
  else if (node->type == CMARK_NODE_TABLE_ROW)
    payload_size = sizeof(node_table_row);
  else if (node->type == CMARK_NODE_TABLE_CELL)
    payload_size = sizeof(node_cell);
  else
    return;

  node->as.opaque = mem->calloc(1, payload_size);
}
// Releases the extension payload of a table or table-row node. Cell nodes
// (and all other types) are deliberately not handled here.
static void opaque_free(cmark_syntax_extension *self, cmark_mem *mem, cmark_node *node) {
  if (node->type == CMARK_NODE_TABLE) {
    free_node_table(mem, node->as.opaque);
    return;
  }

  if (node->type == CMARK_NODE_TABLE_ROW) {
    free_node_table_row(mem, node->as.opaque);
    return;
  }
}
// CommonMark escape hook: returns 1 when `c` is '|' and `node` is not itself
// a table, row or cell node; 0 otherwise.
static int escape(cmark_syntax_extension *self, cmark_node *node, int c) {
  if (c != '|')
    return 0;

  if (node->type == CMARK_NODE_TABLE ||
      node->type == CMARK_NODE_TABLE_ROW ||
      node->type == CMARK_NODE_TABLE_CELL)
    return 0;

  return 1;
}
// Builds the "table" syntax extension: registers the CMARK_NODE__TABLE_VISITED
// node flag, installs the block-matching, rendering and opaque-data callbacks
// defined in this file, and obtains the three node type ids used for tables,
// rows and cells. Returns the new extension.
cmark_syntax_extension *create_table_extension(void) {
  cmark_syntax_extension *self = cmark_syntax_extension_new("table");
  cmark_register_node_flag(&CMARK_NODE__TABLE_VISITED);
  cmark_syntax_extension_set_match_block_func(self, matches);
  cmark_syntax_extension_set_open_block_func(self, try_opening_table_block);
  cmark_syntax_extension_set_get_type_string_func(self, get_type_string);
  cmark_syntax_extension_set_can_contain_func(self, can_contain);
  cmark_syntax_extension_set_contains_inlines_func(self, contains_inlines);
  cmark_syntax_extension_set_commonmark_render_func(self, commonmark_render);
  // Plaintext output reuses the CommonMark renderer.
  cmark_syntax_extension_set_plaintext_render_func(self, commonmark_render);
  cmark_syntax_extension_set_latex_render_func(self, latex_render);
  cmark_syntax_extension_set_xml_attr_func(self, xml_attr);
  cmark_syntax_extension_set_man_render_func(self, man_render);
  cmark_syntax_extension_set_html_render_func(self, html_render);
  cmark_syntax_extension_set_opaque_alloc_func(self, opaque_alloc);
  cmark_syntax_extension_set_opaque_free_func(self, opaque_free);
  cmark_syntax_extension_set_commonmark_escape_func(self, escape);
  // The node type ids are assigned at run time, not compile-time constants.
  CMARK_NODE_TABLE = cmark_syntax_extension_add_node(0);
  CMARK_NODE_TABLE_ROW = cmark_syntax_extension_add_node(0);
  CMARK_NODE_TABLE_CELL = cmark_syntax_extension_add_node(0);
  return self;
}
// Returns the number of columns of a table node, or 0 when `node` is NULL or
// is not a table. The NULL check mirrors the row-header accessors in this
// file so a NULL node no longer dereferences a null pointer.
uint16_t cmark_gfm_extensions_get_table_columns(cmark_node *node) {
  if (!node || node->type != CMARK_NODE_TABLE)
    return 0;

  return ((node_table *)node->as.opaque)->n_columns;
}
// Returns the table's alignment array (one byte per column: 0, 'l', 'c' or
// 'r'), or NULL when `node` is NULL or is not a table. The returned pointer
// is the table's own storage, not a copy.
uint8_t *cmark_gfm_extensions_get_table_alignments(cmark_node *node) {
  if (!node || node->type != CMARK_NODE_TABLE)
    return NULL;

  return ((node_table *)node->as.opaque)->alignments;
}
// Public setter for a table's column count; forwards to set_n_table_columns()
// and returns its result unchanged.
int cmark_gfm_extensions_set_table_columns(cmark_node *node, uint16_t n_columns) {
  return set_n_table_columns(node, n_columns);
}
// Stores a private copy of `alignments` (ncols bytes) on a table node.
//
// Returns 0 without allocating when `node` is NULL or not a table, or when
// `alignments` is NULL although ncols is non-zero; otherwise returns the
// result of set_table_alignments(). Validating before the allocation keeps
// the copy from being orphaned when the node is rejected.
// NOTE(review): set_table_alignments() is outside this view; whether it
// releases a previously installed array should be confirmed.
int cmark_gfm_extensions_set_table_alignments(cmark_node *node, uint16_t ncols, uint8_t *alignments) {
  uint8_t *copy;

  if (!node || node->type != CMARK_NODE_TABLE)
    return 0;
  if (ncols > 0 && !alignments)
    return 0;

  copy = (uint8_t *)cmark_node_mem(node)->calloc(1, ncols);
  // memcpy with a zero length is skipped so no pointer is touched needlessly.
  if (ncols > 0)
    memcpy(copy, alignments, ncols);

  return set_table_alignments(node, copy);
}
// Reports whether `node` is the header row of a table. Returns 0 for NULL
// and for nodes that are not table rows.
int cmark_gfm_extensions_get_table_row_is_header(cmark_node *node)
{
  node_table_row *row_data;

  if (node == NULL)
    return 0;
  if (node->type != CMARK_NODE_TABLE_ROW)
    return 0;

  row_data = (node_table_row *)node->as.opaque;
  return row_data->is_header;
}
// Sets or clears the header flag of a table row. Any non-zero `is_header`
// counts as true. Returns 1 on success, 0 when `node` is NULL or not a row.
int cmark_gfm_extensions_set_table_row_is_header(cmark_node *node, int is_header)
{
  node_table_row *row_data;

  if (node == NULL || node->type != CMARK_NODE_TABLE_ROW)
    return 0;

  row_data = (node_table_row *)node->as.opaque;
  row_data->is_header = is_header ? 1 : 0;
  return 1;
}

View File

@ -1,12 +0,0 @@
#ifndef CMARK_GFM_TABLE_H
#define CMARK_GFM_TABLE_H
#include "cmark-gfm-core-extensions.h"
/* Node type ids for table, table-row and table-cell nodes. They are assigned
 * at run time by create_table_extension(). */
extern cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW,
CMARK_NODE_TABLE_CELL;
/* Creates and returns the "table" syntax extension. */
cmark_syntax_extension *create_table_extension(void);
#endif

View File

@ -1,60 +0,0 @@
#include "tagfilter.h"
#include <parser.h>
#include <ctype.h>
/* NULL-terminated list of lowercase tag names that filter() rejects. */
static const char *blacklist[] = {
  "title", "textarea", "style", "xmp", "iframe",
  "noembed", "noframes", "script", "plaintext", NULL,
};
/* Returns 1 when tag_data (tag_size bytes, starting with '<') is an opening
 * or closing tag whose name equals `tagname`, compared case-insensitively
 * against the lowercase `tagname`. The name must be followed by whitespace,
 * '>' or "/>"; otherwise, and for inputs shorter than three bytes, 0 is
 * returned. */
static int is_tag(const unsigned char *tag_data, size_t tag_size,
                  const char *tagname) {
  size_t pos = 1;

  if (tag_size < 3 || tag_data[0] != '<')
    return 0;

  /* Skip the slash of a closing tag. */
  if (tag_data[pos] == '/')
    pos++;

  /* Walk the tag name and the expected name in lockstep. */
  while (pos < tag_size && *tagname != 0) {
    if (tolower(tag_data[pos]) != *tagname)
      return 0;
    pos++;
    tagname++;
  }

  /* Input exhausted before a terminator could follow the name. */
  if (pos == tag_size)
    return 0;

  if (cmark_isspace(tag_data[pos]) || tag_data[pos] == '>')
    return 1;

  if (tag_data[pos] == '/' && tag_size >= pos + 2 && tag_data[pos + 1] == '>')
    return 1;

  return 0;
}
/* HTML filter hook: returns 0 when `tag` matches any name in `blacklist`,
 * 1 otherwise. */
static int filter(cmark_syntax_extension *ext, const unsigned char *tag,
                  size_t tag_len) {
  size_t idx;

  for (idx = 0; blacklist[idx] != NULL; idx++) {
    if (is_tag(tag, tag_len, blacklist[idx]))
      return 0;
  }

  return 1;
}
/* Creates the "tagfilter" syntax extension and installs filter() as its HTML
 * filter callback. */
cmark_syntax_extension *create_tagfilter_extension(void) {
  cmark_syntax_extension *ext = cmark_syntax_extension_new("tagfilter");
  cmark_syntax_extension_set_html_filter_func(ext, filter);
  return ext;
}

View File

@ -1,8 +0,0 @@
#ifndef CMARK_GFM_TAGFILTER_H
#define CMARK_GFM_TAGFILTER_H
#include "cmark-gfm-core-extensions.h"
/* Creates and returns the "tagfilter" syntax extension. */
cmark_syntax_extension *create_tagfilter_extension(void);
#endif

View File

@ -1,156 +0,0 @@
#include "tasklist.h"
#include <parser.h>
#include <render.h>
#include <html.h>
#include "ext_scanners.h"
// Checkbox states of a tasklist item.
// NOTE(review): no use of this enum is visible in this part of the file; the
// state is kept in node->as.list.checked.
typedef enum {
  CMARK_TASKLIST_NOCHECKED,
  CMARK_TASKLIST_CHECKED,
} cmark_tasklist_type;
// Local constants
// Type name reported for tasklist items; also used by the public accessors
// below to recognise tasklist nodes.
static const char *TYPE_STRING = "tasklist";
// Type-string hook: every node handled by this extension is a "tasklist".
static const char *get_type_string(cmark_syntax_extension *extension, cmark_node *node) {
  return TYPE_STRING;
}
// Sets the checked state of a tasklist item.
// Returns 1 if the state was set, 0 when `node` is NULL, has no extension
// attached, or does not report the tasklist type string.
int cmark_gfm_extensions_set_tasklist_item_checked(cmark_node *node, bool is_checked) {
  const bool is_tasklist_item =
      node && node->extension &&
      strcmp(cmark_node_get_type_string(node), TYPE_STRING) == 0;

  if (!is_tasklist_item)
    return 0;

  node->as.list.checked = is_checked;
  return 1;
}
// Returns true when `node` is a tasklist item whose checkbox is checked.
// NULL nodes, nodes without an extension and non-tasklist nodes yield false.
bool cmark_gfm_extensions_get_tasklist_item_checked(cmark_node *node) {
  const bool is_tasklist_item =
      node && node->extension &&
      strcmp(cmark_node_get_type_string(node), TYPE_STRING) == 0;

  if (!is_tasklist_item)
    return false;

  return node->as.list.checked ? true : false;
}
// Decides whether the current line still belongs to list item `container`
// and, if so, advances the parser past the item's prefix. Returns true when
// the line continues the item.
static bool parse_node_item_prefix(cmark_parser *parser, const char *input,
                                   cmark_node *container) {
  // Indented at least as far as the item's content: skip that indentation.
  if (parser->indent >=
      container->as.list.marker_offset + container->as.list.padding) {
    cmark_parser_advance_offset(
        parser, input,
        container->as.list.marker_offset + container->as.list.padding, true);
    return true;
  }

  // A blank line continues the item only if the item already has content.
  // If container->first_child is NULL, the opening line of the list item was
  // blank after the list marker; in that case the list item is finished.
  if (parser->blank && container->first_child != NULL) {
    cmark_parser_advance_offset(parser, input,
                                parser->first_nonspace - parser->offset, false);
    return true;
  }

  return false;
}
// Match-block hook: the current line continues a tasklist item exactly when
// parse_node_item_prefix() accepts it. `len` is not used.
static int matches(cmark_syntax_extension *self, cmark_parser *parser,
unsigned char *input, int len,
cmark_node *parent_container) {
  return parse_node_item_prefix(parser, (const char*)input, parent_container);
}
// Containment hook: returns 1 when `node` is a list item, 0 otherwise. The
// child type is not consulted.
static int can_contain(cmark_syntax_extension *extension, cmark_node *node,
                       cmark_node_type child_type) {
  if (node->type == CMARK_NODE_ITEM)
    return 1;
  return 0;
}
// Open-block hook: turns a plain list item into a tasklist item when the
// line matches the tasklist scanner.
//
// It never creates a node and always returns NULL. On a match it attaches
// this extension to parent_container, advances the parser by 3 and records
// the checkbox state in parent_container->as.list.checked.
//
// The checkbox is looked for only inside the prefix matched by
// scan_tasklist(). Searching the whole line (as strstr on `input` did) marked
// an unchecked item as checked whenever "[x]" appeared later in its text,
// e.g. "- [ ] see [x]".
// NOTE(review): this relies on scan_tasklist() returning the length of the
// matched prefix measured from input[0] - confirm against ext_scanners.
static cmark_node *open_tasklist_item(cmark_syntax_extension *self,
                                      int indented, cmark_parser *parser,
                                      cmark_node *parent_container,
                                      unsigned char *input, int len) {
  bufsize_t matched;
  bufsize_t limit;
  bufsize_t pos;
  bool checked = false;

  if (cmark_node_get_type(parent_container) != CMARK_NODE_ITEM) {
    return NULL;
  }

  matched = scan_tasklist(input, len, 0);
  if (!matched) {
    return NULL;
  }

  cmark_node_set_syntax_extension(parent_container, self);
  cmark_parser_advance_offset(parser, (char *)input, 3, false);

  // Never read past the line, whatever the scanner reported.
  limit = matched < len ? matched : len;

  // Either an upper or lower case X means the task is completed.
  for (pos = 0; pos + 2 < limit; pos++) {
    if (input[pos] == '[' &&
        (input[pos + 1] == 'x' || input[pos + 1] == 'X') &&
        input[pos + 2] == ']') {
      checked = true;
      break;
    }
  }
  parent_container->as.list.checked = checked;

  return NULL;
}
// Renders a tasklist item as "- [x] " / "- [ ] " (also installed as the
// plaintext renderer).
//
// On enter, a continuation indent is appended to renderer->prefix; on exit
// exactly that many characters are removed again. Both sides use the same
// constant so the prefix of the enclosing block can never be truncated by
// more than was added.
static void commonmark_render(cmark_syntax_extension *extension,
                              cmark_renderer *renderer, cmark_node *node,
                              cmark_event_type ev_type, int options) {
  // Two columns: the width of the "- " list marker.
  static const char item_indent[] = "  ";
  const bufsize_t item_indent_len = (bufsize_t)(sizeof(item_indent) - 1);
  bool entering = (ev_type == CMARK_EVENT_ENTER);

  if (entering) {
    renderer->cr(renderer);
    if (node->as.list.checked) {
      renderer->out(renderer, node, "- [x] ", false, LITERAL);
    } else {
      renderer->out(renderer, node, "- [ ] ", false, LITERAL);
    }
    cmark_strbuf_puts(renderer->prefix, item_indent);
  } else {
    cmark_strbuf_truncate(renderer->prefix,
                          renderer->prefix->size - item_indent_len);
    renderer->cr(renderer);
  }
}
// Renders a tasklist item as an <li> that starts with a disabled checkbox
// input, which carries the `checked` attribute when the item is checked.
static void html_render(cmark_syntax_extension *extension,
                        cmark_html_renderer *renderer, cmark_node *node,
                        cmark_event_type ev_type, int options) {
  cmark_strbuf *out = renderer->html;

  if (ev_type != CMARK_EVENT_ENTER) {
    cmark_strbuf_puts(out, "</li>\n");
    return;
  }

  cmark_html_render_cr(out);
  cmark_strbuf_puts(out, "<li");
  cmark_html_render_sourcepos(node, out, options);
  cmark_strbuf_putc(out, '>');
  cmark_strbuf_puts(out,
                    node->as.list.checked
                        ? "<input type=\"checkbox\" checked=\"\" disabled=\"\" /> "
                        : "<input type=\"checkbox\" disabled=\"\" /> ");
}
// XML attribute hook: exposes the checkbox state as completed="true" or
// completed="false".
static const char *xml_attr(cmark_syntax_extension *extension,
                            cmark_node *node) {
  return node->as.list.checked ? " completed=\"true\""
                               : " completed=\"false\"";
}
// Creates the "tasklist" syntax extension and installs the block-matching,
// type-string, containment, rendering and XML-attribute callbacks defined in
// this file. Returns the new extension.
cmark_syntax_extension *create_tasklist_extension(void) {
  cmark_syntax_extension *ext = cmark_syntax_extension_new("tasklist");
  cmark_syntax_extension_set_match_block_func(ext, matches);
  cmark_syntax_extension_set_get_type_string_func(ext, get_type_string);
  cmark_syntax_extension_set_open_block_func(ext, open_tasklist_item);
  cmark_syntax_extension_set_can_contain_func(ext, can_contain);
  cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render);
  // Plaintext output reuses the CommonMark renderer.
  cmark_syntax_extension_set_plaintext_render_func(ext, commonmark_render);
  cmark_syntax_extension_set_html_render_func(ext, html_render);
  cmark_syntax_extension_set_xml_attr_func(ext, xml_attr);
  return ext;
}

View File

@ -1,8 +0,0 @@
#ifndef TASKLIST_H
#define TASKLIST_H
#include "cmark-gfm-core-extensions.h"
/* Creates and returns the "tasklist" syntax extension. */
cmark_syntax_extension *create_tasklist_extension(void);
#endif

View File

@ -1,22 +0,0 @@
# fuzzer(<name>)
#
# Builds the libFuzzer harness <name> from <name>.c in this directory.
# The harness is compiled and linked with -fsanitize=fuzzer and linked against
# the shared libraries when CMARK_SHARED is on, otherwise against the static
# ones when CMARK_STATIC is on.
#
# Include paths are attached to the harness target itself rather than to the
# directory, so they do not leak into other targets defined below this point.
#
# Example: fuzzer(fuzz_quadratic)
function(fuzzer name)
  add_executable(${name} ${name}.c)
  target_include_directories(${name} PRIVATE
    "${PROJECT_BINARY_DIR}/extensions"
    "${PROJECT_BINARY_DIR}/src"
    "${CMAKE_CURRENT_SOURCE_DIR}/../extensions"
    "${CMAKE_CURRENT_SOURCE_DIR}/../src"
  )
  set_target_properties(${name}
    PROPERTIES
      COMPILE_FLAGS "-fsanitize=fuzzer"
      LINK_FLAGS "-fsanitize=fuzzer")
  if(CMARK_SHARED)
    target_link_libraries(${name}
      PRIVATE libcmark-gfm-extensions libcmark-gfm)
  elseif(CMARK_STATIC)
    target_link_libraries(${name}
      PRIVATE libcmark-gfm-extensions_static libcmark-gfm_static)
  else()
    # Without either library the harness cannot link; fail at configure time
    # with a clear message instead of with undefined symbols at link time.
    message(FATAL_ERROR
      "fuzzer(${name}): enable CMARK_SHARED or CMARK_STATIC to build fuzzers")
  endif()
endfunction()

fuzzer(fuzz_quadratic)
fuzzer(fuzz_quadratic_brackets)

View File

@ -1,12 +0,0 @@
The quadratic fuzzer generates long sequences of repeated characters, such as `<?x<?x<?x<?x...`,
to detect quadratic complexity performance issues.
To build and run the quadratic fuzzer:
```bash
mkdir build-fuzz
cd build-fuzz
cmake -DCMARK_FUZZ_QUADRATIC=ON -DCMAKE_C_COMPILER=$(which clang) -DCMAKE_CXX_COMPILER=$(which clang++) -DCMAKE_BUILD_TYPE=Release ..
make
../fuzz/fuzzloop.sh
```

View File

@ -1,91 +0,0 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "cmark-gfm.h"
#include "cmark-gfm-core-extensions.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
/* NULL-terminated list of the syntax extensions attached to every parser the
 * fuzzer creates. */
const char *extension_names[] = {
  "autolink",
  "strikethrough",
  "table",
  "tagfilter",
  NULL,
};
/* libFuzzer start-up hook: makes sure the core extensions are registered so
 * that the names in extension_names can be looked up. The arguments are not
 * used. */
int LLVMFuzzerInitialize(int *argc, char ***argv) {
  cmark_gfm_core_extensions_ensure_registered();
  return 0;
}
/* libFuzzer entry point for the quadratic-complexity fuzzer.
 *
 * The first sizeof(fuzz_config) bytes of `data` are a packed configuration
 * header; the rest is markdown. When `splitpoint` and `repeatlen` describe a
 * valid slice of the markdown, the output is: the first `splitpoint` bytes,
 * then the `repeatlen`-byte slice repeated as often as fits in the 0x80000
 * byte buffer, then the bytes that followed the slice. Otherwise the markdown
 * is used verbatim. The result is parsed with all extensions attached and
 * rendered with every renderer. Inputs shorter than the header, or whose
 * markdown exceeds the buffer, are ignored. Always returns 0. */
int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  struct __attribute__((packed)) {
    int options;
    int width;
    uint8_t splitpoint;
    uint8_t repeatlen;
  } fuzz_config;
  if (size >= sizeof(fuzz_config)) {
    /* The beginning of `data` is treated as fuzzer configuration */
    memcpy(&fuzz_config, data, sizeof(fuzz_config));
    /* Test options that are used by GitHub. */
    /* This overwrites whatever option bits came from the input. */
    fuzz_config.options = CMARK_OPT_UNSAFE | CMARK_OPT_FOOTNOTES | CMARK_OPT_GITHUB_PRE_LANG | CMARK_OPT_HARDBREAKS;
    /* Remainder of input is the markdown */
    const char *markdown0 = (const char *)(data + sizeof(fuzz_config));
    const size_t markdown_size0 = size - sizeof(fuzz_config);
    char markdown[0x80000];
    if (markdown_size0 <= sizeof(markdown)) {
      size_t markdown_size = 0;
      /* The slice [splitpoint, splitpoint + repeatlen) must be non-empty and
       * lie inside the markdown. */
      if (fuzz_config.splitpoint <= markdown_size0 && 0 < fuzz_config.repeatlen &&
      fuzz_config.repeatlen <= markdown_size0 - fuzz_config.splitpoint) {
        const size_t size_after_splitpoint = markdown_size0 - fuzz_config.splitpoint - fuzz_config.repeatlen;
        memcpy(&markdown[markdown_size], &markdown0[0], fuzz_config.splitpoint);
        markdown_size += fuzz_config.splitpoint;
        /* Repeat the slice while room remains for one more copy plus the tail. */
        while (markdown_size + fuzz_config.repeatlen + size_after_splitpoint <= sizeof(markdown)) {
          memcpy(&markdown[markdown_size], &markdown0[fuzz_config.splitpoint],
          fuzz_config.repeatlen);
          markdown_size += fuzz_config.repeatlen;
        }
        memcpy(&markdown[markdown_size], &markdown0[fuzz_config.splitpoint + fuzz_config.repeatlen],
        size_after_splitpoint);
        markdown_size += size_after_splitpoint;
      } else {
        markdown_size = markdown_size0;
        memcpy(markdown, markdown0, markdown_size);
      }
      cmark_parser *parser = cmark_parser_new(fuzz_config.options);
      for (const char **it = extension_names; *it; ++it) {
        const char *extension_name = *it;
        cmark_syntax_extension *syntax_extension = cmark_find_syntax_extension(extension_name);
        if (!syntax_extension) {
          fprintf(stderr, "%s is not a valid syntax extension\n", extension_name);
          abort();
        }
        cmark_parser_attach_syntax_extension(parser, syntax_extension);
      }
      cmark_parser_feed(parser, markdown, markdown_size);
      cmark_node *doc = cmark_parser_finish(parser);
      /* Exercise every renderer; the output is discarded. */
      free(cmark_render_html(doc, fuzz_config.options, NULL));
      free(cmark_render_xml(doc, fuzz_config.options));
      free(cmark_render_man(doc, fuzz_config.options, 80));
      free(cmark_render_commonmark(doc, fuzz_config.options, 80));
      free(cmark_render_plaintext(doc, fuzz_config.options, 80));
      free(cmark_render_latex(doc, fuzz_config.options, 80));
      cmark_node_free(doc);
      cmark_parser_free(parser);
    }
  }
  return 0;
}

View File

@ -1,110 +0,0 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "cmark-gfm.h"
#include "cmark-gfm-core-extensions.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
/* NULL-terminated list of the syntax extensions attached to every parser the
 * fuzzer creates. */
const char *extension_names[] = {
  "autolink",
  "strikethrough",
  "table",
  "tagfilter",
  NULL,
};
/* libFuzzer start-up hook: makes sure the core extensions are registered so
 * that the names in extension_names can be looked up. The arguments are not
 * used. */
int LLVMFuzzerInitialize(int *argc, char ***argv) {
  cmark_gfm_core_extensions_ensure_registered();
  return 0;
}
/* libFuzzer entry point for the bracket-nesting fuzzer.
 *
 * The first sizeof(fuzz_config) bytes of `data` are a packed configuration
 * header; the rest is markdown, read as five consecutive pieces:
 * start | open | middle | close | end. The output is: start, the open piece
 * repeated until half of the 0x80000 byte buffer is used, middle, the close
 * piece repeated while it and the end piece still fit, and finally the end
 * piece if it fits. If the piece lengths exceed the markdown, the markdown is
 * used verbatim. The result is parsed with all extensions attached and
 * rendered to HTML only. Always returns 0. */
int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  struct __attribute__((packed)) {
    int options;
    int width;
    uint8_t startlen;
    uint8_t openlen;
    uint8_t middlelen;
    uint8_t closelen;
  } fuzz_config;
  if (size >= sizeof(fuzz_config)) {
    /* The beginning of `data` is treated as fuzzer configuration */
    memcpy(&fuzz_config, data, sizeof(fuzz_config));
    /* Test options that are used by GitHub. */
    /* This overwrites whatever option bits came from the input. */
    fuzz_config.options = CMARK_OPT_UNSAFE | CMARK_OPT_FOOTNOTES | CMARK_OPT_GITHUB_PRE_LANG | CMARK_OPT_HARDBREAKS;
    /* The repeated and middle pieces are limited to 0..7 bytes each. */
    fuzz_config.openlen = fuzz_config.openlen & 0x7;
    fuzz_config.middlelen = fuzz_config.middlelen & 0x7;
    fuzz_config.closelen = fuzz_config.closelen & 0x7;
    /* Remainder of input is the markdown */
    const char *markdown0 = (const char *)(data + sizeof(fuzz_config));
    const size_t markdown_size0 = size - sizeof(fuzz_config);
    char markdown[0x80000];
    if (markdown_size0 <= sizeof(markdown)) {
      size_t markdown_size = 0;
      const size_t componentslen = fuzz_config.startlen + fuzz_config.openlen + fuzz_config.middlelen + fuzz_config.closelen;
      if (componentslen <= markdown_size0) {
        /* `offset` walks the input pieces, `markdown_size` the output. */
        size_t offset = 0;
        const size_t endlen = markdown_size0 - componentslen;
        memcpy(&markdown[markdown_size], &markdown0[offset], fuzz_config.startlen);
        markdown_size += fuzz_config.startlen;
        offset += fuzz_config.startlen;
        if (0 < fuzz_config.openlen) {
          /* Fill up to half of the buffer with the opening piece. */
          while (markdown_size + fuzz_config.openlen <= sizeof(markdown)/2) {
            memcpy(&markdown[markdown_size], &markdown0[offset],
            fuzz_config.openlen);
            markdown_size += fuzz_config.openlen;
          }
          offset += fuzz_config.openlen;
        }
        memcpy(&markdown[markdown_size], &markdown0[offset],
        fuzz_config.middlelen);
        markdown_size += fuzz_config.middlelen;
        offset += fuzz_config.middlelen;
        if (0 < fuzz_config.closelen) {
          /* Repeat the closing piece while it and the end piece still fit. */
          while (markdown_size + fuzz_config.closelen + endlen <= sizeof(markdown)) {
            memcpy(&markdown[markdown_size], &markdown0[offset],
            fuzz_config.closelen);
            markdown_size += fuzz_config.closelen;
          }
          offset += fuzz_config.closelen;
        }
        /* The end piece is dropped when it no longer fits. */
        if (markdown_size + endlen <= sizeof(markdown)) {
          memcpy(&markdown[markdown_size], &markdown0[offset],
          endlen);
          markdown_size += endlen;
        }
      } else {
        markdown_size = markdown_size0;
        memcpy(markdown, markdown0, markdown_size);
      }
      cmark_parser *parser = cmark_parser_new(fuzz_config.options);
      for (const char **it = extension_names; *it; ++it) {
        const char *extension_name = *it;
        cmark_syntax_extension *syntax_extension = cmark_find_syntax_extension(extension_name);
        if (!syntax_extension) {
          fprintf(stderr, "%s is not a valid syntax extension\n", extension_name);
          abort();
        }
        cmark_parser_attach_syntax_extension(parser, syntax_extension);
      }
      cmark_parser_feed(parser, markdown, markdown_size);
      cmark_node *doc = cmark_parser_finish(parser);
      free(cmark_render_html(doc, fuzz_config.options, NULL));
      cmark_node_free(doc);
      cmark_parser_free(parser);
    }
  }
  return 0;
}

View File

@ -1,28 +0,0 @@
#!/bin/bash
# Runs the quadratic fuzzer in an endless minimise-then-fuzz cycle.
# Paths are relative to the current directory (./fuzz, ../bench, ../test).

# Abort on the first failing command.
set -e

# Make sure the corpus directory exists before the first cycle.
mkdir -p corpus

# Memory and disk usage grow over time, so the fuzzer is restarted every
# 4 hours. Each cycle first shrinks the corpus with `-merge=1`.
while true; do
    date
    echo restarting loop

    # Minimise: merge the previous corpus and ../bench into a fresh corpus.
    mv corpus/ corpus2
    mkdir corpus
    echo minimizing corpus
    ./fuzz/fuzz_quadratic -merge=1 corpus ../bench corpus2/ -max_len=1024
    rm -r corpus2

    # Fuzz for 4 hours (14400 s), one job and one worker per processor.
    date
    echo start fuzzer
    ./fuzz/fuzz_quadratic corpus \
        -dict=../test/fuzzing_dictionary \
        -jobs="$(nproc)" \
        -workers="$(nproc)" \
        -max_len=1024 \
        -max_total_time=14400
done

View File

@ -1,10 +0,0 @@
# Install the section 1 and section 3 manual pages. Skipped for MSVC builds.
if(NOT MSVC)
  include(GNUInstallDirs)

  install(
    FILES "${CMAKE_CURRENT_SOURCE_DIR}/man1/cmark-gfm.1"
    DESTINATION "${CMAKE_INSTALL_MANDIR}/man1"
  )
  install(
    FILES "${CMAKE_CURRENT_SOURCE_DIR}/man3/cmark-gfm.3"
    DESTINATION "${CMAKE_INSTALL_MANDIR}/man3"
  )
endif()

View File

@ -1,133 +0,0 @@
#!/usr/bin/env python
# Creates a man page from a C file.
# The first argument (required) is the path to the C file to document; the
# cmark-gfm shared library is always loaded from build/src.
# Comments beginning with `/**` are treated as Groff man, except that
# 'this' is converted to \fIthis\f[], and ''this'' to \fBthis\f[].
# Non-blank lines immediately following a man page comment are treated
# as function signatures or examples and parsed into .Ft, .Fo, .Fa, .Fc. The
# immediately preceding man documentation chunk is printed after the example
# as a comment on it.
# That's about it!
import sys, re, os, platform
from datetime import date
from ctypes import CDLL, c_char_p, c_long, c_void_p
# Load the cmark-gfm shared library from the build tree; only the file
# extension depends on the platform (.dylib on macOS, .so elsewhere).
sysname = platform.system()
cmark = CDLL("build/src/libcmark-gfm.dylib" if sysname == 'Darwin'
             else "build/src/libcmark-gfm.so")

# Declare the C signatures of the two library functions used below.
parse_document = cmark.cmark_parse_document
parse_document.argtypes = [c_char_p, c_long]
parse_document.restype = c_void_p

render_man = cmark.cmark_render_man
render_man.argtypes = [c_void_p, c_long, c_long]
render_man.restype = c_char_p
def md2man(text):
    """Convert Markdown text to groff man markup using the cmark library.

    On Python 3 the text is passed as UTF-8 bytes, rendered at width 65 and
    the result decoded back to str. On Python 2 the text is passed as is and
    rendered at width 72.
    """
    if sys.version_info < (3, 0):
        document = parse_document(text, len(text))
        return render_man(document, 0, 72)

    encoded = text.encode('utf-8')
    document = parse_document(encoded, len(encoded))
    return render_man(document, 0, 65).decode('utf-8')
# Patterns used to classify the lines of the C file. They are written as raw
# strings so that regex escapes such as \s, \w and \* are not interpreted as
# (invalid) Python string escapes.
comment_start_re = re.compile(r'^\/\*\* ?')    # opening "/**" of a man comment
comment_delim_re = re.compile(r'^[/ ]\** ?')   # leading " * " of a comment line
comment_end_re = re.compile(r'^ \**\/')        # closing " */"
function_re = re.compile(r'^ *(?:CMARK_GFM_EXPORT\s+)?(?P<type>(?:const\s+)?\w+(?:\s*[*])?)\s*(?P<name>\w+)\s*\((?P<args>[^)]*)\)')
blank_re = re.compile(r'^\s*$')
macro_re = re.compile('CMARK_GFM_EXPORT *')
typedef_start_re = re.compile('typedef.*{$')
typedef_end_re = re.compile('}')
single_quote_re = re.compile(r"(?<!\w)'([^']+)'(?!\w)")
double_quote_re = re.compile(r"(?<!\w)''([^']+)''(?!\w)")


def handle_quotes(s):
    """Turn the quoting convention of man comments into Markdown emphasis.

    ''text'' becomes **text** (bold) and 'text' becomes *text* (italic).
    Apostrophes inside words are left alone.

    The doubled form is replaced first. Replacing the single form first
    consumed the inner pair of a doubled quote, so ''text'' came out as
    '*text*' and was never rendered bold.
    """
    with_bold = re.sub(double_quote_re, r'**\g<1>**', s)
    return re.sub(single_quote_re, r'*\g<1>*', with_bold)
# Parser state shared across the loop below.
typedef = False   # True while inside a "typedef ... {" block
mdlines = []      # pieces of the man page accumulated so far
chunk = []        # lines of the man comment currently being collected
sig = []          # lines of the signature/example that follows a comment
if len(sys.argv) > 1:
    sourcefile = sys.argv[1]
else:
    print("Usage: make_man_page.py sourcefile")
    exit(1)
# Line-by-line state machine with three states:
#   'default'   - ordinary source text, ignored
#   'man'       - inside a /** ... */ comment
#   'signature' - non-blank lines directly after a man comment
with open(sourcefile, 'r') as cmarkh:
    state = 'default'
    for line in cmarkh:
        # state transition
        oldstate = state
        if comment_start_re.match(line):
            state = 'man'
        elif comment_end_re.match(line) and state == 'man':
            # The closing line of a comment carries no content.
            continue
        elif comment_delim_re.match(line) and state == 'man':
            state = 'man'
        elif not typedef and blank_re.match(line):
            state = 'default'
        elif typedef and typedef_end_re.match(line):
            typedef = False
        elif typedef_start_re.match(line):
            typedef = True
            state = 'signature'
        elif state == 'man':
            state = 'signature'
        # handle line
        if state == 'man':
            chunk.append(handle_quotes(re.sub(comment_delim_re, '', line)))
        elif state == 'signature':
            ln = re.sub(macro_re, '', line)
            if typedef or not re.match(blank_re, ln):
                sig.append(ln)
        elif oldstate == 'signature' and state != 'signature':
            # A signature just ended: emit it, then the comment that preceded it.
            if len(mdlines) > 0 and mdlines[-1] != '\n':
                mdlines.append('\n')
            rawsig = ''.join(sig)
            m = function_re.match(rawsig)
            mdlines.append('.PP\n')
            if m:
                # Function prototype: italic type, bold name, italic arguments.
                mdlines.append('\\fI' + m.group('type') + '\\f[]' + ' ')
                mdlines.append('\\fB' + m.group('name') + '\\f[]' + '(')
                first = True
                for argument in re.split(',', m.group('args')):
                    if not first:
                        mdlines.append(', ')
                    first = False
                    mdlines.append('\\fI' + argument.strip() + '\\f[]')
                mdlines.append(')\n')
            else:
                # Anything else (typedefs, examples) is emitted verbatim.
                mdlines.append('.nf\n\\fC\n.RS 0n\n')
                mdlines += sig
                mdlines.append('.RE\n\\f[]\n.fi\n')
            if len(mdlines) > 0 and mdlines[-1] != '\n':
                mdlines.append('\n')
            mdlines += md2man(''.join(chunk))
            mdlines.append('\n')
            chunk = []
            sig = []
        elif oldstate == 'man' and state != 'signature':
            # A comment with no signature after it: emit it on its own.
            if len(mdlines) > 0 and mdlines[-1] != '\n':
                mdlines.append('\n')
            mdlines += md2man(''.join(chunk)) # add man chunk
            chunk = []
            mdlines.append('\n')
# Title line with today's date, followed by the collected body.
sys.stdout.write('.TH cmark-gfm 3 "' + date.today().strftime('%B %d, %Y') + '" "LOCAL" "Library Functions Manual"\n')
sys.stdout.write(''.join(mdlines))

View File

@ -1,78 +0,0 @@
.TH "cmark-gfm" "1" "March 24, 2016" "LOCAL" "General Commands Manual"
.SH "NAME"
\fBcmark-gfm\fR
\- convert CommonMark formatted text with GitHub Flavored Markdown extensions to HTML
.SH "SYNOPSIS"
.HP 6n
\fBcmark-gfm\fR
[options]
file*
.SH "DESCRIPTION"
\fBcmark-gfm\fR
converts Markdown formatted plain text to either HTML, groff man,
CommonMark XML, LaTeX, or CommonMark, using the conventions
described in the CommonMark spec. It reads input from \fIstdin\fR
or the specified files (concatenating their contents) and writes
output to \fIstdout\fR.
.SH "OPTIONS"
.TP 12n
.B \-\-to, \-t \f[I]FORMAT\f[]
Specify output format (\f[C]html\f[], \f[C]man\f[], \f[C]xml\f[],
\f[C]latex\f[], \f[C]commonmark\f[]).
.TP 12n
.B \-\-width \f[I]WIDTH\f[]
Specify a column width to which to wrap the output. For no wrapping, use
the value 0 (the default). This option currently only affects the
commonmark, latex, and man renderers.
.TP 12n
.B \-\-hardbreaks
Render soft breaks (newlines inside paragraphs in the CommonMark source)
as hard line breaks in the target format. If this option is specified,
hard wrapping is disabled for CommonMark output, regardless of the value
given with \-\-width.
.TP 12n
.B \-\-nobreaks
Render soft breaks as spaces. If this option is specified,
hard wrapping is disabled for all output formats, regardless of the value
given with \-\-width.
.TP 12n
.B \-\-sourcepos
Include source position attribute.
.TP 12n
.B \-\-normalize
Consolidate adjacent text nodes.
.TP 12n
.B \-\-extension, \-e \f[I]EXTENSION_NAME\f[]
Specify an extension name to use.
.TP 12n
.B \-\-list\-extensions
List available extensions and quit.
.TP 12n
.B \-\-validate\-utf8
Validate UTF-8, replacing illegal sequences with U+FFFD.
.TP 12n
.B \-\-smart
Use smart punctuation. Straight double and single quotes will
be rendered as curly quotes, depending on their position.
\f[C]\-\-\f[] will be rendered as an en-dash.
\f[C]\-\-\-\f[] will be rendered as an em-dash.
\f[C]...\f[] will be rendered as ellipses.
.TP 12n
.B \-\-unsafe
Render raw HTML and potentially dangerous URLs.
(Raw HTML is not replaced by a placeholder comment; potentially
dangerous URLs are not replaced by empty strings.) Dangerous
URLs are those that begin with `javascript:`, `vbscript:`,
`file:`, or `data:` (except for `image/png`, `image/gif`,
`image/jpeg`, or `image/webp` mime types).
.TP 12n
.B \-\-help
Print usage information.
.TP 12n
.B \-\-version
Print version.
.SH "AUTHORS"
John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
.SH "SEE ALSO"
.PP
CommonMark spec: \f[C]http://spec.commonmark.org\f[].

File diff suppressed because it is too large Load Diff

View File

@ -1 +0,0 @@
@nmake.exe /nologo /f Makefile.nmake %*

View File

@ -1,206 +0,0 @@
# CMP0063 makes CMake honor the visibility properties set further down for
# every target type. Ask whether the policy exists instead of comparing
# version strings: "3.3.0 VERSION_GREATER 3.3" is false even though that
# release already knows the policy.
if(POLICY CMP0063)
  cmake_policy(SET CMP0063 NEW)
endif()
include(GNUInstallDirs)
# Target names for the shared and static builds of the library; the output
# file names are set separately on each target below.
set(LIBRARY "libcmark-gfm")
set(STATICLIBRARY "libcmark-gfm_static")
# Headers are listed so they can be appended to LIBRARY_SOURCES; only a subset
# of them is installed (see the install(FILES ...) call further down).
set(HEADERS
cmark-gfm.h
cmark-gfm-extension_api.h
parser.h
buffer.h
node.h
iterator.h
chunk.h
references.h
footnotes.h
map.h
utf8.h
scanners.h
inlines.h
houdini.h
cmark_ctype.h
render.h
registry.h
syntax_extension.h
plugin.h
)
# Every file handed to add_library()/add_executable() for the library.
# NOTE(review): scanners.re is listed next to scanners.c; presumably it is the
# generator input for scanners.c and is not compiled itself - confirm.
set(LIBRARY_SOURCES
cmark.c
node.c
iterator.c
blocks.c
inlines.c
scanners.c
scanners.re
utf8.c
buffer.c
references.c
footnotes.c
map.c
render.c
man.c
xml.c
html.c
commonmark.c
plaintext.c
latex.c
houdini_href_e.c
houdini_html_e.c
houdini_html_u.c
cmark_ctype.c
arena.c
linked_list.c
syntax_extension.c
registry.c
plugin.c
${HEADERS}
)
# Headers are looked up in this directory, in the matching build directory
# (where the configured/generated headers are written) and in the build
# directory of the extensions.
include_directories(. ${CMAKE_CURRENT_BINARY_DIR})
include_directories(
${PROJECT_BINARY_DIR}/extensions
)
# Generate cmark-gfm_version.h from its template.
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmark-gfm_version.h.in
${CMAKE_CURRENT_BINARY_DIR}/cmark-gfm_version.h)
include (GenerateExportHeader)
include("../CheckFileOffsetBits.cmake")
CHECK_FILE_OFFSET_BITS()
# Check integrity of node structure when compiled as debug:
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DCMARK_DEBUG_NODES -DDEBUG")
# NOTE(review): CMAKE_LINKER_DEBUG and CMAKE_LINKER_PROFILE do not look like
# variables CMake itself reads - confirm whether the
# CMAKE_<EXE|SHARED>_LINKER_FLAGS_<CONFIG> variables were intended.
set(CMAKE_LINKER_DEBUG "${CMAKE_LINKER_FLAGS_DEBUG}")
# "Profile" configuration: the release flags plus -pg.
set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg")
set(CMAKE_LINKER_PROFILE "${CMAKE_LINKER_FLAGS_RELEASE} -pg")
# -fvisibility=hidden
# (defaults for the targets created below; exported symbols are marked through
# the header produced by generate_export_header)
set(CMAKE_C_VISIBILITY_PRESET hidden)
set(CMAKE_VISIBILITY_INLINES_HIDDEN 1)
# Shared library build, enabled with CMARK_SHARED.
if (CMARK_SHARED)
add_library(${LIBRARY} SHARED ${LIBRARY_SOURCES})
# Include minor version and patch level in soname for now.
set_target_properties(${LIBRARY} PROPERTIES
OUTPUT_NAME "cmark-gfm"
SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}.gfm.${PROJECT_VERSION_GFM}
VERSION ${PROJECT_VERSION})
set_property(TARGET ${LIBRARY}
APPEND PROPERTY MACOSX_RPATH true)
# Avoid name clash between PROGRAM and LIBRARY pdb files.
set_target_properties(${LIBRARY} PROPERTIES PDB_NAME cmark-gfm_dll)
# Writes the export-macro header into the build directory; with this
# BASE_NAME the file is the cmark-gfm_export.h installed further down.
generate_export_header(${LIBRARY}
BASE_NAME ${PROJECT_NAME})
# CMARK_INSTALL collects the library targets that were actually created.
list(APPEND CMARK_INSTALL ${LIBRARY})
endif()
# Static library build, enabled with CMARK_STATIC.
if(CMARK_STATIC)
  add_library(${STATICLIBRARY} STATIC ${LIBRARY_SOURCES})

  # MSVC gets a distinct file name, presumably so the archive cannot collide
  # with the shared library's import library; elsewhere both share "cmark-gfm".
  if(MSVC)
    set(static_output_name "cmark-gfm_static")
  else()
    set(static_output_name "cmark-gfm")
  endif()

  set_target_properties(${STATICLIBRARY} PROPERTIES
    COMPILE_FLAGS -DCMARK_GFM_STATIC_DEFINE
    POSITION_INDEPENDENT_CODE ON
    AUTOMOC OFF
    AUTOUIC OFF
    AUTORCC OFF
    OUTPUT_NAME "${static_output_name}"
    VERSION ${PROJECT_VERSION})

  # The export header is generated once: by the shared target when it exists,
  # otherwise here.
  if(NOT CMARK_SHARED)
    generate_export_header(${STATICLIBRARY}
      BASE_NAME ${PROJECT_NAME})
  endif()

  list(APPEND CMARK_INSTALL ${STATICLIBRARY})
endif()
# Pull in the system runtime libraries (with that module's warnings
# suppressed) unless building with MSVC on a non-Windows host.
if(NOT MSVC OR CMAKE_HOST_SYSTEM_NAME STREQUAL Windows)
set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON)
include(InstallRequiredSystemLibraries)
endif()
# LIB_SUFFIX lets the caller select e.g. "lib64"; the value is used for the
# pkg-config destination below and is visible to the @ONLY template.
set(libdir lib${LIB_SUFFIX})
if(CMARK_SHARED OR CMARK_STATIC)
# Generate and install the pkg-config file.
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libcmark-gfm.pc.in
${CMAKE_CURRENT_BINARY_DIR}/libcmark-gfm.pc @ONLY)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcmark-gfm.pc
DESTINATION ${libdir}/pkgconfig)
# Public headers: two from the source tree plus the two generated ones.
install(FILES
cmark-gfm.h
cmark-gfm-extension_api.h
${CMAKE_CURRENT_BINARY_DIR}/cmark-gfm_export.h
${CMAKE_CURRENT_BINARY_DIR}/cmark-gfm_version.h
DESTINATION include
)
endif()
# Feature tests
include(CheckIncludeFile)
include(CheckCSourceCompiles)
include(CheckCSourceRuns)
include(CheckSymbolExists)

# Each probe stores its result in the named HAVE_* variable.
check_include_file(stdbool.h HAVE_STDBOOL_H)

check_c_source_compiles(
  "int main() { __builtin_expect(0,0); return 0; }"
  HAVE___BUILTIN_EXPECT)

check_c_source_compiles("
int f(void) __attribute__ (());
int main() { return 0; }
" HAVE___ATTRIBUTE__)

# Generate config.h from its template (presumably using the HAVE_* results
# above - confirm against config.h.in).
configure_file(
  ${CMAKE_CURRENT_SOURCE_DIR}/config.h.in
  ${CMAKE_CURRENT_BINARY_DIR}/config.h)
# Always compile with warnings
if(MSVC)
  # Force to always compile with W4. The test has to inspect the C flags,
  # because those are the flags rewritten here: looking at CMAKE_CXX_FLAGS
  # could skip the append while the replace found nothing to replace, leaving
  # the C sources without /W4.
  if(CMAKE_C_FLAGS MATCHES "/W[0-4]")
    string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
  else()
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4")
  endif()
  # Warnings are errors, minus a few warnings that are disabled explicitly.
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /WX /wd4706 /wd4204 /wd4221 /wd4100 /D_CRT_SECURE_NO_WARNINGS")
# These are C flags, so the decision is based on the C compiler's identity
# (the C++ compiler ID is empty when only C is enabled).
elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wno-unused-parameter -std=c99 -pedantic")
endif()
# Compile as C++ under MSVC older than 12.0
# (/TP makes the compiler treat every source file as C++)
if(MSVC AND MSVC_VERSION LESS 1800)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /TP")
endif()
# Custom "Ubsan" build type: adds -fsanitize=undefined to the C flags.
# Only takes effect when CMAKE_BUILD_TYPE is set to exactly "Ubsan".
if(CMAKE_BUILD_TYPE STREQUAL "Ubsan")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined")
endif()
# Optional fuzzing harness, enabled with CMARK_LIB_FUZZER. The harness
# compiles the library sources directly into the executable and links the
# fuzzer runtime found at CMAKE_LIB_FUZZER_PATH.
if(CMARK_LIB_FUZZER)
set(FUZZ_HARNESS "cmark-fuzz")
add_executable(${FUZZ_HARNESS} ../test/cmark-fuzz.c ${LIBRARY_SOURCES})
target_link_libraries(${FUZZ_HARNESS} "${CMAKE_LIB_FUZZER_PATH}")
# NOTE(review): this modifies the directory-wide C flags, not only the
# harness target's flags.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize-coverage=trace-pc-guard")
# cmark is written in C but the libFuzzer runtime is written in C++ which
# needs to link against the C++ runtime. Explicitly link it into cmark-fuzz
set_target_properties(${FUZZ_HARNESS} PROPERTIES LINK_FLAGS "-lstdc++")
endif()

View File

@ -1,104 +0,0 @@
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "cmark-gfm.h"
#include "cmark-gfm-extension_api.h"
/* One block of arena storage. Chunks are chained through `prev`, and `A`
 * points at the chunk new allocations are normally carved from (NULL until
 * the arena is first used). */
static struct arena_chunk {
/* sz: capacity of `ptr` in bytes; used: bytes already handed out */
size_t sz, used;
/* set by cmark_arena_push() on the chunk that cmark_arena_pop() stops at */
uint8_t push_point;
/* zero-initialized backing storage obtained from calloc() */
void *ptr;
/* the chunk allocated before this one, or NULL */
struct arena_chunk *prev;
} *A = NULL;
/* Create a chunk with `sz` bytes of zeroed storage and link it in front of
 * `prev`. Aborts the process when either allocation fails. */
static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev) {
  struct arena_chunk *node =
      (struct arena_chunk *)calloc(1, sizeof(struct arena_chunk));
  if (node == NULL) {
    abort();
  }

  node->ptr = calloc(1, sz);
  if (node->ptr == NULL) {
    abort();
  }

  node->sz = sz;
  node->prev = prev;
  return node;
}
/* Mark the current chunk as a push point and continue allocating from a
 * fresh 10240-byte chunk. Does nothing when the arena is not initialized. */
void cmark_arena_push(void) {
  if (A != NULL) {
    A->push_point = 1;
    A = alloc_arena_chunk(10240, A);
  }
}
/* Free every chunk allocated since the most recent push point and clear that
 * push point. Returns 0 when the arena is not initialized, 1 otherwise. */
int cmark_arena_pop(void) {
  struct arena_chunk *older;

  if (A == NULL)
    return 0;

  /* release chunks newest-first until a marked chunk (or the end) is reached */
  for (; A != NULL && A->push_point == 0; A = older) {
    older = A->prev;
    free(A->ptr);
    free(A);
  }

  if (A != NULL)
    A->push_point = 0;
  return 1;
}
/* Create the first arena chunk (4 MiB, no predecessor). */
static void init_arena(void) {
  const size_t initial_size = 4 * 1048576;
  A = alloc_arena_chunk(initial_size, NULL);
}
void cmark_arena_reset(void) {
while (A) {
free(A->ptr);
struct arena_chunk *n = A->prev;
free(A);
A = n;
}
}
/* calloc() replacement backed by the arena.
 *
 * Returns `nmem * size` usable bytes. Each allocation is preceded by a
 * size_t header that records its (rounded) usable size; arena_realloc()
 * reads that header. The arena is created on first use. Aborts if the
 * request is so large that its byte count would overflow size_t. */
static void *arena_calloc(size_t nmem, size_t size) {
  if (!A)
    init_arena();

  // Round allocation sizes to largest integer size to
  // ensure returned memory is correctly aligned
  const size_t align = sizeof(size_t) - 1;

  /* nmem * size, plus the header and the rounding slack, must fit in a
   * size_t; a wrapped value would yield a block smaller than requested. */
  if (size != 0 && nmem > ((size_t)-1 - sizeof(size_t) - align) / size)
    abort();

  size_t sz = nmem * size + sizeof(size_t);
  sz = (sz + align) & ~align;

  struct arena_chunk *chunk;
  if (sz > A->sz) {
    /* larger than the head chunk's capacity: give the request a chunk of its
     * own, linked behind the head so the head keeps serving allocations */
    A->prev = chunk = alloc_arena_chunk(sz, A->prev);
  } else if (sz > A->sz - A->used) {
    /* head chunk is too full: start a new head 1.5x the size of the old one */
    A = chunk = alloc_arena_chunk(A->sz + A->sz / 2, A);
  } else {
    chunk = A;
  }

  void *ptr = (uint8_t *) chunk->ptr + chunk->used;
  chunk->used += sz;
  *((size_t *) ptr) = sz - sizeof(size_t);
  return (uint8_t *) ptr + sizeof(size_t);
}
/* realloc() replacement backed by the arena.
 *
 * Always hands out a fresh zeroed block of `size` bytes and copies the old
 * contents into it; the old block is never reclaimed. `ptr` may be NULL. */
static void *arena_realloc(void *ptr, size_t size) {
  if (!A)
    init_arena();

  void *new_ptr = arena_calloc(1, size);
  if (ptr) {
    /* usable size of the old block, stored in the header just before it */
    size_t old_size = ((size_t *) ptr)[-1];
    /* never copy more than was requested: when shrinking, the old size can
     * exceed the new block and the copy would write past its end */
    memcpy(new_ptr, ptr, old_size < size ? old_size : size);
  }
  return new_ptr;
}
/* free() replacement for the arena: individual blocks are never released;
 * memory is returned only by cmark_arena_pop() and cmark_arena_reset(). */
static void arena_free(void *ptr) {
(void) ptr;
/* no-op */
}
/* Allocator table wired to the arena functions above (calloc, realloc, free). */
cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free};
/* Returns a pointer to the arena-backed allocator table. */
cmark_mem *cmark_get_arena_mem_allocator(void) {
return &CMARK_ARENA_MEM_ALLOCATOR;
}

File diff suppressed because it is too large Load Diff

View File

@ -1,278 +0,0 @@
#include <stdarg.h>
#include <string.h>
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <limits.h>
#include "config.h"
#include "cmark_ctype.h"
#include "buffer.h"
/* Used as default value for cmark_strbuf->ptr so that people can always
* assume ptr is non-NULL and zero terminated even for new cmark_strbufs.
*/
unsigned char cmark_strbuf__initbuf[1];
#ifndef MIN
/* Smaller of two values. Both arguments are parenthesized so that operator
 * expressions expand correctly; note that an argument may be evaluated
 * twice, so it must not have side effects. */
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
/* Put `buf` into the empty state (shared placeholder storage, no allocation)
 * using allocator `mem`, then reserve room when `initial_size` is positive. */
void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf,
                       bufsize_t initial_size) {
  buf->ptr = cmark_strbuf__initbuf;
  buf->size = 0;
  buf->asize = 0;
  buf->mem = mem;

  if (initial_size <= 0)
    return;

  cmark_strbuf_grow(buf, initial_size);
}
/* Make sure the buffer can hold its current contents plus `add` more bytes. */
static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) {
  bufsize_t wanted = buf->size + add;
  cmark_strbuf_grow(buf, wanted);
}
/* Ensure the buffer's allocation exceeds `target_size` bytes.
 *
 * Nothing happens when the current allocation is already larger. Requests
 * above INT32_MAX / 2 print a message to stderr and abort. */
void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) {
  const bufsize_t limit = (bufsize_t)(INT32_MAX / 2);
  unsigned char *old_storage;
  bufsize_t capacity;

  assert(target_size > 0);

  if (buf->asize > target_size)
    return;

  if (target_size > limit) {
    fprintf(stderr,
            "[cmark] cmark_strbuf_grow requests buffer with size > %d, aborting\n",
            (INT32_MAX / 2));
    abort();
  }

  /* Oversize the buffer by 50% to guarantee amortized linear time
   * complexity on append operations; one extra byte is added and the
   * result is rounded up to a multiple of 8. */
  capacity = target_size + target_size / 2 + 1;
  capacity = (capacity + 7) & ~7;

  /* the placeholder storage of an unallocated buffer must not be realloc'ed */
  old_storage = buf->asize ? buf->ptr : NULL;
  buf->ptr = (unsigned char *)buf->mem->realloc(old_storage, capacity);
  buf->asize = capacity;
}
/* Number of bytes currently stored in the buffer (terminator not counted). */
bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) {
  bufsize_t length = buf->size;
  return length;
}
/* Release the buffer's storage and return it to the empty state.
 * A NULL `buf` is ignored. */
void cmark_strbuf_free(cmark_strbuf *buf) {
  if (buf == NULL)
    return;

  /* the shared placeholder was never allocated, so it is never freed */
  if (buf->ptr != cmark_strbuf__initbuf) {
    buf->mem->free(buf->ptr);
  }

  cmark_strbuf_init(buf->mem, buf, 0);
}
/* Empty the buffer while keeping its allocation. */
void cmark_strbuf_clear(cmark_strbuf *buf) {
  /* only allocated storage is written to */
  if (buf->asize > 0) {
    buf->ptr[0] = '\0';
  }
  buf->size = 0;
}
/* Replace the buffer's contents with `len` bytes from `data`.
 * A NULL `data` or a non-positive `len` clears the buffer instead. */
void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data,
                      bufsize_t len) {
  if (data == NULL || len <= 0) {
    cmark_strbuf_clear(buf);
    return;
  }

  /* when `data` already is the buffer's storage only the length changes */
  if (buf->ptr != data) {
    if (buf->asize <= len)
      cmark_strbuf_grow(buf, len);
    memmove(buf->ptr, data, len);
  }

  buf->size = len;
  buf->ptr[len] = '\0';
}
/* Replace the buffer's contents with the C string `string`;
 * a NULL string clears the buffer. */
void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) {
  bufsize_t len = 0;

  if (string != NULL)
    len = (bufsize_t)strlen(string);

  cmark_strbuf_set(buf, (const unsigned char *)string, len);
}
/* Append one byte (the low 8 bits of `c`) and keep the buffer terminated. */
void cmark_strbuf_putc(cmark_strbuf *buf, int c) {
  S_strbuf_grow_by(buf, 1);

  buf->ptr[buf->size] = (unsigned char)(c & 0xFF);
  buf->size += 1;
  buf->ptr[buf->size] = '\0';
}
/* Append `len` bytes from `data`; a non-positive `len` is a no-op. */
void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data,
                      bufsize_t len) {
  if (len > 0) {
    unsigned char *dest;

    S_strbuf_grow_by(buf, len);
    /* computed after growing, since growing may move the storage */
    dest = buf->ptr + buf->size;
    memmove(dest, data, len);

    buf->size += len;
    buf->ptr[buf->size] = '\0';
  }
}
/* Append the C string `string` to the buffer.
 * A NULL string appends nothing, mirroring how cmark_strbuf_sets() accepts
 * NULL instead of passing it to strlen(). */
void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) {
  if (string == NULL)
    return;
  cmark_strbuf_put(buf, (const unsigned char *)string, (bufsize_t)strlen(string));
}
/* Copy the buffer's contents into `data` as a NUL-terminated string,
 * truncating to at most `datasize - 1` bytes. Nothing is written when
 * `data` is NULL or `datasize` is not positive. */
void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize,
                            const cmark_strbuf *buf) {
  bufsize_t n;

  assert(buf);

  if (data == NULL || datasize <= 0)
    return;

  data[0] = '\0';

  if (buf->asize <= 0 || buf->size == 0)
    return;

  n = (buf->size > datasize - 1) ? datasize - 1 : buf->size;
  memmove(data, buf->ptr, n);
  data[n] = '\0';
}
/* Exchange the complete state (allocator, storage, sizes) of two buffers. */
void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b) {
  cmark_strbuf saved_b = *buf_b;

  *buf_b = *buf_a;
  *buf_a = saved_b;
}
/* Hand the buffer's storage over to the caller and reset the buffer to the
 * empty state. A buffer that never allocated yields a newly allocated empty
 * string instead (and is left untouched). */
unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) {
  unsigned char *owned;

  if (buf->asize == 0) {
    /* return an empty string */
    return (unsigned char *)buf->mem->calloc(1, 1);
  }

  owned = buf->ptr;
  cmark_strbuf_init(buf->mem, buf, 0);
  return owned;
}
int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b) {
int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size));
return (result != 0) ? result
: (a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0;
}
/* Index of the first byte equal to `c` at or after `pos`, or -1 when there is
 * none. A negative `pos` searches from the start; a `pos` at or beyond the
 * end yields -1. */
bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos) {
  bufsize_t from;
  const unsigned char *hit;

  if (pos >= buf->size)
    return -1;

  from = (pos < 0) ? 0 : pos;
  hit = (unsigned char *)memchr(buf->ptr + from, c, buf->size - from);

  if (hit == NULL)
    return -1;
  return (bufsize_t)(hit - (const unsigned char *)buf->ptr);
}
/* Index of the last byte equal to `c` at or before `pos`, or -1 when there is
 * none. A `pos` beyond the end is clamped to the last byte; a negative `pos`
 * or an empty buffer yields -1. */
bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos) {
  const unsigned char wanted = (unsigned char)c;
  bufsize_t idx;

  if (buf->size == 0 || pos < 0)
    return -1;

  idx = (pos >= buf->size) ? buf->size - 1 : pos;
  while (idx >= 0) {
    if (buf->ptr[idx] == wanted)
      return idx;
    idx--;
  }
  return -1;
}
/* Shorten the buffer to `len` bytes (negative values count as 0).
 * Buffers that are already that short are left unchanged. */
void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len) {
  bufsize_t keep = (len < 0) ? 0 : len;

  if (keep >= buf->size)
    return;

  buf->size = keep;
  buf->ptr[keep] = '\0';
}
/* Remove the first `n` bytes of the buffer (at most its whole contents).
 * A non-positive `n` is a no-op. */
void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n) {
  bufsize_t remaining;

  if (n <= 0)
    return;

  if (n > buf->size)
    n = buf->size;

  remaining = buf->size - n;
  buf->size = remaining;
  if (remaining != 0)
    memmove(buf->ptr, buf->ptr + n, remaining);
  buf->ptr[remaining] = '\0';
}
/* Strip trailing whitespace (as classified by cmark_isspace). */
void cmark_strbuf_rtrim(cmark_strbuf *buf) {
  if (buf->size == 0)
    return;

  while (buf->size > 0 && cmark_isspace(buf->ptr[buf->size - 1]))
    buf->size--;

  buf->ptr[buf->size] = '\0';
}
/* Strip whitespace (as classified by cmark_isspace) from both ends. */
void cmark_strbuf_trim(cmark_strbuf *buf) {
  bufsize_t lead;

  if (buf->size == 0)
    return;

  /* count the leading whitespace bytes */
  for (lead = 0; lead < buf->size; lead++) {
    if (!cmark_isspace(buf->ptr[lead]))
      break;
  }

  cmark_strbuf_drop(buf, lead);
  cmark_strbuf_rtrim(buf);
}
// Rewrite the string in place so that every run of whitespace
// characters (per cmark_isspace) becomes one single space.
void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) {
  bool in_space_run = false;
  bufsize_t src;
  bufsize_t dst = 0;

  for (src = 0; src < s->size; ++src) {
    unsigned char ch = s->ptr[src];

    if (!cmark_isspace(ch)) {
      s->ptr[dst++] = ch;
      in_space_run = false;
    } else if (!in_space_run) {
      /* first whitespace of a run: emit the single replacement space */
      s->ptr[dst++] = ' ';
      in_space_run = true;
    }
  }

  cmark_strbuf_truncate(s, dst);
}
// Unescape the string in place: a backslash that is followed by a
// punctuation character (per cmark_ispunct) is dropped.
extern void cmark_strbuf_unescape(cmark_strbuf *buf) {
  bufsize_t src = 0;
  bufsize_t dst = 0;

  while (src < buf->size) {
    /* skip the backslash so that the escaped character itself is copied */
    if (buf->ptr[src] == '\\' && cmark_ispunct(buf->ptr[src + 1]))
      src++;

    buf->ptr[dst] = buf->ptr[src];
    dst++;
    src++;
  }

  cmark_strbuf_truncate(buf, dst);
}

View File

@ -1,116 +0,0 @@
#ifndef CMARK_BUFFER_H
#define CMARK_BUFFER_H
#include <stddef.h>
#include <stdarg.h>
#include <string.h>
#include <limits.h>
#include <stdint.h>
#include "config.h"
#include "cmark-gfm.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Growable byte buffer that is kept NUL-terminated. */
typedef struct {
/* allocator used for all of this buffer's storage */
cmark_mem *mem;
/* contents; points at cmark_strbuf__initbuf while nothing is allocated */
unsigned char *ptr;
/* asize: bytes allocated (0 = nothing allocated); size: bytes in use */
bufsize_t asize, size;
} cmark_strbuf;
/* Shared placeholder storage used by buffers that have not allocated yet. */
extern unsigned char cmark_strbuf__initbuf[];
/* Static initializer for an empty cmark_strbuf that uses allocator `mem`. */
#define CMARK_BUF_INIT(mem) \
{ mem, cmark_strbuf__initbuf, 0, 0 }
/**
* Initialize a cmark_strbuf structure.
*
* For the cases where CMARK_BUF_INIT cannot be used to do static
* initialization.
*/
CMARK_GFM_EXPORT
void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf,
bufsize_t initial_size);
/**
* Grow the buffer to hold at least `target_size` bytes.
*/
CMARK_GFM_EXPORT
void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size);
/** Release the buffer's storage and reset it to the empty state.
 * A NULL `buf` is ignored. */
CMARK_GFM_EXPORT
void cmark_strbuf_free(cmark_strbuf *buf);
/** Exchange the complete state of two buffers. */
CMARK_GFM_EXPORT
void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b);
/** Number of bytes stored in the buffer. */
CMARK_GFM_EXPORT
bufsize_t cmark_strbuf_len(const cmark_strbuf *buf);
/** Bytewise comparison; when one buffer is a prefix of the other, the
 * shorter one compares as smaller. */
CMARK_GFM_EXPORT
int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b);
/** Hand the buffer's storage to the caller and reset the buffer; a buffer
 * without an allocation yields a newly allocated empty string. */
CMARK_GFM_EXPORT
unsigned char *cmark_strbuf_detach(cmark_strbuf *buf);
/** Copy the contents into `data` as a NUL-terminated string, truncated to
 * at most `datasize - 1` bytes. */
CMARK_GFM_EXPORT
void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize,
const cmark_strbuf *buf);
/** The buffer's storage viewed as a C string (no copy is made). */
static CMARK_INLINE const char *cmark_strbuf_cstr(const cmark_strbuf *buf) {
return (char *)buf->ptr;
}
/* Byte at index `n`; no bounds checking. */
#define cmark_strbuf_at(buf, n) ((buf)->ptr[n])
/** Replace the contents with `len` bytes from `data`; a NULL `data` or a
 * non-positive `len` clears the buffer. */
CMARK_GFM_EXPORT
void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data,
bufsize_t len);
/** Replace the contents with a C string; NULL clears the buffer. */
CMARK_GFM_EXPORT
void cmark_strbuf_sets(cmark_strbuf *buf, const char *string);
/** Append one byte (the low 8 bits of `c`). */
CMARK_GFM_EXPORT
void cmark_strbuf_putc(cmark_strbuf *buf, int c);
/** Append `len` bytes from `data`; a non-positive `len` is a no-op. */
CMARK_GFM_EXPORT
void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data,
bufsize_t len);
/** Append a C string. */
CMARK_GFM_EXPORT
void cmark_strbuf_puts(cmark_strbuf *buf, const char *string);
/** Empty the buffer while keeping its allocation. */
CMARK_GFM_EXPORT
void cmark_strbuf_clear(cmark_strbuf *buf);
/** Index of the first byte equal to `c` at or after `pos`, or -1. */
CMARK_GFM_EXPORT
bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos);
/** Index of the last byte equal to `c` at or before `pos`, or -1. */
CMARK_GFM_EXPORT
bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos);
/** Remove the first `n` bytes. */
CMARK_GFM_EXPORT
void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n);
/** Shorten the buffer to `len` bytes if it is currently longer. */
CMARK_GFM_EXPORT
void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len);
/** Strip trailing whitespace. */
CMARK_GFM_EXPORT
void cmark_strbuf_rtrim(cmark_strbuf *buf);
/** Strip leading and trailing whitespace. */
CMARK_GFM_EXPORT
void cmark_strbuf_trim(cmark_strbuf *buf);
/** Collapse each run of whitespace into a single space, in place. */
CMARK_GFM_EXPORT
void cmark_strbuf_normalize_whitespace(cmark_strbuf *s);
/** Remove backslashes that precede punctuation characters, in place. */
CMARK_GFM_EXPORT
void cmark_strbuf_unescape(cmark_strbuf *s);
#ifdef __cplusplus
}
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,135 +0,0 @@
#ifndef CMARK_CHUNK_H
#define CMARK_CHUNK_H
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "cmark-gfm.h"
#include "buffer.h"
#include "cmark_ctype.h"
/* Initializer for a chunk with no data, zero length, and no allocation. */
#define CMARK_CHUNK_EMPTY \
{ NULL, 0, 0 }
/* A span of bytes that either points at memory the chunk does not own
 * (alloc == 0) or owns a separately allocated copy (alloc != 0). */
typedef struct cmark_chunk {
/* first byte of the span */
unsigned char *data;
/* number of bytes in the span */
bufsize_t len;
/* non-zero when `data` was allocated for this chunk and must be freed */
bufsize_t alloc; // also implies a NULL-terminated string
} cmark_chunk;
/* Free the chunk's data when the chunk owns it, then reset the chunk to the
 * empty state. */
static CMARK_INLINE void cmark_chunk_free(cmark_mem *mem, cmark_chunk *c) {
  if (c->alloc != 0) {
    mem->free(c->data);
  }

  c->len = 0;
  c->alloc = 0;
  c->data = NULL;
}
/* Skip leading whitespace by advancing the data pointer. Only valid for
 * chunks that do not own their data (asserted). */
static CMARK_INLINE void cmark_chunk_ltrim(cmark_chunk *c) {
  assert(!c->alloc);

  while (c->len != 0) {
    if (!cmark_isspace(c->data[0]))
      break;
    c->data += 1;
    c->len -= 1;
  }
}
/* Drop trailing whitespace by shortening the length. Only valid for chunks
 * that do not own their data (asserted). */
static CMARK_INLINE void cmark_chunk_rtrim(cmark_chunk *c) {
  assert(!c->alloc);

  while (c->len > 0 && cmark_isspace(c->data[c->len - 1]))
    c->len--;
}
/* Strip whitespace from both ends of the chunk. Both helpers assert that the
 * chunk does not own its data. */
static CMARK_INLINE void cmark_chunk_trim(cmark_chunk *c) {
cmark_chunk_ltrim(c);
cmark_chunk_rtrim(c);
}
/* Index of the first byte equal to `c` at or after `offset`; the chunk's
 * length when no such byte exists. */
static CMARK_INLINE bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c,
                                                 bufsize_t offset) {
  const unsigned char *start = ch->data + offset;
  const unsigned char *hit =
      (unsigned char *)memchr(start, c, ch->len - offset);

  if (hit == NULL)
    return ch->len;
  return (bufsize_t)(hit - ch->data);
}
/* Return the chunk's contents as a NUL-terminated string. A chunk that does
 * not own its data is first converted into one that owns a terminated copy
 * allocated from `mem`. */
static CMARK_INLINE const char *cmark_chunk_to_cstr(cmark_mem *mem,
                                                    cmark_chunk *c) {
  unsigned char *copy;

  /* owned data is already terminated */
  if (c->alloc)
    return (char *)c->data;

  copy = (unsigned char *)mem->calloc(c->len + 1, 1);
  if (c->len > 0)
    memcpy(copy, c->data, c->len);
  copy[c->len] = 0;

  c->data = copy;
  c->alloc = 1;
  return (char *)copy;
}
/* Make the chunk own a copy of `str` (or become empty when `str` is NULL),
 * freeing data it owned before. */
static CMARK_INLINE void cmark_chunk_set_cstr(cmark_mem *mem, cmark_chunk *c,
                                              const char *str) {
  unsigned char *previous = NULL;

  if (c->alloc)
    previous = c->data;

  if (str != NULL) {
    bufsize_t n = (bufsize_t)strlen(str);
    unsigned char *copy = (unsigned char *)mem->calloc(n + 1, 1);

    /* n + 1 also copies the terminator */
    memcpy(copy, str, n + 1);
    c->len = n;
    c->data = copy;
    c->alloc = 1;
  } else {
    c->len = 0;
    c->data = NULL;
    c->alloc = 0;
  }

  /* freed last, after the new contents have been copied */
  if (previous != NULL)
    mem->free(previous);
}
/* Wrap the C string `data` in a chunk that does not own it; NULL gives a
 * chunk of length 0. */
static CMARK_INLINE cmark_chunk cmark_chunk_literal(const char *data) {
  cmark_chunk c;

  c.data = (unsigned char *)data;
  c.len = (data != NULL) ? (bufsize_t)strlen(data) : 0;
  c.alloc = 0;
  return c;
}
/* Non-owning view of `len` bytes of `ch`, starting at offset `pos`.
 * No bytes are copied. */
static CMARK_INLINE cmark_chunk cmark_chunk_dup(const cmark_chunk *ch,
                                                bufsize_t pos, bufsize_t len) {
  cmark_chunk view;

  view.data = ch->data + pos;
  view.len = len;
  view.alloc = 0;
  return view;
}
/* Turn a string buffer into a chunk that owns the buffer's former storage. */
static CMARK_INLINE cmark_chunk cmark_chunk_buf_detach(cmark_strbuf *buf) {
  /* the length has to be read before detaching, which resets the buffer */
  bufsize_t len = buf->size;
  unsigned char *data = cmark_strbuf_detach(buf);
  cmark_chunk c = {data, len, 1};
  return c;
}
/* The trim_new variants accept a source chunk whether or not it owns its
 * data, and always produce a newly allocated chunk. This one strips leading
 * whitespace. */
static CMARK_INLINE cmark_chunk cmark_chunk_ltrim_new(cmark_mem *mem, cmark_chunk *c) {
  /* start from a non-owning view of the whole source */
  cmark_chunk trimmed = cmark_chunk_dup(c, 0, c->len);

  cmark_chunk_ltrim(&trimmed);
  /* called for its side effect: gives the view its own terminated copy */
  (void)cmark_chunk_to_cstr(mem, &trimmed);
  return trimmed;
}
/* Newly allocated copy of `c` with trailing whitespace removed; the source
 * chunk may or may not own its data. */
static CMARK_INLINE cmark_chunk cmark_chunk_rtrim_new(cmark_mem *mem, cmark_chunk *c) {
  /* start from a non-owning view of the whole source */
  cmark_chunk trimmed = cmark_chunk_dup(c, 0, c->len);

  cmark_chunk_rtrim(&trimmed);
  /* called for its side effect: gives the view its own terminated copy */
  (void)cmark_chunk_to_cstr(mem, &trimmed);
  return trimmed;
}
#endif

View File

@ -1,737 +0,0 @@
#ifndef CMARK_GFM_EXTENSION_API_H
#define CMARK_GFM_EXTENSION_API_H
#ifdef __cplusplus
extern "C" {
#endif
#include "cmark-gfm.h"
struct cmark_renderer;
struct cmark_html_renderer;
struct cmark_chunk;
/**
* ## Extension Support
*
* While the "core" of libcmark is strictly compliant with the
* specification, an API is provided for extension writers to
* hook into the parsing process.
*
* It should be noted that the cmark_node API already offers
* room for customization, with methods offered to traverse and
* modify the AST, and even define custom blocks.
* When the desired customization is achievable in an error-proof
* way using that API, it should be the preferred method.
*
* The following API requires a more in-depth understanding
* of libcmark's parsing strategy, which is exposed
* [here](http://spec.commonmark.org/0.24/#appendix-a-parsing-strategy).
*
* It should be used when "a posteriori" modification of the AST
* proves to be too difficult / impossible to implement correctly.
*
* It can also serve as an intermediary step before extending
* the specification, as an extension implemented using this API
* will be trivially integrated in the core if it proves to be
* desirable.
*/
typedef struct cmark_plugin cmark_plugin;
/** A syntax extension that can be attached to a cmark_parser
* with cmark_parser_attach_syntax_extension().
*
* Extension writers should assign functions matching
* the signature of the following 'virtual methods' to
* implement new functionality.
*
* Their calling order and expected behaviour match the procedure outlined
* at <http://spec.commonmark.org/0.24/#phase-1-block-structure>:
*
* During step 1, cmark will call the function provided through
* 'cmark_syntax_extension_set_match_block_func' when it
* iterates over an open block created by this extension,
* to determine whether it could contain the new line.
* If no function was provided, cmark will close the block.
*
* During step 2, if and only if the new line doesn't match any
* of the standard syntax rules, cmark will call the function
* provided through 'cmark_syntax_extension_set_open_block_func'
* to let the extension determine whether that new line matches
* one of its syntax rules.
* It is the responsibility of the parser to create and add the
* new block with cmark_parser_make_block and cmark_parser_add_child.
* If no function was provided, the extension will have
* no effect at all on the final block structure of the AST.
*
* #### Inline parsing phase hooks
*
* For each character provided by the extension through
* 'cmark_syntax_extension_set_special_inline_chars',
* the function provided by the extension through
* 'cmark_syntax_extension_set_match_inline_func'
* will get called, it is the responsibility of the extension
* to scan the characters located at the current inline parsing offset
* with the cmark_inline_parser API.
*
* Depending on the type of the extension, it can either:
*
* * Scan forward, determine that the syntax matches and return
* a newly-created inline node with the appropriate type.
* This is the technique that would be used if inline code
* (with backticks) was implemented as an extension.
* * Scan only the character(s) that its syntax rules require
* for opening and closing nodes, push a delimiter on the
* delimiter stack, and return a simple text node with its
* contents set to the character(s) consumed.
* This is the technique that would be used if emphasis
* inlines were implemented as an extension.
*
* When an extension has pushed delimiters on the stack,
* the function provided through
* 'cmark_syntax_extension_set_inline_from_delim_func'
* will get called in a later phase,
* when the inline parser has matched opener and closer delimiters
* created by the extension together.
*
* It is then the responsibility of the extension to modify
* and populate the opener inline text node, and to remove
* the necessary delimiters from the delimiter stack.
*
* Finally, the extension should return NULL if its scan didn't
* match its syntax rules.
*
* The extension can store whatever private data it might need
* with 'cmark_syntax_extension_set_private',
* and optionally define a free function for this data.
*/
typedef struct subject cmark_inline_parser;
/** Exposed raw for now
 *
 * An entry of the inline parser's delimiter stack, linked to its neighbours
 * in both directions. Field notes marked "presumably" are inferred from the
 * field names and should be confirmed against the inline parser.
 */
typedef struct delimiter {
struct delimiter *previous;
struct delimiter *next;
/* presumably the text node holding the delimiter characters - TODO confirm */
cmark_node *inl_text;
/* presumably the offset of the delimiter run in the input - TODO confirm */
bufsize_t position;
/* presumably the number of characters in the delimiter run - TODO confirm */
bufsize_t length;
/* the character this delimiter is made of */
unsigned char delim_char;
/* flags: whether this delimiter may open and/or close an inline */
int can_open;
int can_close;
} delimiter;
/**
* ### Plugin API.
*
* Extensions should be distributed as dynamic libraries,
* with a single exported function named after the distributed
* filename.
*
* When discovering extensions (see cmark_init), cmark will
* try to load a symbol named "init_{{filename}}" in all the
* dynamic libraries it encounters.
*
* For example, given a dynamic library named myextension.so
* (or myextension.dll), cmark will try to load the symbol
* named "init_myextension". This means that the filename
* must lend itself to forming a valid C identifier, with
* the notable exception of dashes, which will be translated
* to underscores, which means cmark will look for a function
* named "init_my_extension" if it encounters a dynamic library
* named "my-extension.so".
*
* See the 'cmark_plugin_init_func' typedef for the exact prototype
* this function should follow.
*
* For now the extensibility of cmark is not complete, as
* it only offers API to hook into the block parsing phase
* (<http://spec.commonmark.org/0.24/#phase-1-block-structure>).
*
* See 'cmark_plugin_register_syntax_extension' for more information.
*/
/** The prototype plugins' init function should follow.
*/
typedef int (*cmark_plugin_init_func)(cmark_plugin *plugin);
/** Register a syntax 'extension' with the 'plugin', it will be made
* available as an extension and, if attached to a cmark_parser
* with 'cmark_parser_attach_syntax_extension', it will contribute
* to the block parsing process.
*
* See the documentation for 'cmark_syntax_extension' for information
* on how to implement one.
*
* This function will typically be called from the init function
* of external modules.
*
* This takes ownership of 'extension', one should not call
* 'cmark_syntax_extension_free' on a registered extension.
*/
CMARK_GFM_EXPORT
int cmark_plugin_register_syntax_extension(cmark_plugin *plugin,
cmark_syntax_extension *extension);
/** This will search for the syntax extension named 'name' among the
* registered syntax extensions.
*
* It can then be attached to a cmark_parser
* with the cmark_parser_attach_syntax_extension method.
*/
CMARK_GFM_EXPORT
cmark_syntax_extension *cmark_find_syntax_extension(const char *name);
/** Should create and add a new open block to 'parent_container' if
* 'input' matches a syntax rule for that block type. It is allowed
* to modify the type of 'parent_container'.
*
* Should return the newly created block if there is one, or
* 'parent_container' if its type was modified, or NULL.
*/
typedef cmark_node * (*cmark_open_block_func) (cmark_syntax_extension *extension,
int indented,
cmark_parser *parser,
cmark_node *parent_container,
unsigned char *input,
int len);
/** Called for each character the extension registered through
 * 'cmark_syntax_extension_set_special_inline_chars'. The extension scans
 * the input at the current inline parsing offset with the
 * cmark_inline_parser API and returns a new inline node, or NULL if its
 * syntax rules did not match.
 */
typedef cmark_node *(*cmark_match_inline_func)(cmark_syntax_extension *extension,
cmark_parser *parser,
cmark_node *parent,
unsigned char character,
cmark_inline_parser *inline_parser);
/** Called in a later phase, once the inline parser has matched an opener and
 * a closer delimiter that the extension pushed on the delimiter stack. The
 * extension modifies and populates the opener's inline text node and removes
 * the necessary delimiters from the stack.
 * NOTE(review): the meaning of the returned delimiter is not visible here -
 * confirm against the inline parser.
 */
typedef delimiter *(*cmark_inline_from_delim_func)(cmark_syntax_extension *extension,
cmark_parser *parser,
cmark_inline_parser *inline_parser,
delimiter *opener,
delimiter *closer);
/** Should return 'true' if 'input' can be contained in 'container',
* 'false' otherwise.
*/
typedef int (*cmark_match_block_func) (cmark_syntax_extension *extension,
cmark_parser *parser,
unsigned char *input,
int len,
cmark_node *container);
typedef const char *(*cmark_get_type_string_func) (cmark_syntax_extension *extension,
cmark_node *node);
typedef int (*cmark_can_contain_func) (cmark_syntax_extension *extension,
cmark_node *node,
cmark_node_type child);
typedef int (*cmark_contains_inlines_func) (cmark_syntax_extension *extension,
cmark_node *node);
typedef void (*cmark_common_render_func) (cmark_syntax_extension *extension,
struct cmark_renderer *renderer,
cmark_node *node,
cmark_event_type ev_type,
int options);
typedef int (*cmark_commonmark_escape_func) (cmark_syntax_extension *extension,
cmark_node *node,
int c);
typedef const char* (*cmark_xml_attr_func) (cmark_syntax_extension *extension,
cmark_node *node);
typedef void (*cmark_html_render_func) (cmark_syntax_extension *extension,
struct cmark_html_renderer *renderer,
cmark_node *node,
cmark_event_type ev_type,
int options);
typedef int (*cmark_html_filter_func) (cmark_syntax_extension *extension,
const unsigned char *tag,
size_t tag_len);
typedef cmark_node *(*cmark_postprocess_func) (cmark_syntax_extension *extension,
cmark_parser *parser,
cmark_node *root);
typedef int (*cmark_ispunct_func) (char c);
typedef void (*cmark_opaque_alloc_func) (cmark_syntax_extension *extension,
cmark_mem *mem,
cmark_node *node);
typedef void (*cmark_opaque_free_func) (cmark_syntax_extension *extension,
cmark_mem *mem,
cmark_node *node);
/** Free a cmark_syntax_extension.
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_free (cmark_mem *mem, cmark_syntax_extension *extension);
/** Return a newly-constructed cmark_syntax_extension, named 'name'.
*/
CMARK_GFM_EXPORT
cmark_syntax_extension *cmark_syntax_extension_new (const char *name);
CMARK_GFM_EXPORT
cmark_node_type cmark_syntax_extension_add_node(int is_inline);
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_emphasis(cmark_syntax_extension *extension, int emphasis);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_open_block_func(cmark_syntax_extension *extension,
cmark_open_block_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_match_block_func(cmark_syntax_extension *extension,
cmark_match_block_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_match_inline_func(cmark_syntax_extension *extension,
cmark_match_inline_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_inline_from_delim_func(cmark_syntax_extension *extension,
cmark_inline_from_delim_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_special_inline_chars(cmark_syntax_extension *extension,
cmark_llist *special_chars);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_get_type_string_func(cmark_syntax_extension *extension,
cmark_get_type_string_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_can_contain_func(cmark_syntax_extension *extension,
cmark_can_contain_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_contains_inlines_func(cmark_syntax_extension *extension,
cmark_contains_inlines_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_commonmark_render_func(cmark_syntax_extension *extension,
cmark_common_render_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_plaintext_render_func(cmark_syntax_extension *extension,
cmark_common_render_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_latex_render_func(cmark_syntax_extension *extension,
cmark_common_render_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_xml_attr_func(cmark_syntax_extension *extension,
cmark_xml_attr_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_man_render_func(cmark_syntax_extension *extension,
cmark_common_render_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_html_render_func(cmark_syntax_extension *extension,
cmark_html_render_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_html_filter_func(cmark_syntax_extension *extension,
cmark_html_filter_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_commonmark_escape_func(cmark_syntax_extension *extension,
cmark_commonmark_escape_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_private(cmark_syntax_extension *extension,
void *priv,
cmark_free_func free_func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void *cmark_syntax_extension_get_private(cmark_syntax_extension *extension);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_postprocess_func(cmark_syntax_extension *extension,
cmark_postprocess_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_opaque_alloc_func(cmark_syntax_extension *extension,
cmark_opaque_alloc_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_syntax_extension_set_opaque_free_func(cmark_syntax_extension *extension,
cmark_opaque_free_func func);
/** See the documentation for 'cmark_syntax_extension'
*/
CMARK_GFM_EXPORT
void cmark_parser_set_backslash_ispunct_func(cmark_parser *parser,
cmark_ispunct_func func);
/** Return the index of the line currently being parsed, starting with 1.
*/
CMARK_GFM_EXPORT
int cmark_parser_get_line_number(cmark_parser *parser);
/** Return the offset in bytes in the line being processed.
*
* Example:
*
* ### foo
*
* Here, offset will first be 0, then 5 (the index of the 'f' character).
*/
CMARK_GFM_EXPORT
int cmark_parser_get_offset(cmark_parser *parser);
/**
* Return the offset in 'columns' in the line being processed.
*
* This value may differ from the value returned by
* cmark_parser_get_offset() in that it accounts for tabs,
* and as such should not be used as an index in the current line's
* buffer.
*
* Example:
*
* cmark_parser_advance_offset() can be called to advance the
* offset by a number of columns, instead of a number of bytes.
*
* In that case, if offset falls "in the middle" of a tab
* character, 'column' and offset will differ.
*
* ```
* foo \t bar
* ^ ^^
* offset (0) 20
* ```
*
* If cmark_parser_advance_offset is called here with 'columns'
* set to 'true' and 'offset' set to 22, cmark_parser_get_offset()
* will return 20, whereas cmark_parser_get_column() will return
* 22.
*
* Additionally, as tabs expand to the next multiple of 4 columns,
* cmark_parser_has_partially_consumed_tab() will now return
* 'true'.
*/
CMARK_GFM_EXPORT
int cmark_parser_get_column(cmark_parser *parser);
/** Return the absolute index in bytes of the first nonspace
* character coming after the offset as returned by
* cmark_parser_get_offset() in the line currently being processed.
*
* Example:
*
* ```
* foo bar baz \n
* ^ ^ ^
* 0 offset (16) first_nonspace (28)
* ```
*/
CMARK_GFM_EXPORT
int cmark_parser_get_first_nonspace(cmark_parser *parser);
/** Return the absolute index of the first nonspace column coming after 'offset'
* in the line currently being processed, counting tabs as multiple
* columns as appropriate.
*
* See the documentation for cmark_parser_get_first_nonspace() and
* cmark_parser_get_column() for more information.
*/
CMARK_GFM_EXPORT
int cmark_parser_get_first_nonspace_column(cmark_parser *parser);
/** Return the difference between the values returned by
* cmark_parser_get_first_nonspace_column() and
* cmark_parser_get_column().
*
* This is not a byte offset, as it can count one tab as multiple
* characters.
*/
CMARK_GFM_EXPORT
int cmark_parser_get_indent(cmark_parser *parser);
/** Return 'true' if the line currently being processed has been entirely
* consumed, 'false' otherwise.
*
* Example:
*
* ```
* foo bar baz \n
* ^
* offset
* ```
*
* This function will return 'false' here.
*
* ```
* foo bar baz \n
* ^
* offset
* ```
* This function will still return 'false'.
*
* ```
* foo bar baz \n
* ^
* offset
* ```
*
* At this point, this function will now return 'true'.
*/
CMARK_GFM_EXPORT
int cmark_parser_is_blank(cmark_parser *parser);
/** Return 'true' if the value returned by cmark_parser_get_offset()
* is 'inside' an expanded tab.
*
* See the documentation for cmark_parser_get_column() for more
* information.
*/
CMARK_GFM_EXPORT
int cmark_parser_has_partially_consumed_tab(cmark_parser *parser);
/** Return the length in bytes of the previously processed line, excluding potential
* newline (\n) and carriage return (\r) trailing characters.
*/
CMARK_GFM_EXPORT
int cmark_parser_get_last_line_length(cmark_parser *parser);
/** Add a child to 'parent' during the parsing process.
*
* If 'parent' isn't the kind of node that can accept this child,
* this function will back up till it hits a node that can, closing
* blocks as appropriate.
*/
CMARK_GFM_EXPORT
cmark_node*cmark_parser_add_child(cmark_parser *parser,
cmark_node *parent,
cmark_node_type block_type,
int start_column);
/** Advance the 'offset' of the parser in the current line.
*
* See the documentation of cmark_parser_get_offset() and
* cmark_parser_get_column() for more information.
*/
CMARK_GFM_EXPORT
void cmark_parser_advance_offset(cmark_parser *parser,
const char *input,
int count,
int columns);
/** Feeds 'len' bytes from 'buffer' to 'parser'.
* NOTE(review): undocumented here; presumably a variant of
* cmark_parser_feed() that may be called while 'parser' is already in the
* middle of processing input -- confirm against the implementation.
*/
CMARK_GFM_EXPORT
void cmark_parser_feed_reentrant(cmark_parser *parser, const char *buffer, size_t len);
/** Attach the syntax 'extension' to the 'parser', to provide extra syntax
* rules.
* See the documentation for cmark_syntax_extension for more information.
*
* Returns 'true' if the 'extension' was successfully attached,
* 'false' otherwise.
*/
CMARK_GFM_EXPORT
int cmark_parser_attach_syntax_extension(cmark_parser *parser, cmark_syntax_extension *extension);
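/** Change the type of 'node'.
*
* Return 0 if the type could be changed, 1 otherwise.
*
* NOTE(review): this is the inverse of the "1 on success, 0 on failure"
* convention documented for the other setters in this API -- verify
* against the implementation before relying on the return value.
*/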
CMARK_GFM_EXPORT int cmark_node_set_type(cmark_node *node, cmark_node_type type);
/** Return the string content for all types of 'node'.
* The pointer stays valid as long as 'node' isn't freed.
*/
CMARK_GFM_EXPORT const char *cmark_node_get_string_content(cmark_node *node);
/** Set the string 'content' for all types of 'node'.
* Copies 'content'.
*/
CMARK_GFM_EXPORT int cmark_node_set_string_content(cmark_node *node, const char *content);
/** Get the syntax extension responsible for the creation of 'node'.
* Return NULL if 'node' was created because it matched standard syntax rules.
*/
CMARK_GFM_EXPORT cmark_syntax_extension *cmark_node_get_syntax_extension(cmark_node *node);
/** Set the syntax extension responsible for creating 'node'.
*/
CMARK_GFM_EXPORT int cmark_node_set_syntax_extension(cmark_node *node,
cmark_syntax_extension *extension);
/**
* ## Inline syntax extension helpers
*
* The inline parsing process is described in detail at
* <http://spec.commonmark.org/0.24/#phase-2-inline-structure>
*/
/** Should return 'true' if the predicate matches 'c', 'false' otherwise
*/
typedef int (*cmark_inline_predicate)(int c);
/** Advance the current inline parsing offset */
CMARK_GFM_EXPORT
void cmark_inline_parser_advance_offset(cmark_inline_parser *parser);
/** Get the current inline parsing offset */
CMARK_GFM_EXPORT
int cmark_inline_parser_get_offset(cmark_inline_parser *parser);
/** Set the offset in bytes in the chunk being processed by the given inline parser.
*/
CMARK_GFM_EXPORT
void cmark_inline_parser_set_offset(cmark_inline_parser *parser, int offset);
/** Gets the cmark_chunk being operated on by the given inline parser.
* Use cmark_inline_parser_get_offset to get our current position in the chunk.
*/
CMARK_GFM_EXPORT
struct cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser);
/** Returns 1 if the inline parser is currently in a bracket; pass 1 for 'image'
* if you want to know about an image-type bracket, 0 for link-type. */
CMARK_GFM_EXPORT
int cmark_inline_parser_in_bracket(cmark_inline_parser *parser, int image);
/** Remove the last n characters from the last child of the given node.
* This only works where all n characters are in the single last child, and the last
* child is CMARK_NODE_TEXT.
*/
CMARK_GFM_EXPORT
void cmark_node_unput(cmark_node *node, int n);
/** Get the character located at the current inline parsing offset
*/
CMARK_GFM_EXPORT
unsigned char cmark_inline_parser_peek_char(cmark_inline_parser *parser);
/** Get the character located 'pos' bytes in the current line.
*/
CMARK_GFM_EXPORT
unsigned char cmark_inline_parser_peek_at(cmark_inline_parser *parser, int pos);
/** Whether the inline parser has reached the end of the current line
*/
CMARK_GFM_EXPORT
int cmark_inline_parser_is_eof(cmark_inline_parser *parser);
/** Get the characters located after the current inline parsing offset
* while 'pred' matches. Free after usage.
*/
CMARK_GFM_EXPORT
char *cmark_inline_parser_take_while(cmark_inline_parser *parser, cmark_inline_predicate pred);
/** Push a delimiter on the delimiter stack.
* See <http://spec.commonmark.org/0.24/#phase-2-inline-structure> for
* more information on the parameters.
*/
CMARK_GFM_EXPORT
void cmark_inline_parser_push_delimiter(cmark_inline_parser *parser,
unsigned char c,
int can_open,
int can_close,
cmark_node *inl_text);
/** Remove 'delim' from the delimiter stack
*/
CMARK_GFM_EXPORT
void cmark_inline_parser_remove_delimiter(cmark_inline_parser *parser, delimiter *delim);
/** Returns a delimiter pointer for 'parser'.
* NOTE(review): presumably the top of the delimiter stack, or NULL when the
* stack is empty -- confirm against the implementation.
*/
CMARK_GFM_EXPORT
delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser);
/** Returns a line number for 'parser'.
* NOTE(review): whether this is 0- or 1-based is not stated here -- confirm.
*/
CMARK_GFM_EXPORT
int cmark_inline_parser_get_line(cmark_inline_parser *parser);
/** Returns a column number for 'parser'.
* NOTE(review): whether this is 0- or 1-based, and whether it counts bytes
* or expanded columns, is not stated here -- confirm.
*/
CMARK_GFM_EXPORT
int cmark_inline_parser_get_column(cmark_inline_parser *parser);
/** Convenience function to scan a given delimiter.
*
* 'left_flanking' and 'right_flanking' will be set to true if they
* respectively precede and follow a non-space, non-punctuation
* character.
*
* Additionally, 'punct_before' and 'punct_after' will respectively be set
* if the preceding or following character is a punctuation character.
*
* Note that 'left_flanking' and 'right_flanking' can both be 'true'.
*
* Returns the number of delimiters encountered, in the limit
* of 'max_delims', and advances the inline parsing offset.
*/
CMARK_GFM_EXPORT
int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser,
int max_delims,
unsigned char c,
int *left_flanking,
int *right_flanking,
int *punct_before,
int *punct_after);
/** Takes the 'parser' and an int flag 'add'.
* NOTE(review): undocumented here; presumably registers (non-zero 'add') or
* unregisters (zero) the special inline characters of the extensions
* attached to 'parser' -- confirm against the implementation.
*/
CMARK_GFM_EXPORT
void cmark_manage_extensions_special_characters(cmark_parser *parser, int add);
/** Returns a cmark_llist associated with 'parser'.
* NOTE(review): presumably the list of attached syntax extensions; ownership
* of the list is not stated here -- confirm before freeing it.
*/
CMARK_GFM_EXPORT
cmark_llist *cmark_parser_get_syntax_extensions(cmark_parser *parser);
/** NOTE(review): undocumented here; presumably saves the current state of
* the arena allocator so that a later cmark_arena_pop() can release
* everything allocated since -- confirm against the implementation.
*/
CMARK_GFM_EXPORT
void cmark_arena_push(void);
/** NOTE(review): undocumented here; presumably undoes the most recent
* cmark_arena_push(). The meaning of the int return value is not visible
* in this header -- confirm.
*/
CMARK_GFM_EXPORT
int cmark_arena_pop(void);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,833 +0,0 @@
#ifndef CMARK_GFM_H
#define CMARK_GFM_H
#include <stdio.h>
#include <stdint.h>
#include "cmark-gfm_export.h"
#include "cmark-gfm_version.h"
#ifdef __cplusplus
extern "C" {
#endif
/** # NAME
*
* **cmark-gfm** - CommonMark parsing, manipulating, and rendering
*/
/** # DESCRIPTION
*
* ## Simple Interface
*/
/** Convert 'text' (assumed to be a UTF-8 encoded string with length
* 'len') from CommonMark Markdown to HTML, returning a null-terminated,
* UTF-8-encoded string. It is the caller's responsibility
* to free the returned buffer.
*/
CMARK_GFM_EXPORT
char *cmark_markdown_to_html(const char *text, size_t len, int options);
/** ## Node Structure
*/
/* Bit layout of a cmark_node_type value (fits in 16 bits):
 *
 *   bit 15      set on every real node type (CMARK_NODE_TYPE_PRESENT);
 *               only CMARK_NODE_NONE leaves it clear
 *   bit 14      clear for block types, set for inline types
 *   bits 13..0  ordinal of the type within its class
 *
 * CMARK_NODE_TYPE_MASK selects the two class bits and
 * CMARK_NODE_VALUE_MASK selects the ordinal.
 */
#define CMARK_NODE_TYPE_PRESENT (0x8000)
#define CMARK_NODE_TYPE_BLOCK (CMARK_NODE_TYPE_PRESENT | 0x0000)
#define CMARK_NODE_TYPE_INLINE (CMARK_NODE_TYPE_PRESENT | 0x4000)
#define CMARK_NODE_TYPE_MASK (0xc000)
#define CMARK_NODE_VALUE_MASK (0x3fff)

/* Node types defined by this header. The last enumerator deliberately has
 * no trailing comma: a trailing comma is only valid from C99 onwards and is
 * diagnosed by pedantic C89 / C++03 builds, and the other enums in this
 * header do not use one either.
 */
typedef enum {
  /* Error status */
  CMARK_NODE_NONE = 0x0000,

  /* Block */
  CMARK_NODE_DOCUMENT = CMARK_NODE_TYPE_BLOCK | 0x0001,
  CMARK_NODE_BLOCK_QUOTE = CMARK_NODE_TYPE_BLOCK | 0x0002,
  CMARK_NODE_LIST = CMARK_NODE_TYPE_BLOCK | 0x0003,
  CMARK_NODE_ITEM = CMARK_NODE_TYPE_BLOCK | 0x0004,
  CMARK_NODE_CODE_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0005,
  CMARK_NODE_HTML_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0006,
  CMARK_NODE_CUSTOM_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0007,
  CMARK_NODE_PARAGRAPH = CMARK_NODE_TYPE_BLOCK | 0x0008,
  CMARK_NODE_HEADING = CMARK_NODE_TYPE_BLOCK | 0x0009,
  CMARK_NODE_THEMATIC_BREAK = CMARK_NODE_TYPE_BLOCK | 0x000a,
  CMARK_NODE_FOOTNOTE_DEFINITION = CMARK_NODE_TYPE_BLOCK | 0x000b,

  /* Inline */
  CMARK_NODE_TEXT = CMARK_NODE_TYPE_INLINE | 0x0001,
  CMARK_NODE_SOFTBREAK = CMARK_NODE_TYPE_INLINE | 0x0002,
  CMARK_NODE_LINEBREAK = CMARK_NODE_TYPE_INLINE | 0x0003,
  CMARK_NODE_CODE = CMARK_NODE_TYPE_INLINE | 0x0004,
  CMARK_NODE_HTML_INLINE = CMARK_NODE_TYPE_INLINE | 0x0005,
  CMARK_NODE_CUSTOM_INLINE = CMARK_NODE_TYPE_INLINE | 0x0006,
  CMARK_NODE_EMPH = CMARK_NODE_TYPE_INLINE | 0x0007,
  CMARK_NODE_STRONG = CMARK_NODE_TYPE_INLINE | 0x0008,
  CMARK_NODE_LINK = CMARK_NODE_TYPE_INLINE | 0x0009,
  CMARK_NODE_IMAGE = CMARK_NODE_TYPE_INLINE | 0x000a,
  CMARK_NODE_FOOTNOTE_REFERENCE = CMARK_NODE_TYPE_INLINE | 0x000b
} cmark_node_type;
/* Variables of type cmark_node_type, declared here and defined elsewhere.
* NOTE(review): presumably the highest block / inline node type currently
* registered (including types added by syntax extensions) -- confirm
* against the implementation.
*/
extern cmark_node_type CMARK_NODE_LAST_BLOCK;
extern cmark_node_type CMARK_NODE_LAST_INLINE;
/* For backwards compatibility: */
#define CMARK_NODE_HEADER CMARK_NODE_HEADING
#define CMARK_NODE_HRULE CMARK_NODE_THEMATIC_BREAK
#define CMARK_NODE_HTML CMARK_NODE_HTML_BLOCK
#define CMARK_NODE_INLINE_HTML CMARK_NODE_HTML_INLINE
/* Kind of list a node represents; CMARK_NO_LIST is the zero value. */
typedef enum {
  CMARK_NO_LIST = 0,
  CMARK_BULLET_LIST = 1,
  CMARK_ORDERED_LIST = 2
} cmark_list_type;

/* Kind of list delimiter; CMARK_NO_DELIM is the zero value. */
typedef enum {
  CMARK_NO_DELIM = 0,
  CMARK_PERIOD_DELIM = 1,
  CMARK_PAREN_DELIM = 2
} cmark_delim_type;
/* Handle types used throughout the API. This header only forward-declares
* the underlying structs, so code that includes just this header works with
* pointers to them and the accessor functions declared below.
*/
typedef struct cmark_node cmark_node;
typedef struct cmark_parser cmark_parser;
typedef struct cmark_iter cmark_iter;
typedef struct cmark_syntax_extension cmark_syntax_extension;
/**
* ## Custom memory allocator support
*/
/** Defines the memory allocation functions to be used by CMark
* when parsing and allocating a document tree
*/
struct cmark_mem {
  /* Same signature as the C library's calloc(). */
  void *(*calloc)(size_t nmemb, size_t size);
  /* Same signature as the C library's realloc(). */
  void *(*realloc)(void *ptr, size_t size);
  /* Same signature as the C library's free(). */
  void (*free)(void *ptr);
};
typedef struct cmark_mem cmark_mem;
/** The default memory allocator; uses the system's calloc,
* realloc and free.
*/
CMARK_GFM_EXPORT
cmark_mem *cmark_get_default_mem_allocator(void);
/** An arena allocator; uses system calloc to allocate large
* slabs of memory. Memory in these slabs is not reused at all.
*/
CMARK_GFM_EXPORT
cmark_mem *cmark_get_arena_mem_allocator(void);
/** Resets the arena allocator, quickly returning all used memory
* to the operating system.
*/
CMARK_GFM_EXPORT
void cmark_arena_reset(void);
/** Callback for freeing user data with a 'cmark_mem' context.
*/
typedef void (*cmark_free_func) (cmark_mem *mem, void *user_data);
/*
* ## Basic data structures
*
* To keep dependencies to the strict minimum, libcmark implements
* its own versions of "classic" data structures.
*/
/**
* ### Linked list
*/
/** A generic singly linked list.
*/
struct _cmark_llist {
  struct _cmark_llist *next; /* following element */
  void *data;                /* payload pointer carried by this element */
};
typedef struct _cmark_llist cmark_llist;
/** Append an element to the linked list, return the possibly modified
* head of the list.
*/
CMARK_GFM_EXPORT
cmark_llist * cmark_llist_append (cmark_mem * mem,
cmark_llist * head,
void * data);
/** Free the list starting with 'head', calling 'free_func' with the
* data pointer of each of its elements
*/
CMARK_GFM_EXPORT
void cmark_llist_free_full (cmark_mem * mem,
cmark_llist * head,
cmark_free_func free_func);
/** Free the list starting with 'head'
*/
CMARK_GFM_EXPORT
void cmark_llist_free (cmark_mem * mem,
cmark_llist * head);
/**
* ## Creating and Destroying Nodes
*/
/** Creates a new node of type 'type'. Note that the node may have
* other required properties, which it is the caller's responsibility
* to assign.
*/
CMARK_GFM_EXPORT cmark_node *cmark_node_new(cmark_node_type type);
/** Same as `cmark_node_new`, but explicitly listing the memory
* allocator used to allocate the node. Note: be sure to use the same
* allocator for every node in a tree, or bad things can happen.
*/
CMARK_GFM_EXPORT cmark_node *cmark_node_new_with_mem(cmark_node_type type,
cmark_mem *mem);
/** Variant of `cmark_node_new` that additionally takes a syntax
* 'extension'.
* NOTE(review): presumably the node is recorded as created by 'extension'
* (cf. cmark_node_get_syntax_extension) -- confirm against the
* implementation.
*/
CMARK_GFM_EXPORT cmark_node *cmark_node_new_with_ext(cmark_node_type type,
cmark_syntax_extension *extension);
/** Variant of `cmark_node_new` that takes both the memory allocator 'mem'
* and a syntax 'extension'.
* NOTE(review): presumably combines `cmark_node_new_with_mem` and
* `cmark_node_new_with_ext` -- confirm against the implementation.
*/
CMARK_GFM_EXPORT cmark_node *cmark_node_new_with_mem_and_ext(cmark_node_type type,
cmark_mem *mem,
cmark_syntax_extension *extension);
/** Frees the memory allocated for a node and any children.
*/
CMARK_GFM_EXPORT void cmark_node_free(cmark_node *node);
/**
* ## Tree Traversal
*/
/** Returns the next node in the sequence after 'node', or NULL if
* there is none.
*/
CMARK_GFM_EXPORT cmark_node *cmark_node_next(cmark_node *node);
/** Returns the previous node in the sequence before 'node', or NULL if
* there is none.
*/
CMARK_GFM_EXPORT cmark_node *cmark_node_previous(cmark_node *node);
/** Returns the parent of 'node', or NULL if there is none.
*/
CMARK_GFM_EXPORT cmark_node *cmark_node_parent(cmark_node *node);
/** Returns the first child of 'node', or NULL if 'node' has no children.
*/
CMARK_GFM_EXPORT cmark_node *cmark_node_first_child(cmark_node *node);
/** Returns the last child of 'node', or NULL if 'node' has no children.
*/
CMARK_GFM_EXPORT cmark_node *cmark_node_last_child(cmark_node *node);
/** Returns the footnote reference of 'node', or NULL if 'node' doesn't have a
* footnote reference.
*/
CMARK_GFM_EXPORT cmark_node *cmark_node_parent_footnote_def(cmark_node *node);
/**
* ## Iterator
*
* An iterator will walk through a tree of nodes, starting from a root
* node, returning one node at a time, together with information about
* whether the node is being entered or exited. The iterator will
* first descend to a child node, if there is one. When there is no
* child, the iterator will go to the next sibling. When there is no
* next sibling, the iterator will return to the parent (but with
* a 'cmark_event_type' of `CMARK_EVENT_EXIT`). The iterator will
* return `CMARK_EVENT_DONE` when it reaches the root node again.
* One natural application is an HTML renderer, where an `ENTER` event
* outputs an open tag and an `EXIT` event outputs a close tag.
* An iterator might also be used to transform an AST in some systematic
* way, for example, turning all level-3 headings into regular paragraphs.
*
* void
* usage_example(cmark_node *root) {
* cmark_event_type ev_type;
* cmark_iter *iter = cmark_iter_new(root);
*
* while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
* cmark_node *cur = cmark_iter_get_node(iter);
* // Do something with `cur` and `ev_type`
* }
*
* cmark_iter_free(iter);
* }
*
* Iterators will never return `EXIT` events for leaf nodes, which are nodes
* of type:
*
* * CMARK_NODE_HTML_BLOCK
* * CMARK_NODE_THEMATIC_BREAK
* * CMARK_NODE_CODE_BLOCK
* * CMARK_NODE_TEXT
* * CMARK_NODE_SOFTBREAK
* * CMARK_NODE_LINEBREAK
* * CMARK_NODE_CODE
* * CMARK_NODE_HTML_INLINE
*
* Nodes must only be modified after an `EXIT` event, or an `ENTER` event for
* leaf nodes.
*/
/* Events reported while iterating over a node tree; CMARK_EVENT_NONE is the
 * zero value. */
typedef enum {
  CMARK_EVENT_NONE = 0,
  CMARK_EVENT_DONE = 1,
  CMARK_EVENT_ENTER = 2,
  CMARK_EVENT_EXIT = 3
} cmark_event_type;
/** Creates a new iterator starting at 'root'. The current node and event
* type are undefined until 'cmark_iter_next' is called for the first time.
* The memory allocated for the iterator should be released using
* 'cmark_iter_free' when it is no longer needed.
*/
CMARK_GFM_EXPORT
cmark_iter *cmark_iter_new(cmark_node *root);
/** Frees the memory allocated for an iterator.
*/
CMARK_GFM_EXPORT
void cmark_iter_free(cmark_iter *iter);
/** Advances to the next node and returns the event type (`CMARK_EVENT_ENTER`,
* `CMARK_EVENT_EXIT` or `CMARK_EVENT_DONE`).
*/
CMARK_GFM_EXPORT
cmark_event_type cmark_iter_next(cmark_iter *iter);
/** Returns the current node.
*/
CMARK_GFM_EXPORT
cmark_node *cmark_iter_get_node(cmark_iter *iter);
/** Returns the current event type.
*/
CMARK_GFM_EXPORT
cmark_event_type cmark_iter_get_event_type(cmark_iter *iter);
/** Returns the root node.
*/
CMARK_GFM_EXPORT
cmark_node *cmark_iter_get_root(cmark_iter *iter);
/** Resets the iterator so that the current node is 'current' and
* the event type is 'event_type'. The new current node must be a
* descendant of the root node or the root node itself.
*/
CMARK_GFM_EXPORT
void cmark_iter_reset(cmark_iter *iter, cmark_node *current,
cmark_event_type event_type);
/**
* ## Accessors
*/
/** Returns the user data of 'node'.
*/
CMARK_GFM_EXPORT void *cmark_node_get_user_data(cmark_node *node);
/** Sets arbitrary user data for 'node'. Returns 1 on success,
* 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_user_data(cmark_node *node, void *user_data);
/** Set free function for user data */
CMARK_GFM_EXPORT
int cmark_node_set_user_data_free_func(cmark_node *node,
cmark_free_func free_func);
/** Returns the type of 'node', or `CMARK_NODE_NONE` on error.
*/
CMARK_GFM_EXPORT cmark_node_type cmark_node_get_type(cmark_node *node);
/** Like 'cmark_node_get_type', but returns a string representation
of the type, or `"<unknown>"`.
*/
CMARK_GFM_EXPORT
const char *cmark_node_get_type_string(cmark_node *node);
/** Returns the string contents of 'node', or an empty
string if none is set. Returns NULL if called on a
node that does not have string content.
*/
CMARK_GFM_EXPORT const char *cmark_node_get_literal(cmark_node *node);
/** Sets the string contents of 'node'. Returns 1 on success,
* 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_literal(cmark_node *node, const char *content);
/** Returns the heading level of 'node', or 0 if 'node' is not a heading.
*/
CMARK_GFM_EXPORT int cmark_node_get_heading_level(cmark_node *node);
/* For backwards compatibility */
#define cmark_node_get_header_level cmark_node_get_heading_level
#define cmark_node_set_header_level cmark_node_set_heading_level
/** Sets the heading level of 'node', returning 1 on success and 0 on error.
*/
CMARK_GFM_EXPORT int cmark_node_set_heading_level(cmark_node *node, int level);
/** Returns the list type of 'node', or `CMARK_NO_LIST` if 'node'
* is not a list.
*/
CMARK_GFM_EXPORT cmark_list_type cmark_node_get_list_type(cmark_node *node);
/** Sets the list type of 'node', returning 1 on success and 0 on error.
*/
CMARK_GFM_EXPORT int cmark_node_set_list_type(cmark_node *node,
cmark_list_type type);
/** Returns the list delimiter type of 'node', or `CMARK_NO_DELIM` if 'node'
* is not a list.
*/
CMARK_GFM_EXPORT cmark_delim_type cmark_node_get_list_delim(cmark_node *node);
/** Sets the list delimiter type of 'node', returning 1 on success and 0
* on error.
*/
CMARK_GFM_EXPORT int cmark_node_set_list_delim(cmark_node *node,
cmark_delim_type delim);
/** Returns starting number of 'node', if it is an ordered list, otherwise 0.
*/
CMARK_GFM_EXPORT int cmark_node_get_list_start(cmark_node *node);
/** Sets starting number of 'node', if it is an ordered list. Returns 1
* on success, 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_list_start(cmark_node *node, int start);
/** Returns 1 if 'node' is a tight list, 0 otherwise.
*/
CMARK_GFM_EXPORT int cmark_node_get_list_tight(cmark_node *node);
/** Sets the "tightness" of a list. Returns 1 on success, 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_list_tight(cmark_node *node, int tight);
/**
* Returns item index of 'node'. This is only used when rendering output
* formats such as commonmark, which need to output the index. It is not
* required for formats such as html or latex.
*/
CMARK_GFM_EXPORT int cmark_node_get_item_index(cmark_node *node);
/** Sets item index of 'node'. Returns 1 on success, 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_item_index(cmark_node *node, int idx);
/** Returns the info string from a fenced code block.
*/
CMARK_GFM_EXPORT const char *cmark_node_get_fence_info(cmark_node *node);
/** Sets the info string in a fenced code block, returning 1 on
* success and 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_fence_info(cmark_node *node, const char *info);
/** Sets code blocks fencing details
*/
CMARK_GFM_EXPORT int cmark_node_set_fenced(cmark_node * node, int fenced,
int length, int offset, char character);
/** Returns code blocks fencing details
*/
CMARK_GFM_EXPORT int cmark_node_get_fenced(cmark_node *node, int *length, int *offset, char *character);
/** Returns the URL of a link or image 'node', or an empty string
if no URL is set. Returns NULL if called on a node that is
not a link or image.
*/
CMARK_GFM_EXPORT const char *cmark_node_get_url(cmark_node *node);
/** Sets the URL of a link or image 'node'. Returns 1 on success,
* 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_url(cmark_node *node, const char *url);
/** Returns the title of a link or image 'node', or an empty
string if no title is set. Returns NULL if called on a node
that is not a link or image.
*/
CMARK_GFM_EXPORT const char *cmark_node_get_title(cmark_node *node);
/** Sets the title of a link or image 'node'. Returns 1 on success,
* 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_title(cmark_node *node, const char *title);
/** Returns the literal "on enter" text for a custom 'node', or
an empty string if no on_enter is set. Returns NULL if called
on a non-custom node.
*/
CMARK_GFM_EXPORT const char *cmark_node_get_on_enter(cmark_node *node);
/** Sets the literal text to render "on enter" for a custom 'node'.
Any children of the node will be rendered after this text.
Returns 1 on success, 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_on_enter(cmark_node *node,
const char *on_enter);
/** Returns the literal "on exit" text for a custom 'node', or
an empty string if no on_exit is set. Returns NULL if
called on a non-custom node.
*/
CMARK_GFM_EXPORT const char *cmark_node_get_on_exit(cmark_node *node);
/** Sets the literal text to render "on exit" for a custom 'node'.
Any children of the node will be rendered before this text.
Returns 1 on success, 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_set_on_exit(cmark_node *node, const char *on_exit);
/** Returns the line on which 'node' begins.
*/
CMARK_GFM_EXPORT int cmark_node_get_start_line(cmark_node *node);
/** Returns the column at which 'node' begins.
*/
CMARK_GFM_EXPORT int cmark_node_get_start_column(cmark_node *node);
/** Returns the line on which 'node' ends.
*/
CMARK_GFM_EXPORT int cmark_node_get_end_line(cmark_node *node);
/** Returns the column at which 'node' ends.
*/
CMARK_GFM_EXPORT int cmark_node_get_end_column(cmark_node *node);
/**
* ## Tree Manipulation
*/
/** Unlinks a 'node', removing it from the tree, but not freeing its
* memory. (Use 'cmark_node_free' for that.)
*/
CMARK_GFM_EXPORT void cmark_node_unlink(cmark_node *node);
/** Inserts 'sibling' before 'node'. Returns 1 on success, 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_insert_before(cmark_node *node,
cmark_node *sibling);
/** Inserts 'sibling' after 'node'. Returns 1 on success, 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_insert_after(cmark_node *node, cmark_node *sibling);
/** Replaces 'oldnode' with 'newnode' and unlinks 'oldnode' (but does
* not free its memory).
* Returns 1 on success, 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode);
/** Adds 'child' to the beginning of the children of 'node'.
* Returns 1 on success, 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_prepend_child(cmark_node *node, cmark_node *child);
/** Adds 'child' to the end of the children of 'node'.
* Returns 1 on success, 0 on failure.
*/
CMARK_GFM_EXPORT int cmark_node_append_child(cmark_node *node, cmark_node *child);
/** Consolidates adjacent text nodes.
*/
CMARK_GFM_EXPORT void cmark_consolidate_text_nodes(cmark_node *root);
/** Ensures a node and all its children own their own chunk memory.
*/
CMARK_GFM_EXPORT void cmark_node_own(cmark_node *root);
/**
* ## Parsing
*
* Simple interface:
*
* cmark_node *document = cmark_parse_document("Hello *world*", 13,
* CMARK_OPT_DEFAULT);
*
* Streaming interface:
*
* cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT);
* FILE *fp = fopen("myfile.md", "rb");
* while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
* cmark_parser_feed(parser, buffer, bytes);
* if (bytes < sizeof(buffer)) {
* break;
* }
* }
* document = cmark_parser_finish(parser);
* cmark_parser_free(parser);
*/
/** Creates a new parser object.
*/
CMARK_GFM_EXPORT
cmark_parser *cmark_parser_new(int options);
/** Creates a new parser object with the given memory allocator
*/
CMARK_GFM_EXPORT
cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem);
/** Frees memory allocated for a parser object.
*/
CMARK_GFM_EXPORT
void cmark_parser_free(cmark_parser *parser);
/** Feeds a string of length 'len' to 'parser'.
*/
CMARK_GFM_EXPORT
void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len);
/** Finish parsing and return a pointer to a tree of nodes.
*/
CMARK_GFM_EXPORT
cmark_node *cmark_parser_finish(cmark_parser *parser);
/** Parse a CommonMark document in 'buffer' of length 'len'.
* Returns a pointer to a tree of nodes. The memory allocated for
* the node tree should be released using 'cmark_node_free'
* when it is no longer needed.
*/
CMARK_GFM_EXPORT
cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);
/** Parse a CommonMark document in file 'f', returning a pointer to
* a tree of nodes. The memory allocated for the node tree should be
* released using 'cmark_node_free' when it is no longer needed.
*/
CMARK_GFM_EXPORT
cmark_node *cmark_parse_file(FILE *f, int options);
/**
* ## Rendering
*/
/** Render a 'node' tree as XML. It is the caller's responsibility
* to free the returned buffer.
*/
CMARK_GFM_EXPORT
char *cmark_render_xml(cmark_node *root, int options);
/** As for 'cmark_render_xml', but specifying the allocator to use for
* the resulting string.
*/
CMARK_GFM_EXPORT
char *cmark_render_xml_with_mem(cmark_node *root, int options, cmark_mem *mem);
/** Render a 'node' tree as an HTML fragment. It is up to the user
* to add an appropriate header and footer. It is the caller's
* responsibility to free the returned buffer.
*/
CMARK_GFM_EXPORT
char *cmark_render_html(cmark_node *root, int options, cmark_llist *extensions);
/** As for 'cmark_render_html', but specifying the allocator to use for
* the resulting string.
*/
CMARK_GFM_EXPORT
char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_llist *extensions, cmark_mem *mem);
/** Render a 'node' tree as a groff man page, without the header.
* It is the caller's responsibility to free the returned buffer.
*/
CMARK_GFM_EXPORT
char *cmark_render_man(cmark_node *root, int options, int width);
/** As for 'cmark_render_man', but specifying the allocator to use for
* the resulting string.
*/
CMARK_GFM_EXPORT
char *cmark_render_man_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);
/** Render a 'node' tree as a commonmark document.
 * The buffer is allocated with the allocator of 'root' (the one returned
 * by 'cmark_node_mem'). When 'options' contains 'CMARK_OPT_HARDBREAKS',
 * 'width' is ignored and treated as 0.
 * It is the caller's responsibility to free the returned buffer.
 */
CMARK_GFM_EXPORT
char *cmark_render_commonmark(cmark_node *root, int options, int width);
/** As for 'cmark_render_commonmark', but specifying the allocator to use for
 * the resulting string.
 */
CMARK_GFM_EXPORT
char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);
/** Render a 'node' tree as a plain text document.
* It is the caller's responsibility to free the returned buffer.
*/
CMARK_GFM_EXPORT
char *cmark_render_plaintext(cmark_node *root, int options, int width);
/** As for 'cmark_render_plaintext', but specifying the allocator to use for
* the resulting string.
*/
CMARK_GFM_EXPORT
char *cmark_render_plaintext_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);
/** Render a 'node' tree as a LaTeX document.
* It is the caller's responsibility to free the returned buffer.
*/
CMARK_GFM_EXPORT
char *cmark_render_latex(cmark_node *root, int options, int width);
/** As for 'cmark_render_latex', but specifying the allocator to use for
* the resulting string.
*/
CMARK_GFM_EXPORT
char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmark_mem *mem);
/**
* ## Options
*/
/** Default options.
 *
 * Every other option below is a single-bit flag; combine several
 * options with bitwise OR.
 */
#define CMARK_OPT_DEFAULT 0
/**
 * ### Options affecting rendering
 */
/** Include a `data-sourcepos` attribute on all block elements.
 */
#define CMARK_OPT_SOURCEPOS (1 << 1)
/** Render `softbreak` elements as hard line breaks.
 */
#define CMARK_OPT_HARDBREAKS (1 << 2)
/** `CMARK_OPT_SAFE` is defined here for API compatibility,
 * but it no longer has any effect. "Safe" mode is now the default:
 * set `CMARK_OPT_UNSAFE` to disable it.
 */
#define CMARK_OPT_SAFE (1 << 3)
/** Render raw HTML and unsafe links (`javascript:`, `vbscript:`,
 * `file:`, and `data:`, except for `image/png`, `image/gif`,
 * `image/jpeg`, or `image/webp` mime types). By default,
 * raw HTML is replaced by a placeholder HTML comment. Unsafe
 * links are replaced by empty strings.
 *
 * Note that this flag uses bit 17, out of sequence with its neighbours.
 */
#define CMARK_OPT_UNSAFE (1 << 17)
/** Render `softbreak` elements as spaces.
 */
#define CMARK_OPT_NOBREAKS (1 << 4)
/**
 * ### Options affecting parsing
 */
/** Legacy option (no effect).
 */
#define CMARK_OPT_NORMALIZE (1 << 8)
/** Validate UTF-8 in the input before parsing, replacing illegal
 * sequences with the replacement character U+FFFD.
 */
#define CMARK_OPT_VALIDATE_UTF8 (1 << 9)
/** Convert straight quotes to curly, --- to em dashes, -- to en dashes.
 */
#define CMARK_OPT_SMART (1 << 10)
/** Use GitHub-style <pre lang="x"> tags for code blocks instead of <pre><code
 * class="language-x">.
 */
#define CMARK_OPT_GITHUB_PRE_LANG (1 << 11)
/** Be liberal in interpreting inline HTML tags.
 */
#define CMARK_OPT_LIBERAL_HTML_TAG (1 << 12)
/** Parse footnotes.
 */
#define CMARK_OPT_FOOTNOTES (1 << 13)
/** Only parse strikethroughs if surrounded by exactly 2 tildes.
 * Gives some compatibility with redcarpet.
 */
#define CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE (1 << 14)
/** Use style attributes to align table cells instead of align attributes.
 */
#define CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES (1 << 15)
/** Include the remainder of the info string in code blocks in
 * a separate attribute.
 */
#define CMARK_OPT_FULL_INFO_STRING (1 << 16)
/**
* ## Version information
*/
/** The library version as integer for runtime checks. Also available as
 * macro CMARK_GFM_VERSION for compile time checks.
 *
 * * Bits 24-31 contain the major version.
 * * Bits 16-23 contain the minor version.
 * * Bits 8-15 contain the patchlevel.
 * * Bits 0-7 contain the GFM revision.
 *
 * In hexadecimal format, the number 0x001D000D represents version
 * 0.29.0.gfm.13.
 */
CMARK_GFM_EXPORT
int cmark_version(void);
/** The library version string for runtime checks. Also available as
 * macro CMARK_GFM_VERSION_STRING for compile time checks.
 */
CMARK_GFM_EXPORT
const char *cmark_version_string(void);
/** # AUTHORS
*
* John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
*/
/* Unprefixed aliases for the CMARK_-prefixed node type, list type and
 * list delimiter constants. Define CMARK_NO_SHORT_NAMES before including
 * this header to keep these generic names out of the including file.
 */
#ifndef CMARK_NO_SHORT_NAMES
#define NODE_DOCUMENT CMARK_NODE_DOCUMENT
#define NODE_BLOCK_QUOTE CMARK_NODE_BLOCK_QUOTE
#define NODE_LIST CMARK_NODE_LIST
#define NODE_ITEM CMARK_NODE_ITEM
#define NODE_CODE_BLOCK CMARK_NODE_CODE_BLOCK
#define NODE_HTML_BLOCK CMARK_NODE_HTML_BLOCK
#define NODE_CUSTOM_BLOCK CMARK_NODE_CUSTOM_BLOCK
#define NODE_PARAGRAPH CMARK_NODE_PARAGRAPH
#define NODE_HEADING CMARK_NODE_HEADING
#define NODE_HEADER CMARK_NODE_HEADER
#define NODE_THEMATIC_BREAK CMARK_NODE_THEMATIC_BREAK
#define NODE_HRULE CMARK_NODE_HRULE
#define NODE_TEXT CMARK_NODE_TEXT
#define NODE_SOFTBREAK CMARK_NODE_SOFTBREAK
#define NODE_LINEBREAK CMARK_NODE_LINEBREAK
#define NODE_CODE CMARK_NODE_CODE
#define NODE_HTML_INLINE CMARK_NODE_HTML_INLINE
#define NODE_CUSTOM_INLINE CMARK_NODE_CUSTOM_INLINE
#define NODE_EMPH CMARK_NODE_EMPH
#define NODE_STRONG CMARK_NODE_STRONG
#define NODE_LINK CMARK_NODE_LINK
#define NODE_IMAGE CMARK_NODE_IMAGE
#define BULLET_LIST CMARK_BULLET_LIST
#define ORDERED_LIST CMARK_ORDERED_LIST
#define PERIOD_DELIM CMARK_PERIOD_DELIM
#define PAREN_DELIM CMARK_PAREN_DELIM
#endif
typedef int32_t bufsize_t;
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,43 +0,0 @@
#ifndef CMARK_GFM_EXPORT_H
#define CMARK_GFM_EXPORT_H

/*
 * Symbol-visibility and deprecation macros for the cmark-gfm public API.
 *
 * CMARK_GFM_EXPORT / CMARK_GFM_NO_EXPORT expand to nothing in every
 * configuration handled here; a build may still pre-define them before
 * including this header.
 */
#ifdef CMARK_GFM_STATIC_DEFINE
#  define CMARK_GFM_EXPORT
#  define CMARK_GFM_NO_EXPORT
#else
#  ifndef CMARK_GFM_EXPORT
#    ifdef libcmark_gfm_static_EXPORTS
        /* We are building this library */
#      define CMARK_GFM_EXPORT
#    else
        /* We are using this library */
#      define CMARK_GFM_EXPORT
#    endif
#  endif

#  ifndef CMARK_GFM_NO_EXPORT
#    define CMARK_GFM_NO_EXPORT
#  endif
#endif

/*
 * Deprecation marker. __declspec(deprecated) is only understood by MSVC
 * and compatible compilers, so pick the spelling per compiler and fall
 * back to nothing on compilers with no known equivalent.
 */
#ifndef CMARK_GFM_DEPRECATED
#  if defined(_MSC_VER)
#    define CMARK_GFM_DEPRECATED __declspec(deprecated)
#  elif defined(__GNUC__) || defined(__clang__)
#    define CMARK_GFM_DEPRECATED __attribute__((__deprecated__))
#  else
#    define CMARK_GFM_DEPRECATED
#  endif
#endif

#ifndef CMARK_GFM_DEPRECATED_EXPORT
#  define CMARK_GFM_DEPRECATED_EXPORT CMARK_GFM_EXPORT CMARK_GFM_DEPRECATED
#endif

#ifndef CMARK_GFM_DEPRECATED_NO_EXPORT
#  define CMARK_GFM_DEPRECATED_NO_EXPORT CMARK_GFM_NO_EXPORT CMARK_GFM_DEPRECATED
#endif

/* NOLINTNEXTLINE(readability-avoid-unconditional-preprocessor-if) */
#if 0 /* DEFINE_NO_DEPRECATED */
#  ifndef CMARK_GFM_NO_DEPRECATED
#    define CMARK_GFM_NO_DEPRECATED
#  endif
#endif

#endif /* CMARK_GFM_EXPORT_H */

View File

@ -1,7 +0,0 @@
#ifndef CMARK_GFM_VERSION_H
#define CMARK_GFM_VERSION_H
/* Packed version number: major version in bits 24-31, minor version in
 * bits 16-23, patch level in bits 8-15 and GFM revision in bits 0-7. */
#define CMARK_GFM_VERSION ((0 << 24) | (29 << 16) | (0 << 8) | 13)
/* The same version as a string: major.minor.patch.gfm.revision */
#define CMARK_GFM_VERSION_STRING "0.29.0.gfm.13"
#endif

View File

@ -1,7 +0,0 @@
#ifndef CMARK_GFM_VERSION_H
#define CMARK_GFM_VERSION_H
/* Template for the version header: the at-sign placeholders below are
 * presumably replaced with the project version components when the build
 * system configures this file -- confirm in the build scripts.
 * Packed layout: major in bits 24-31, minor in bits 16-23, patch in
 * bits 8-15, GFM revision in bits 0-7. */
#define CMARK_GFM_VERSION ((@PROJECT_VERSION_MAJOR@ << 24) | (@PROJECT_VERSION_MINOR@ << 16) | (@PROJECT_VERSION_PATCH@ << 8) | @PROJECT_VERSION_GFM@)
/* The same version as a string: major.minor.patch.gfm.revision */
#define CMARK_GFM_VERSION_STRING "@PROJECT_VERSION_MAJOR@.@PROJECT_VERSION_MINOR@.@PROJECT_VERSION_PATCH@.gfm.@PROJECT_VERSION_GFM@"
#endif

View File

@ -1,55 +0,0 @@
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include "registry.h"
#include "node.h"
#include "houdini.h"
#include "cmark-gfm.h"
#include "buffer.h"
/* Last block / inline node types, initialised to the footnote node types. */
cmark_node_type CMARK_NODE_LAST_BLOCK = CMARK_NODE_FOOTNOTE_DEFINITION;
cmark_node_type CMARK_NODE_LAST_INLINE = CMARK_NODE_FOOTNOTE_REFERENCE;

/* Runtime accessor for the packed CMARK_GFM_VERSION number. */
int cmark_version(void) {
  return CMARK_GFM_VERSION;
}

/* Runtime accessor for the CMARK_GFM_VERSION_STRING text. */
const char *cmark_version_string(void) {
  return CMARK_GFM_VERSION_STRING;
}
/* calloc() wrapper used by the default allocator.
 * Never returns NULL: when calloc() fails, a message is written to
 * stderr and the process is aborted. */
static void *xcalloc(size_t nmem, size_t size) {
  void *block = calloc(nmem, size);

  if (block == NULL) {
    fprintf(stderr, "[cmark] calloc returned null pointer, aborting\n");
    abort();
  }

  return block;
}
/* realloc() wrapper used by the default allocator.
 * Never returns NULL: when realloc() yields a null pointer, a message is
 * written to stderr and the process is aborted. */
static void *xrealloc(void *ptr, size_t size) {
  void *resized = realloc(ptr, size);

  if (resized == NULL) {
    fprintf(stderr, "[cmark] realloc returned null pointer, aborting\n");
    abort();
  }

  return resized;
}
/* free() wrapper used by the default allocator. */
static void xfree(void *ptr) {
  free(ptr);
}

/* Default allocator: the aborting calloc/realloc wrappers plus free. */
cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, xfree};

/* Returns the address of the library-wide default allocator. */
cmark_mem *cmark_get_default_mem_allocator(void) {
  cmark_mem *default_allocator = &CMARK_DEFAULT_MEM_ALLOCATOR;
  return default_allocator;
}
/* Convenience wrapper: parses 'len' bytes of 'text' with 'options',
 * renders the resulting tree as HTML without any extensions, frees the
 * tree and returns the rendered string. */
char *cmark_markdown_to_html(const char *text, size_t len, int options) {
  cmark_node *document = cmark_parse_document(text, len, options);
  char *html = cmark_render_html(document, options, NULL);

  cmark_node_free(document);
  return html;
}

View File

@ -1,44 +0,0 @@
#include <stdint.h>
#include "cmark_ctype.h"
/** Character class of every byte value:
 * 1 = space, 2 = punct, 3 = digit, 4 = alpha, 0 = other
 */
static const uint8_t cmark_ctype_class[256] = {
    /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
    /* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 2 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 3 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2,
    /* 4 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    /* 5 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2,
    /* 6 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    /* 7 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 0,
    /* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

/* Looks up the class of 'c', treating it as an unsigned byte. */
static uint8_t cmark_ctype_class_of(char c) {
  return cmark_ctype_class[(uint8_t)c];
}

/**
 * Returns 1 if c is a "whitespace" character as defined by the spec.
 */
int cmark_isspace(char c) {
  return cmark_ctype_class_of(c) == 1;
}

/**
 * Returns 1 if c is an ascii punctuation character.
 */
int cmark_ispunct(char c) {
  return cmark_ctype_class_of(c) == 2;
}

/**
 * Returns 1 if c is an ascii digit or letter.
 */
int cmark_isalnum(char c) {
  const uint8_t cls = cmark_ctype_class_of(c);
  return (cls == 3 || cls == 4);
}

/**
 * Returns 1 if c is an ascii digit.
 */
int cmark_isdigit(char c) {
  return cmark_ctype_class_of(c) == 3;
}

/**
 * Returns 1 if c is an ascii letter.
 */
int cmark_isalpha(char c) {
  return cmark_ctype_class_of(c) == 4;
}

View File

@ -1,33 +0,0 @@
#ifndef CMARK_CMARK_CTYPE_H
#define CMARK_CMARK_CTYPE_H
#ifdef __cplusplus
extern "C" {
#endif
#include "cmark-gfm_export.h"
/** Locale-independent versions of functions from ctype.h.
 * We want cmark to behave the same no matter what the system locale.
 * Each function returns 1 when 'c' belongs to the class and 0 otherwise;
 * bytes 0x80 and above belong to no class.
 */
/** True for tab, line feed, carriage return and space. */
CMARK_GFM_EXPORT
int cmark_isspace(char c);
/** True for ASCII punctuation characters. */
CMARK_GFM_EXPORT
int cmark_ispunct(char c);
/** True for ASCII letters and digits. */
CMARK_GFM_EXPORT
int cmark_isalnum(char c);
/** True for the ASCII digits '0' to '9'. */
CMARK_GFM_EXPORT
int cmark_isdigit(char c);
/** True for the ASCII letters 'A'-'Z' and 'a'-'z'. */
CMARK_GFM_EXPORT
int cmark_isalpha(char c);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,514 +0,0 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <assert.h>
#include "config.h"
#include "cmark-gfm.h"
#include "node.h"
#include "buffer.h"
#include "utf8.h"
#include "scanners.h"
#include "render.h"
#include "syntax_extension.h"
// Shorthands for the renderer callbacks. They expand to code that refers to
// variables named 'renderer' and 'node', so they can only be used where both
// are in scope.
// OUT: emit string 's' with the given wrap flag and escaping mode.
#define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping)
// LIT: emit string 's' literally (no wrapping, LITERAL escaping).
#define LIT(s) renderer->out(renderer, node, s, false, LITERAL)
// CR / BLANKLINE: invoke the renderer's 'cr' / 'blankline' callbacks.
#define CR() renderer->cr(renderer)
#define BLANKLINE() renderer->blankline(renderer)
// Sizes of the scratch buffers used for escapes and ordered-list markers.
#define ENCODED_SIZE 20
#define LISTMARKER_SIZE 20
// Functions to convert cmark_nodes to commonmark strings.
// Writes the code point 'c' to the renderer output, escaping it when the
// escaping mode requires it:
//   LITERAL - never escaped;
//   NORMAL  - control characters and characters that could be read as
//             CommonMark syntax are escaped;
//   URL     - whitespace, backtick, angle brackets, backslash and
//             parentheses are escaped;
//   TITLE   - backtick, angle brackets, double quote and backslash are
//             escaped.
// 'nextc' is the byte following 'c' (0 if none); it is consulted for the
// context-dependent NORMAL-mode escapes. Only code points below 0x80 are
// ever escaped.
static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node,
                              cmark_escaping escape,
                              int32_t c, unsigned char nextc) {
  bool needs_escaping = false;
  bool follows_digit =
      renderer->buffer->size > 0 &&
      cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]);
  char encoded[ENCODED_SIZE];

  // Note: '!' is escaped unconditionally in NORMAL mode, so no separate
  // check for the "![" image opener is needed.
  needs_escaping =
      c < 0x80 && escape != LITERAL &&
      ((escape == NORMAL &&
        (c < 0x20 ||
         c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' ||
         c == '>' || c == '\\' || c == '`' || c == '~' || c == '!' ||
         (c == '&' && cmark_isalpha(nextc)) ||
         (renderer->begin_content && (c == '-' || c == '+' || c == '=') &&
          // begin_content doesn't get set to false til we've passed digits
          // at the beginning of line, so...
          !follows_digit) ||
         (renderer->begin_content && (c == '.' || c == ')') && follows_digit &&
          (nextc == 0 || cmark_isspace(nextc))))) ||
       (escape == URL &&
        (c == '`' || c == '<' || c == '>' || cmark_isspace((char)c) || c == '\\' ||
         c == ')' || c == '(')) ||
       (escape == TITLE &&
        (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\')));

  if (needs_escaping) {
    if (escape == URL && cmark_isspace((char)c)) {
      // Use percent encoding for whitespace. The value must be zero-padded
      // to two hex digits: tab, LF and CR are below 0x10 and a space-padded
      // field would produce an invalid escape such as "% 9".
      snprintf(encoded, ENCODED_SIZE, "%%%02X", (unsigned int)c);
      cmark_strbuf_puts(renderer->buffer, encoded);
      renderer->column += 3;
    } else if (cmark_ispunct((char)c)) {
      // punctuation can be backslash-escaped
      cmark_render_ascii(renderer, "\\");
      cmark_render_code_point(renderer, c);
    } else { // render as entity
      snprintf(encoded, ENCODED_SIZE, "&#%d;", c);
      cmark_strbuf_puts(renderer->buffer, encoded);
      renderer->column += (int)strlen(encoded);
    }
  } else {
    cmark_render_code_point(renderer, c);
  }
}
// Returns the length of the longest run of consecutive backticks in the
// NUL-terminated string 'code' (0 if it contains none).
static int longest_backtick_sequence(const char *code) {
  int best = 0;
  int run = 0;
  const char *p = code;

  for (;;) {
    if (*p == '`') {
      run++;
    } else {
      // Any other byte, including the terminator, ends the current run.
      if (run > best) {
        best = run;
      }
      run = 0;
      if (*p == '\0') {
        break;
      }
    }
    p++;
  }

  return best;
}
// Returns the smallest run length n >= 1 such that 'code' contains no run of
// exactly n consecutive backticks.
// note: if the shortest sequence is >= 32, this returns 32
// so as not to overflow the bit array.
static int shortest_unused_backtick_sequence(const char *code) {
  // Bit n is set when a run of exactly n backticks occurs; bit 0 is pre-set
  // so that a length of 0 is never reported.
  uint32_t seen = 1;
  int run = 0;
  int n;
  const char *p;

  for (p = code;; p++) {
    if (*p == '`') {
      run++;
      continue;
    }
    // Any other byte, including the terminator, ends the current run.
    if (run > 0 && run < 32) {
      seen |= (1U << run);
    }
    run = 0;
    if (*p == '\0') {
      break;
    }
  }

  // return number of first bit that is 0:
  for (n = 0; n < 32 && (seen & 1U); n++) {
    seen >>= 1;
  }
  return n;
}
// Reports whether 'node' is a link that can be written in autolink form:
// its URL starts with a scheme, it has no title, and its (consolidated)
// link text equals the URL, ignoring a leading "mailto:" in the URL.
static bool is_autolink(cmark_node *node) {
  cmark_chunk *url;
  cmark_node *label;
  char *target;
  int target_len;

  if (node->type != CMARK_NODE_LINK) {
    return false;
  }

  url = &node->as.link.url;
  if (url->len == 0 || scan_scheme(url, 0) == 0) {
    return false;
  }

  // if it has a title, we can't treat it as an autolink:
  if (node->as.link.title.len > 0) {
    return false;
  }

  label = node->first_child;
  if (label == NULL) {
    return false;
  }
  cmark_consolidate_text_nodes(label);

  target = (char *)url->data;
  target_len = url->len;
  if (strncmp(target, "mailto:", 7) == 0) {
    target += 7;
    target_len -= 7;
  }

  if (target_len != label->as.literal.len) {
    return false;
  }
  return strncmp(target, (char *)label->as.literal.data,
                 label->as.literal.len) == 0;
}
// Renders one traversal event (enter or exit, per 'ev_type') for 'node' as
// CommonMark text.
//
// Nodes whose syntax extension provides 'commonmark_render_func' are
// delegated to that extension. Returns 0 to signal that the contents of the
// node should be skipped (used for autolinks, which are written out in full
// on entry) and 1 otherwise.
//
// Every string appended to renderer->prefix on entry must have exactly the
// length that is truncated again on exit, and the marker written for a list
// item must be as wide as the prefix used for its continuation lines.
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
                         cmark_event_type ev_type, int options) {
  int list_number;
  cmark_delim_type list_delim;
  int numticks;
  bool extra_spaces;
  int i;
  bool entering = (ev_type == CMARK_EVENT_ENTER);
  const char *info, *code, *title;
  char fencechar[2] = {'\0', '\0'};
  size_t info_len, code_len;
  char listmarker[LISTMARKER_SIZE];
  const char *emph_delim;
  bool first_in_list_item;
  bufsize_t marker_width;
  bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) &&
                    !(CMARK_OPT_HARDBREAKS & options);

  // Don't adjust tight list status til we've started the list.
  // Otherwise we lose the blank line between a paragraph and
  // a following list.
  if (entering) {
    if (node->parent && node->parent->type == CMARK_NODE_ITEM) {
      renderer->in_tight_list_item = node->parent->parent->as.list.tight;
    }
  } else {
    if (node->type == CMARK_NODE_LIST) {
      renderer->in_tight_list_item =
          node->parent &&
          node->parent->type == CMARK_NODE_ITEM &&
          node->parent->parent->as.list.tight;
    }
  }

  if (node->extension && node->extension->commonmark_render_func) {
    node->extension->commonmark_render_func(node->extension, renderer, node, ev_type, options);
    return 1;
  }

  switch (node->type) {
  case CMARK_NODE_DOCUMENT:
    break;

  case CMARK_NODE_BLOCK_QUOTE:
    if (entering) {
      LIT("> ");
      renderer->begin_content = true;
      cmark_strbuf_puts(renderer->prefix, "> ");
    } else {
      cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2);
      BLANKLINE();
    }
    break;

  case CMARK_NODE_LIST:
    if (!entering && node->next && (node->next->type == CMARK_NODE_CODE_BLOCK ||
                                    node->next->type == CMARK_NODE_LIST)) {
      // this ensures that a following indented code block or list will be
      // interpreted correctly.
      CR();
      LIT("<!-- end list -->");
      BLANKLINE();
    }
    break;

  case CMARK_NODE_ITEM:
    if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
      marker_width = 4;
    } else {
      list_number = cmark_node_get_item_index(node);
      list_delim = cmark_node_get_list_delim(node->parent);
      // we ensure a width of at least 4 so
      // we get nice transition from single digits
      // to double
      snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number,
               list_delim == CMARK_PAREN_DELIM ? ")" : ".",
               list_number < 10 ? "  " : " ");
      marker_width = (bufsize_t)strlen(listmarker);
    }
    if (entering) {
      if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
        // four characters wide, matching marker_width above
        LIT("  - ");
        renderer->begin_content = true;
      } else {
        LIT(listmarker);
        renderer->begin_content = true;
      }
      // continuation lines of the item are indented by the marker width
      for (i = marker_width; i--;) {
        cmark_strbuf_putc(renderer->prefix, ' ');
      }
    } else {
      cmark_strbuf_truncate(renderer->prefix,
                            renderer->prefix->size - marker_width);
      CR();
    }
    break;

  case CMARK_NODE_HEADING:
    if (entering) {
      for (i = cmark_node_get_heading_level(node); i > 0; i--) {
        LIT("#");
      }
      LIT(" ");
      renderer->begin_content = true;
      renderer->no_linebreaks = true;
    } else {
      renderer->no_linebreaks = false;
      BLANKLINE();
    }
    break;

  case CMARK_NODE_CODE_BLOCK:
    first_in_list_item = node->prev == NULL && node->parent &&
                         node->parent->type == CMARK_NODE_ITEM;

    if (!first_in_list_item) {
      BLANKLINE();
    }
    info = cmark_node_get_fence_info(node);
    info_len = strlen(info);
    // an info string containing a backtick cannot follow a backtick fence
    fencechar[0] = strchr(info, '`') == NULL ? '`' : '~';
    code = cmark_node_get_literal(node);
    code_len = strlen(code);
    // use indented form if no info, and code doesn't
    // begin or end with a blank line, and code isn't
    // first thing in a list item
    if (info_len == 0 && (code_len > 2 && !cmark_isspace(code[0]) &&
                          !(cmark_isspace(code[code_len - 1]) &&
                            cmark_isspace(code[code_len - 2]))) &&
        !first_in_list_item) {
      // indented code blocks need four spaces; the same four characters
      // are pushed onto the prefix and removed again below
      LIT("    ");
      cmark_strbuf_puts(renderer->prefix, "    ");
      OUT(cmark_node_get_literal(node), false, LITERAL);
      cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4);
    } else {
      // the fence must be longer than any backtick run inside the code
      numticks = longest_backtick_sequence(code) + 1;
      if (numticks < 3) {
        numticks = 3;
      }
      for (i = 0; i < numticks; i++) {
        LIT(fencechar);
      }
      LIT(" ");
      OUT(info, false, LITERAL);
      CR();
      OUT(cmark_node_get_literal(node), false, LITERAL);
      CR();
      for (i = 0; i < numticks; i++) {
        LIT(fencechar);
      }
    }
    BLANKLINE();
    break;

  case CMARK_NODE_HTML_BLOCK:
    BLANKLINE();
    OUT(cmark_node_get_literal(node), false, LITERAL);
    BLANKLINE();
    break;

  case CMARK_NODE_CUSTOM_BLOCK:
    BLANKLINE();
    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
        false, LITERAL);
    BLANKLINE();
    break;

  case CMARK_NODE_THEMATIC_BREAK:
    BLANKLINE();
    LIT("-----");
    BLANKLINE();
    break;

  case CMARK_NODE_PARAGRAPH:
    if (!entering) {
      BLANKLINE();
    }
    break;

  case CMARK_NODE_TEXT:
    OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
    break;

  case CMARK_NODE_LINEBREAK:
    if (!(CMARK_OPT_HARDBREAKS & options)) {
      // a hard line break is written as two trailing spaces
      LIT("  ");
    }
    CR();
    break;

  case CMARK_NODE_SOFTBREAK:
    if (CMARK_OPT_HARDBREAKS & options) {
      // soft breaks become hard breaks: two trailing spaces + newline
      LIT("  ");
      CR();
    } else if (!renderer->no_linebreaks && renderer->width == 0 &&
               !(CMARK_OPT_HARDBREAKS & options) &&
               !(CMARK_OPT_NOBREAKS & options)) {
      CR();
    } else {
      OUT(" ", allow_wrap, LITERAL);
    }
    break;

  case CMARK_NODE_CODE:
    code = cmark_node_get_literal(node);
    code_len = strlen(code);
    numticks = shortest_unused_backtick_sequence(code);
    // pad with a space when the code is empty or starts/ends with a
    // backtick or a space
    extra_spaces = code_len == 0 ||
                   code[0] == '`' || code[code_len - 1] == '`' ||
                   code[0] == ' ' || code[code_len - 1] == ' ';
    for (i = 0; i < numticks; i++) {
      LIT("`");
    }
    if (extra_spaces) {
      LIT(" ");
    }
    OUT(cmark_node_get_literal(node), allow_wrap, LITERAL);
    if (extra_spaces) {
      LIT(" ");
    }
    for (i = 0; i < numticks; i++) {
      LIT("`");
    }
    break;

  case CMARK_NODE_HTML_INLINE:
    OUT(cmark_node_get_literal(node), false, LITERAL);
    break;

  case CMARK_NODE_CUSTOM_INLINE:
    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
        false, LITERAL);
    break;

  case CMARK_NODE_STRONG:
    // a STRONG directly inside a STRONG emits no delimiters of its own;
    // opening and closing delimiters are the same string
    if (node->parent == NULL || node->parent->type != CMARK_NODE_STRONG) {
      LIT("**");
    }
    break;

  case CMARK_NODE_EMPH:
    // If we have EMPH(EMPH(x)), we need to use *_x_*
    // because **x** is STRONG(x):
    if (node->parent && node->parent->type == CMARK_NODE_EMPH &&
        node->next == NULL && node->prev == NULL) {
      emph_delim = "_";
    } else {
      emph_delim = "*";
    }
    // opening and closing delimiters are the same string
    LIT(emph_delim);
    break;

  case CMARK_NODE_LINK:
    if (is_autolink(node)) {
      if (entering) {
        LIT("<");
        if (strncmp(cmark_node_get_url(node), "mailto:", 7) == 0) {
          LIT((const char *)cmark_node_get_url(node) + 7);
        } else {
          LIT((const char *)cmark_node_get_url(node));
        }
        LIT(">");
        // return signal to skip contents of node...
        return 0;
      }
    } else {
      if (entering) {
        LIT("[");
      } else {
        LIT("](");
        OUT(cmark_node_get_url(node), false, URL);
        title = cmark_node_get_title(node);
        if (strlen(title) > 0) {
          LIT(" \"");
          OUT(title, false, TITLE);
          LIT("\"");
        }
        LIT(")");
      }
    }
    break;

  case CMARK_NODE_IMAGE:
    if (entering) {
      LIT("![");
    } else {
      LIT("](");
      OUT(cmark_node_get_url(node), false, URL);
      title = cmark_node_get_title(node);
      if (strlen(title) > 0) {
        OUT(" \"", allow_wrap, LITERAL);
        OUT(title, false, TITLE);
        LIT("\"");
      }
      LIT(")");
    }
    break;

  case CMARK_NODE_FOOTNOTE_REFERENCE:
    if (entering) {
      LIT("[^");

      // the label is stored with an explicit length, so copy it into a
      // zero-filled buffer one byte larger to get a NUL-terminated string
      char *footnote_label = renderer->mem->calloc(node->parent_footnote_def->as.literal.len + 1, sizeof(char));
      memmove(footnote_label, node->parent_footnote_def->as.literal.data, node->parent_footnote_def->as.literal.len);

      OUT(footnote_label, false, LITERAL);
      renderer->mem->free(footnote_label);

      LIT("]");
    }
    break;

  case CMARK_NODE_FOOTNOTE_DEFINITION:
    if (entering) {
      renderer->footnote_ix += 1;
      LIT("[^");

      // NUL-terminated copy of the length-delimited label (see above)
      char *footnote_label = renderer->mem->calloc(node->as.literal.len + 1, sizeof(char));
      memmove(footnote_label, node->as.literal.data, node->as.literal.len);

      OUT(footnote_label, false, LITERAL);
      renderer->mem->free(footnote_label);

      LIT("]:\n");

      // the body is indented by four spaces, removed again on exit
      cmark_strbuf_puts(renderer->prefix, "    ");
    } else {
      cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4);
    }
    break;

  default:
    assert(false);
    break;
  }

  return 1;
}
// Renders 'root' as CommonMark using the allocator that 'root' was created
// with.
char *cmark_render_commonmark(cmark_node *root, int options, int width) {
  cmark_mem *allocator = cmark_node_mem(root);

  return cmark_render_commonmark_with_mem(root, options, width, allocator);
}
// Renders 'root' as CommonMark, allocating the result with 'mem'.
char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) {
  // disable breaking on width under OPT_HARDBREAKS, since it has
  // a different meaning there
  const int wrap_width = (options & CMARK_OPT_HARDBREAKS) ? 0 : width;

  return cmark_render(mem, root, options, wrap_width, outc, S_render_node);
}

View File

@ -1,76 +0,0 @@
#ifndef CMARK_CONFIG_H
#define CMARK_CONFIG_H
#ifdef __cplusplus
extern "C" {
#endif
/* Feature macros. The cmakedefine directives in this template are filled in
 * when the build system configures the file; each presumably reflects a
 * configure-time check for the named feature -- confirm in the build
 * scripts. */
#cmakedefine HAVE_STDBOOL_H
#ifdef HAVE_STDBOOL_H
#include <stdbool.h>
#elif !defined(__cplusplus)
/* Fallback bool type for C when HAVE_STDBOOL_H is not defined; skipped for
 * C++, where bool is built in. */
typedef char bool;
#endif
#cmakedefine HAVE___BUILTIN_EXPECT
#cmakedefine HAVE___ATTRIBUTE__
/* CMARK_ATTRIBUTE(list) expands to __attribute__ (list) when
 * HAVE___ATTRIBUTE__ is defined and to nothing otherwise. */
#ifdef HAVE___ATTRIBUTE__
#define CMARK_ATTRIBUTE(list) __attribute__ (list)
#else
#define CMARK_ATTRIBUTE(list)
#endif
/* CMARK_INLINE: the inline keyword to use. MSVC compiling C gets __inline,
 * everything else gets inline. A definition supplied earlier wins. */
#ifndef CMARK_INLINE
#if defined(_MSC_VER) && !defined(__cplusplus)
#define CMARK_INLINE __inline
#else
#define CMARK_INLINE inline
#endif
#endif
/* snprintf and vsnprintf fallbacks for MSVC before 2015,
 * due to Valentin Milea http://stackoverflow.com/questions/2915672/
 *
 * Inside this guard, snprintf and vsnprintf are redirected to the two
 * wrappers below.
 */
#if defined(_MSC_VER) && _MSC_VER < 1900
#include <stdio.h>
#include <stdarg.h>
#define snprintf c99_snprintf
#define vsnprintf c99_vsnprintf
/* Formats into outBuf (at most 'size' bytes, truncating) via _vsnprintf_s.
 * When size is 0, or _vsnprintf_s reports -1, the result of
 * _vscprintf(format, ap) is returned instead.
 * NOTE(review): 'ap' is passed to _vscprintf after _vsnprintf_s has already
 * consumed it, without va_copy; presumably fine on the MSVC versions
 * targeted here -- confirm. */
CMARK_INLINE int c99_vsnprintf(char *outBuf, size_t size, const char *format, va_list ap)
{
int count = -1;
if (size != 0)
count = _vsnprintf_s(outBuf, size, _TRUNCATE, format, ap);
if (count == -1)
count = _vscprintf(format, ap);
return count;
}
/* Variadic front end: forwards its arguments to c99_vsnprintf. */
CMARK_INLINE int c99_snprintf(char *outBuf, size_t size, const char *format, ...)
{
int count;
va_list ap;
va_start(ap, format);
count = c99_vsnprintf(outBuf, size, format, ap);
va_end(ap);
return count;
}
#endif
#ifdef __cplusplus
}
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,63 +0,0 @@
#include "cmark-gfm.h"
#include "parser.h"
#include "footnotes.h"
#include "inlines.h"
#include "chunk.h"
/* Map-entry destructor for footnotes: releases the label, the footnote's
 * node (when one is attached) and the entry itself, using the map's
 * allocator. A NULL entry is ignored. */
static void footnote_free(cmark_map *map, cmark_map_entry *_ref) {
  cmark_footnote *footnote = (cmark_footnote *)_ref;
  cmark_mem *allocator = map->mem;

  if (footnote == NULL)
    return;

  allocator->free(footnote->entry.label);
  if (footnote->node)
    cmark_node_free(footnote->node);
  allocator->free(footnote);
}
/* Registers 'node' as a footnote in 'map', keyed by the normalized form of
 * the node's literal. Nothing is added when the label normalizes to NULL.
 * The new entry is pushed onto the front of the map's entry list. */
void cmark_footnote_create(cmark_map *map, cmark_node *node) {
  cmark_footnote *footnote;
  unsigned char *label = normalize_map_label(map->mem, &node->as.literal);

  /* empty footnote name, or composed from only whitespace */
  if (!label)
    return;

  assert(map->sorted == NULL);

  footnote = (cmark_footnote *)map->mem->calloc(1, sizeof(*footnote));
  footnote->node = node;
  footnote->entry.label = label;
  footnote->entry.age = map->size;
  footnote->entry.next = map->refs;

  map->refs = (cmark_map_entry *)footnote;
  map->size++;
}
/* Creates a map whose entries are released with footnote_free. */
cmark_map *cmark_footnote_map_new(cmark_mem *mem) {
  cmark_map *footnotes = cmark_map_new(mem, footnote_free);

  return footnotes;
}
// Call this before `cmark_map_free` on a map holding `cmark_footnotes`: it
// unlinks every footnote node while all of them are still allocated.
//
// Two (unused) footnote nodes can sometimes end up referencing each other.
// Freeing them one after another via `cmark_map_free` -> `footnote_free` ->
// etc. could then touch a node that is already gone (use-after-free).
//
// Unlinking every footnote node first sets their next, prev, and parent
// pointers to NULL, so the later walk that frees them is safe.
void cmark_unlink_footnotes_map(cmark_map *map) {
  cmark_map_entry *entry;
  cmark_map_entry *following;

  for (entry = map->refs; entry != NULL; entry = following) {
    cmark_footnote *footnote = (cmark_footnote *)entry;

    following = entry->next;
    if (footnote->node) {
      cmark_node_unlink(footnote->node);
    }
  }
}

View File

@ -1,27 +0,0 @@
#ifndef CMARK_FOOTNOTES_H
#define CMARK_FOOTNOTES_H
#include "map.h"
#ifdef __cplusplus
extern "C" {
#endif
/* A footnote stored in a cmark_map. */
struct cmark_footnote {
/* Generic map entry (label, age, next). It is the first member, so a
 * cmark_footnote pointer can be cast to and from a cmark_map_entry
 * pointer. */
cmark_map_entry entry;
/* The footnote's node; freed together with the entry when non-NULL. */
cmark_node *node;
/* Not referenced in the footnote map code itself; presumably the
 * footnote's ordinal -- confirm against the users of this struct. */
unsigned int ix;
};
typedef struct cmark_footnote cmark_footnote;
/* Adds 'node' to 'map' under its normalized label; does nothing when the
 * label normalizes to nothing. */
void cmark_footnote_create(cmark_map *map, cmark_node *node);
/* Creates a footnote map that allocates with 'mem'. */
cmark_map *cmark_footnote_map_new(cmark_mem *mem);
/* Unlinks the node of every footnote in 'map'; call this before freeing
 * the map. */
void cmark_unlink_footnotes_map(cmark_map *map);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,57 +0,0 @@
#ifndef CMARK_HOUDINI_H
#define CMARK_HOUDINI_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include "config.h"
#include "buffer.h"
/* Branch hints: likely(x) / unlikely(x) wrap the condition in
 * __builtin_expect when HAVE___BUILTIN_EXPECT is defined and expand to the
 * bare expression otherwise. */
#ifdef HAVE___BUILTIN_EXPECT
#define likely(x) __builtin_expect((x), 1)
#define unlikely(x) __builtin_expect((x), 0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif
#ifdef HOUDINI_USE_LOCALE
#define _isxdigit(c) isxdigit(c)
#define _isdigit(c) isdigit(c)
#else
/*
 * Helper _isdigit methods -- do not trust the current locale.
 *
 * Both are plain range checks. _isxdigit deliberately does not use
 * strchr() on a digit string: strchr() also matches the terminating NUL,
 * which would classify a zero byte as a hexadecimal digit.
 * The argument is evaluated more than once, so it must not have side
 * effects.
 */
#define _isxdigit(c)                                                           \
  (((c) >= '0' && (c) <= '9') || ((c) >= 'a' && (c) <= 'f') ||                 \
   ((c) >= 'A' && (c) <= 'F'))
#define _isdigit(c) ((c) >= '0' && (c) <= '9')
#endif
/* Size estimates: 120% of the input size (integer arithmetic) for escaped
 * output, the input size itself for unescaped output. */
#define HOUDINI_ESCAPED_SIZE(x) (((x)*12) / 10)
#define HOUDINI_UNESCAPED_SIZE(x) (x)
/* Decodes one character reference from 'src' (the text after the '&')
 * into 'ob'. */
CMARK_GFM_EXPORT
bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size);
/* Same as houdini_escape_html0 with 'secure' set to 1. */
CMARK_GFM_EXPORT
int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size);
/* Appends 'size' bytes of 'src' to 'ob', replacing the double quote, '&',
 * '<' and '>' with HTML entities; the single quote and '/' are replaced
 * only when 'secure' is non-zero. Returns 1. */
CMARK_GFM_EXPORT
int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size, int secure);
CMARK_GFM_EXPORT
int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size);
CMARK_GFM_EXPORT
void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size);
/* Appends 'size' bytes of 'src' to 'ob' escaped for use in an href
 * attribute: URL-safe bytes are copied, '&' and the single quote become
 * HTML entities, every other byte becomes %XX. Returns 1. */
CMARK_GFM_EXPORT
int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,100 +0,0 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "houdini.h"
/*
 * The following characters will not be escaped:
 *
 * -_.+!*'(),%#@?=;:/,+&$~ alphanum
 *
 * Note that this character set is the addition of:
 *
 * - The characters which are safe to be in an URL
 * - The characters which are *not* safe to be in
 * an URL because they are RESERVED characters.
 *
 * We assume (lazily) that any RESERVED char that
 * appears inside an URL is actually meant to
 * have its native function (i.e. as an URL
 * component/separator) and hence needs no escaping.
 *
 * There are two exceptions: the characters & (amp)
 * and ' (single quote) do not appear in the table.
 * They are meant to appear in the URL as components,
 * yet they require special HTML-entity escaping
 * to generate valid HTML markup.
 *
 * All other characters will be escaped to %XX.
 *
 * The table below is indexed by byte value (256 entries);
 * a non-zero entry means the byte is copied through unchanged.
 *
 */
static const char HREF_SAFE[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/* Appends 'size' bytes of 'src' to 'ob', escaped for use inside an href
 * attribute. Bytes marked safe in HREF_SAFE are copied as they are, '&'
 * and the single quote become HTML entities, and every other byte becomes
 * a %XX escape with upper-case hex digits. Always returns 1. */
int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
  static const uint8_t hex_chars[] = "0123456789ABCDEF";
  bufsize_t pos = 0;
  uint8_t escaped[3];

  escaped[0] = '%';

  while (pos < size) {
    const bufsize_t run_start = pos;
    uint8_t byte;

    /* copy the longest run of bytes that need no escaping in one go */
    while (pos < size && HREF_SAFE[src[pos]] != 0)
      pos++;

    if (likely(pos > run_start))
      cmark_strbuf_put(ob, src + run_start, pos - run_start);

    /* escaping */
    if (pos >= size)
      break;

    byte = src[pos];
    if (byte == '&') {
      /* amp appears all the time in URLs, but needs
       * HTML-entity escaping to be inside an href */
      cmark_strbuf_puts(ob, "&amp;");
    } else if (byte == '\'') {
      /* the single quote is a valid URL character
       * according to the standard; it needs HTML
       * entity escaping too */
      cmark_strbuf_puts(ob, "&#x27;");
    } else {
      /* every other character goes with a %XX escaping; this includes
       * the space, which could be written as a plus sign (more commonly
       * seen when building GET strings) but gets the generic escape */
      escaped[1] = hex_chars[(byte >> 4) & 0xF];
      escaped[2] = hex_chars[byte & 0xF];
      cmark_strbuf_put(ob, escaped, 3);
    }

    pos++;
  }

  return 1;
}

View File

@ -1,66 +0,0 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "houdini.h"
/**
 * According to the OWASP rules:
 *
 * & --> &amp;
 * < --> &lt;
 * > --> &gt;
 * " --> &quot;
 * ' --> &#39; (decimal form of &#x27;) &apos; is not recommended
 * / --> &#47; (decimal form of &#x2F;) forward slash is included as it
 *             helps end an HTML entity
 *
 * HTML_ESCAPE_TABLE is indexed by byte value (256 entries). A zero entry
 * means the byte needs no escaping; any other entry is the index of its
 * replacement in HTML_ESCAPES.
 */
static const char HTML_ESCAPE_TABLE[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/* Replacement strings, indexed by the non-zero values of HTML_ESCAPE_TABLE;
 * slot 0 is an unused placeholder. */
static const char *HTML_ESCAPES[] = {"", "&quot;", "&amp;", "&#39;",
"&#47;", "&lt;", "&gt;"};
/* Appends 'size' bytes of 'src' to 'ob' with HTML special characters
 * replaced by entities. The double quote, '&', '<' and '>' are always
 * replaced; '/' and the single quote are replaced only when 'secure' is
 * non-zero and copied through otherwise. Always returns 1. */
int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size,
                         int secure) {
  bufsize_t pos = 0;

  while (pos < size) {
    const bufsize_t run_start = pos;
    bufsize_t esc = 0;
    uint8_t byte;

    /* find the end of the run of bytes that need no escaping */
    while (pos < size) {
      esc = HTML_ESCAPE_TABLE[src[pos]];
      if (esc != 0)
        break;
      pos++;
    }

    if (pos > run_start)
      cmark_strbuf_put(ob, src + run_start, pos - run_start);

    /* escaping */
    if (unlikely(pos >= size))
      break;

    byte = src[pos];
    /* The forward slash and single quote are only escaped in secure mode */
    if (!secure && (byte == '/' || byte == '\'')) {
      cmark_strbuf_putc(ob, byte);
    } else {
      cmark_strbuf_puts(ob, HTML_ESCAPES[esc]);
    }

    pos++;
  }

  return 1;
}
/*
 * Convenience wrapper: runs houdini_escape_html0() with secure mode enabled,
 * so '/' and '\'' are escaped as well. Returns whatever that call returns.
 */
int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
  const int secure = 1;

  return houdini_escape_html0(ob, src, size, secure);
}

View File

@ -1,149 +0,0 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "buffer.h"
#include "houdini.h"
#include "utf8.h"
#include "entities.inc"
/*
 * Binary tree lookup code for entities added by JGM.
 *
 * Searches cmark_entities[low..hi] for an entry whose name is exactly the
 * `len` bytes at `s`, probing index `i` first.
 * NOTE(review): the halving search presumably relies on cmark_entities being
 * sorted by name in strncmp order; confirm against entities.inc.
 *
 * Returns the matching entry's `bytes`, or NULL when no entry matches.
 */
static const unsigned char *S_lookup(int i, int low, int hi,
                                     const unsigned char *s, int len) {
  for (;;) {
    int next;
    const int order =
        strncmp((const char *)s, (const char *)cmark_entities[i].entity, len);

    /* Equal over `len` bytes and the table name ends there: exact hit. */
    if (order == 0 && cmark_entities[i].entity[len] == 0)
      return (const unsigned char *)cmark_entities[i].bytes;

    if (order <= 0 && i > low) {
      /* Continue in the lower part, [low, i - 1]. */
      next = i - ((i - low) / 2);
      if (next == i)
        next -= 1;
      hi = i - 1;
      i = next;
    } else if (order > 0 && i < hi) {
      /* Continue in the upper part, [i + 1, hi]. */
      next = i + ((hi - i) / 2);
      if (next == i)
        next += 1;
      low = i + 1;
      i = next;
    } else {
      return NULL;
    }
  }
}
/*
 * Look up the `len`-byte name at `s` across the whole cmark_entities table,
 * starting the search at the middle index. Returns the value produced by
 * S_lookup(): the entry's bytes, or NULL when the name is not found.
 */
static const unsigned char *S_lookup_entity(const unsigned char *s, int len) {
  const int first = 0;
  const int last = CMARK_NUM_ENTITIES - 1;
  const int start = CMARK_NUM_ENTITIES / 2;

  return S_lookup(start, first, last, s, len);
}
/*
 * Try to decode a single character reference. houdini_unescape_html() calls
 * this with `src` pointing at the byte that follows the '&'.
 *
 * Numeric form: '#' followed by decimal digits, or "#x" / "#X" followed by
 * hex digits; 1 to 8 digits, terminated by ';'. A value of 0, a value in
 * 0xD800-0xDFFF, or a value >= 0x110000 is replaced by 0xFFFD before it is
 * passed to cmark_utf8proc_encode_char().
 *
 * Named form: a name terminated by ';' that S_lookup_entity() recognises.
 * The scan gives up at the first space and never looks past
 * CMARK_ENTITY_MAX_LENGTH bytes.
 *
 * Returns the number of bytes of `src` consumed, including the ';', or 0 if
 * nothing was decoded (in which case nothing has been written to `ob`).
 */
bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
                               bufsize_t size) {
  bufsize_t pos = 0;
  int value = 0;
  int digit_count = 0;

  if (size < 3 || src[0] != '#') {
    /* Named reference. */
    const unsigned char *replacement;

    if (size > CMARK_ENTITY_MAX_LENGTH)
      size = CMARK_ENTITY_MAX_LENGTH;

    for (pos = CMARK_ENTITY_MIN_LENGTH; pos < size; ++pos) {
      if (src[pos] == ' ')
        return 0;
      if (src[pos] != ';')
        continue;

      /* Only the first ';' is considered, whether or not the name is known. */
      replacement = S_lookup_entity(src, pos);
      if (replacement == NULL)
        return 0;

      cmark_strbuf_puts(ob, (const char *)replacement);
      return pos + 1;
    }

    return 0;
  }

  /* Numeric reference. */
  if (_isdigit(src[1])) {
    pos = 1;
    while (pos < size && _isdigit(src[pos])) {
      value = (value * 10) + (src[pos] - '0');
      /* Keep counting digits, but clamp so the accumulator cannot overflow. */
      if (value >= 0x110000)
        value = 0x110000;
      ++pos;
    }
    digit_count = pos - 1;
  } else if (src[1] == 'x' || src[1] == 'X') {
    pos = 2;
    while (pos < size && _isxdigit(src[pos])) {
      /* (c | 32) % 39 - 9 maps '0'-'9' to 0-9 and 'a'-'f' / 'A'-'F' to 10-15. */
      value = (value * 16) + ((src[pos] | 32) % 39 - 9);
      /* Keep counting digits, but clamp so the accumulator cannot overflow. */
      if (value >= 0x110000)
        value = 0x110000;
      ++pos;
    }
    digit_count = pos - 2;
  }

  if (digit_count < 1 || digit_count > 8 || pos >= size || src[pos] != ';')
    return 0;

  if (value == 0 || (value >= 0xD800 && value < 0xE000) || value >= 0x110000)
    value = 0xFFFD;

  cmark_utf8proc_encode_char(value, ob);
  return pos + 1;
}
/*
 * Write src[0..size) to `ob`, replacing every character reference that
 * houdini_unescape_ent() can decode. An '&' that does not start a decodable
 * reference is written through as a literal '&'.
 *
 * Returns 0, without writing anything, when the input is non-empty and
 * contains no '&' at all; returns 1 otherwise.
 */
int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
                          bufsize_t size) {
  bufsize_t pos = 0;

  while (pos < size) {
    const bufsize_t run_start = pos;
    bufsize_t consumed;

    /* Skip ahead to the next '&' (or to the end of the input). */
    for (; pos < size; pos++) {
      if (src[pos] == '&')
        break;
    }

    if (likely(pos > run_start)) {
      if (unlikely(run_start == 0)) {
        /* First run reached the end: no '&' anywhere, nothing to unescape. */
        if (pos >= size)
          return 0;

        cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
      }

      cmark_strbuf_put(ob, src + run_start, pos - run_start);
    }

    if (pos >= size)
      break;

    /* Step over the '&' and try to decode what follows. */
    pos++;
    consumed = houdini_unescape_ent(ob, src + pos, size - pos);

    /* Not really an entity: keep the ampersand as-is. */
    if (consumed == 0)
      cmark_strbuf_putc(ob, '&');

    pos += consumed;
  }

  return 1;
}
/*
 * Unescape `src` into `ob` via houdini_unescape_html(); when that call
 * returns 0, write the `size` input bytes to `ob` unchanged instead.
 */
void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
                             bufsize_t size) {
  if (houdini_unescape_html(ob, src, size))
    return;

  cmark_strbuf_put(ob, src, size);
}

Some files were not shown because too many files have changed in this diff Show More